In [1]:
import torch
# import torch.utils.data as data
import h5py 

from torchvision.transforms import (CenterCrop, 
                                    Compose, 
                                    Normalize, 
                                    RandomHorizontalFlip,
                                    RandomResizedCrop, 
                                    Resize, 
                                    ToTensor,
                                    Lambda
                                   )

from torch.utils.data import DataLoader, Dataset


class KITTI(Dataset):
    # yeah the sources are the labels. 
    def __init__(self, datafile, sourcefile, nt):
        self.datafile = datafile
        self.sourcefile = sourcefile
        self.X = h5py.File(self.datafile, 'r')
        self.sources = h5py.File(self.sourcefile, 'r')
        self.nt = nt
        cur_loc = 0
        possible_starts = []

        my_array = self.X['data_0'][()]
        self.X = my_array 

        sources_array = self.sources['data_0'][()]
        self.sources = sources_array

        while cur_loc < self.X.shape[0] - self.nt + 1:
            if self.sources[cur_loc] == self.sources[cur_loc + self.nt - 1]:
                possible_starts.append(cur_loc)
                cur_loc += self.nt
            else:
                cur_loc += 1
        self.possible_starts = possible_starts

    def __getitem__(self, index):
        loc = self.possible_starts[index]
        return self.X[loc:loc+self.nt]


    def __len__(self):
        return len(self.possible_starts)

In [2]:
class TEST_KITTI(Dataset):
    def __init__(self, data, nt):
        self.data = data
#         self.sourcefile = sourcefile
#         self.X = h5py.File(self.datafile, 'r')
#         self.sources = h5py.File(self.sourcefile, 'r')
        self.nt = nt
#         cur_loc = 0
#         possible_starts = []

#         my_array = self.X['data_0'][()]
#         self.X = my_array 

#         sources_array = self.sources['data_0'][()]
#         self.sources = sources_array

#         while cur_loc < self.X.shape[0] - self.nt + 1:
#             if self.sources[cur_loc] == self.sources[cur_loc + self.nt - 1]:
#                 possible_starts.append(cur_loc)
#                 cur_loc += self.nt
#             else:
#                 cur_loc += 1
#         self.possible_starts = possible_starts

    def __getitem__(self, index):
#         loc = self.possible_starts[index]
        return self.data[index]


    def __len__(self):
        return len(self.data)

In [3]:
import torch
import math
import torch.nn as nn
from torch.nn import Parameter
from torch.nn import functional as F
from torch.autograd import Variable
from torch.nn.modules.utils import _pair

# https://gist.github.com/Kaixhin/57901e91e5c5a8bac3eb0cbbdd3aba81

class ConvLSTMCell(nn.Module):
    def __init__(self, in_channels, out_channels, kernel_size, stride=1, padding=1, dilation=1, groups=1, bias=True):
        super(ConvLSTMCell, self).__init__()
        if in_channels % groups != 0:
            raise ValueError('in_channels must be divisible by groups')
        if out_channels % groups != 0:
            raise ValueError('out_channels must be divisible by groups')
        kernel_size = _pair(kernel_size)
        stride = _pair(stride)
        padding = _pair(padding)
        dilation = _pair(dilation)
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.kernel_size = kernel_size
        self.stride = stride
        self.padding = padding
        self.padding_h = tuple(
            k // 2 for k, s, p, d in zip(kernel_size, stride, padding, dilation))
        self.dilation = dilation
        self.groups = groups
        self.weight_ih = Parameter(torch.Tensor(
            4 * out_channels, in_channels // groups, *kernel_size))
        self.weight_hh = Parameter(torch.Tensor(
            4 * out_channels, out_channels // groups, *kernel_size))
        self.weight_ch = Parameter(torch.Tensor(
            3 * out_channels, out_channels // groups, *kernel_size))
        if bias:
            self.bias_ih = Parameter(torch.Tensor(4 * out_channels))
            self.bias_hh = Parameter(torch.Tensor(4 * out_channels))
            self.bias_ch = Parameter(torch.Tensor(3 * out_channels))
        else:
            self.register_parameter('bias_ih', None)
            self.register_parameter('bias_hh', None)
            self.register_parameter('bias_ch', None)
        self.register_buffer('wc_blank', torch.zeros(1, 1, 1, 1))
        self.reset_parameters()

    def reset_parameters(self):
        n = 4 * self.in_channels
        for k in self.kernel_size:
            n *= k
        stdv = 1. / math.sqrt(n)
        self.weight_ih.data.uniform_(-stdv, stdv)
        self.weight_hh.data.uniform_(-stdv, stdv)
        self.weight_ch.data.uniform_(-stdv, stdv)
        if self.bias_ih is not None:
            self.bias_ih.data.uniform_(-stdv, stdv)
            self.bias_hh.data.uniform_(-stdv, stdv)
            self.bias_ch.data.uniform_(-stdv, stdv)

    def forward(self, input, hx):
        h_0, c_0 = hx
        wx = F.conv2d(input, self.weight_ih, self.bias_ih,
                      self.stride, self.padding, self.dilation, self.groups)

        wh = F.conv2d(h_0, self.weight_hh, self.bias_hh, self.stride,
                      self.padding_h, self.dilation, self.groups)

        # Cell uses a Hadamard product instead of a convolution?
        wc = F.conv2d(c_0, self.weight_ch, self.bias_ch, self.stride,
                      self.padding_h, self.dilation, self.groups)

        wxhc = wx + wh + torch.cat((wc[:, :2 * self.out_channels], Variable(self.wc_blank).expand(
            wc.size(0), wc.size(1) // 3, wc.size(2), wc.size(3)), wc[:, 2 * self.out_channels:]), 1)

        i = F.sigmoid(wxhc[:, :self.out_channels])
        f = F.sigmoid(wxhc[:, self.out_channels:2 * self.out_channels])
        g = F.tanh(wxhc[:, 2 * self.out_channels:3 * self.out_channels])
        o = F.sigmoid(wxhc[:, 3 * self.out_channels:])

        c_1 = f * c_0 + i * g
        h_1 = o * F.tanh(c_1)
        return h_1, (h_1, c_1)

In [4]:
import os
# from natsort import natsorted
import numpy as np
import matplotlib
from imageio import imread
import matplotlib.pyplot as plt
import glob

#if not os.path.exists(DATA_DIR): os.mkdir(DATA_DIR)
desired_im_sz = (128, 160) #match kitti


# Create image datasets.
# Processes images and saves them in train, val, test splits.
def process_data():
    nt=10 #number of transformations per image
    numTransf = 10
    step = 1 # choose obj every 10 degrees of movement
    
    # so combine the steps of the da/scratch365/jhuang24/dataset_v1_3_partition/train_valid/known_known/00403/ztaloaders 
    root_obj = '/scratch365/jhuang24/dataset_v1_3_partition/train_valid/known_known/00403/'
    
    
    
#     json_data_base = '/afs/crc.nd.edu/user/j/jdulay'
#     train_known_known_with_rt_path = os.path.join(json_data_base, "train_known_known_with_rt.json")
    
#     with open(train_known_known_with_rt_path) as f:
#         data = json.load(f)
#         print("Json file loaded: %s" % json_path)
        

#     jDirs = objDirs[:8000]
    
    
    # so dump all the classes into a big pile
    
    
    stimuli = glob.glob(os.path.join(root_obj,'*.JPEG'))
    #testper = 1.0 #technically does nothing
#     stimuli=natsorted(stimuli)
    #test = objDirs[valend:]
    
#     with open(os.path.join(root, 'test.txt'), 'w') as f:
#         f.write('\n'.join(stimuli)+'\n')
        
#     print(stimuli)
    X_data = np.zeros((len(stimuli),) + (nt,) + (128, 160) + (3,), np.uint8)
    print(X_data.shape)
    for i, objID in enumerate(stimuli): #0-4000
#         print(objID)
        for transID in range(0, numTransf, step): #starts at 0, up to not including nt
#                 print(os.path.join(root_of_objects,objID))
#                 image=imread(os.path.join(root_of_objects,objID))
#             image=imread(os.path.join(root_obj,objID))
            
            image = Image.open(os.path.join(root_obj, objID))
            image = image.resize((128, 160))
            im_arr = np.asarray(image)
            im_arr = np.rollaxis(im_arr, 1, 0)
            
#             sanity_pill = Image.fromarray(im_arr)
#             sanity_pill.save('sanity2.jpg')
#             print('im_arr shape', im_arr.shape)
            
        
        
#             print('the path is um, ', os.path.join(root_obj,objID))
            # we don't need the weird json loading stuff here, because of how
            # we set it up before ... 
            #item = data[str(transID)]
            # transID is just a num 
            #
            
            #image = imread(item["img_path"])
                
#             print("checkpoint_1")
            #image = cv2.resize(image, (desired_im_sz[1], desired_im_sz[0]))
#             print('pre image shape is,', image.shape)



#             image = np.resize(image, (desired_im_sz[0], desired_im_sz[1], 3))

    
    
#             print('pre image shape is,', image.shape)
#             print("checkpoint_2")
#             print(transID/step)

#             print('shape is ', image.shape)
            # X_data[i, (transID/step)] = process_im(image, desired_im_sz)
#             print(transID/step)
#             print(i)
#             print(X_data.shape)
#             print('type of the image going in is', type(image))
            #print('eek', X_data[0,(transID/step)])
            #X_data[i, (transID/step)] = process_im(image, desired_im_sz[1])
#             print('type of image is', type(image))
#             print('image shape is', image.shape)
            
#             print('image shape is', np.rollaxis(image, 0, 3).shape)

#             im = np.rollaxis(image, 2, 0)
#             im = Image.fromarray(np.rollaxis(image, 2, 0))
#             im = Image.fromarray(image)

#             image.save('sanity_in_process.jpg')
        
            X_data[i, (transID//step)] = im_arr
#             1/0
#             print("checkpoint_3")

    # from the other stuff, we want the batch nt chan h w
    X_data = np.transpose(X_data,(0,1,4,2,3)) #changing the position of numChannels
    X_data = (X_data.astype(np.float32))/255 #normalize the image
    
    return X_data


# resize and crop image
def process_im(im, desired_sz):
#     print('in proces')
#     print('1im shape is', im.shape)
    target_ds = float(desired_sz[0])/im.shape[0]
    im = np.resize(im, (desired_sz[0], int(np.round(target_ds * im.shape[1]))))
#     print('2im shape is', im.shape)
    d = (im.shape[1] - desired_sz[1]) / 2
#     print('d shape is', d)
    im = im[:, d:d+desired_sz[1]]
#     print('im shape is', im.shape)
    return im


In [5]:
X_data = process_data()

(1200, 10, 128, 160, 3)


NameError: name 'Image' is not defined

In [None]:
def info(prefix, var):
    print('-------{}----------'.format(prefix))
    if isinstance(var, torch.autograd.variable.Variable):
        print('Variable:')
        print('size: ', var.data.size())
        print('data type: ', type(var.data))
    elif isinstance(var, torch.FloatTensor) or isinstance(var, torch.cuda.FloatTensor):
        print('Tensor:')
        print('size: ', var.size())
        print('type: ', type(var))
    else:
        print(type(var))

In [None]:
import torch
import torch.nn as nn
from torch.nn import functional as F
# from convlstmcell import ConvLSTMCell
from torch.autograd import Variable


class PredNet(nn.Module):
    def __init__(self, R_channels, A_channels, output_mode='error'):
        super(PredNet, self).__init__()
        self.r_channels = R_channels + (0, )  # for convenience
        self.a_channels = A_channels
        self.n_layers = len(R_channels)
        self.output_mode = output_mode

        default_output_modes = ['prediction', 'error']
        assert output_mode in default_output_modes, 'Invalid output_mode: ' + str(output_mode)

        for i in range(self.n_layers):
            cell = ConvLSTMCell(2 * self.a_channels[i] + self.r_channels[i+1],                                                                             self.r_channels[i],
                                (3, 3))
            setattr(self, 'cell{}'.format(i), cell)

        for i in range(self.n_layers):
            conv = nn.Sequential(nn.Conv2d(self.r_channels[i], self.a_channels[i], 3, padding=1), nn.ReLU())
            if i == 0:
                conv.add_module('satlu', SatLU())
            setattr(self, 'conv{}'.format(i), conv)


        self.upsample = nn.Upsample(scale_factor=2)
        self.maxpool = nn.MaxPool2d(kernel_size=2, stride=2)

        for l in range(self.n_layers - 1):
            update_A = nn.Sequential(nn.Conv2d(2* self.a_channels[l], self.a_channels[l+1], (3, 3), padding=1), self.maxpool)
            setattr(self, 'update_A{}'.format(l), update_A)

        self.reset_parameters()

    def reset_parameters(self):
        for l in range(self.n_layers):
            cell = getattr(self, 'cell{}'.format(l))
            cell.reset_parameters()

    def forward(self, input):

        R_seq = [None] * self.n_layers
        H_seq = [None] * self.n_layers
        E_seq = [None] * self.n_layers

        w, h = input.size(-2), input.size(-1)
        batch_size = input.size(0)

        for l in range(self.n_layers):
            # E_seq[l] = Variable(torch.zeros(batch_size, 2*self.a_channels[l], w, h))
            # R_seq[l] = Variable(torch.zeros(batch_size, self.r_channels[l], w, h))
            E_seq[l] = Variable(torch.zeros(batch_size, 2*self.a_channels[l], w, h)).cuda()
            R_seq[l] = Variable(torch.zeros(batch_size, self.r_channels[l], w, h)).cuda()
            w = w//2
            h = h//2
        time_steps = input.size(1)
        total_error = []
        
        for t in range(time_steps):
            A = input[:,t]
            # A = A.type(torch.FloatTensor)
            A = A.type(torch.cuda.FloatTensor)
            
            for l in reversed(range(self.n_layers)):
                cell = getattr(self, 'cell{}'.format(l))
                if t == 0:
                    E = E_seq[l]
                    R = R_seq[l]
                    hx = (R, R)
                else:
                    E = E_seq[l]
                    R = R_seq[l]
                    hx = H_seq[l]
                if l == self.n_layers - 1:
                    R, hx = cell(E, hx)
                else:
                    tmp = torch.cat((E, self.upsample(R_seq[l+1])), 1)
                    R, hx = cell(tmp, hx)
                R_seq[l] = R
                H_seq[l] = hx


            for l in range(self.n_layers):
                conv = getattr(self, 'conv{}'.format(l))
                A_hat = conv(R_seq[l])
                if l == 0:
                    frame_prediction = A_hat
                pos = F.relu(A_hat - A)
                neg = F.relu(A - A_hat)
                E = torch.cat([pos, neg],1)
                E_seq[l] = E
                if l < self.n_layers - 1:
                    update_A = getattr(self, 'update_A{}'.format(l))
                    A = update_A(E)
            if self.output_mode == 'error':
                mean_error = torch.cat([torch.mean(e.view(e.size(0), -1), 1, keepdim=True) for e in E_seq], 1)
                # batch x n_layers
                total_error.append(mean_error)

        if self.output_mode == 'error':
            return torch.stack(total_error, 2) # batch x n_layers x nt
        elif self.output_mode == 'prediction':
            return frame_prediction


class SatLU(nn.Module):

    def __init__(self, lower=0, upper=255, inplace=False):
        super(SatLU, self).__init__()
        self.lower = lower
        self.upper = upper
        self.inplace = inplace

    def forward(self, input):
        return F.hardtanh(input, self.lower, self.upper, self.inplace)


    def __repr__(self):
        inplace_str = ', inplace' if self.inplace else ''
        return self.__class__.__name__ + ' ('\
            + 'min_val=' + str(self.lower) \
    + ', max_val=' + str(self.upper) \
    + inplace_str + ')'

In [None]:
class DARPA_ReactionTimeDataset(Dataset):
    def __init__(self,
                 json_path,
                 transform):

        with open(json_path) as f:
            data = json.load(f)
        #print("Json file loaded: %s" % json_path)

        self.data = data
        self.transform = transform
        self.random_weight = None

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        item = self.data[str(idx)]

        # Open the image and do normalization and augmentation
        img = Image.open(item["img_path"])
        img = img.convert('RGB')
        # needed this transform call
        img = self.transform(img)
        
        # Deal with reaction times
        if item["RT"] != None:
            rt = item["RT"]
        else:
            rt = 0

        return {
            "pixel_values": img,
            "label": item["label"],
            "rt": rt,
            "category": item["category"]
        }


In [None]:
def collate_fn(batch):
    pixel_values = torch.stack([x["pixel_values"] for x in batch])
    labels = torch.tensor([x["label"] for x in batch])
    rt = torch.tensor([x["rt"] for x in batch])

    return {"pixel_values": pixel_values, "label": labels, "rt": rt}



In [179]:
import torch
import os
import numpy as np
import json
# import hickle as hkl

from torch.utils.data import DataLoader
from torch.autograd import Variable
# from kitti_data import KITTI
# from prednet import PredNet

import torchvision

from PIL import Image

def save_image(tensor, filename, nrow=8, padding=2,
               normalize=False, range=None, scale_each=False, pad_value=0):
    from PIL import Image
#     im = Image.fromarray(np.rollaxis(tensor.numpy(), 0, 3))
    im = Image.fromarray(tensor.numpy())
    print('in save_image. im shape is', im.shape)
    im.save(filename)
# from scipy.misc import imshow, imsave

batch_size = 4
A_channels = (3, 48, 96, 192)
R_channels = (3, 48, 96, 192)
nt = 10

# DATA_DIR = './kitti_data'

# but we need to change this to our test data that we use in the other notebook 

# TESTING DATA HERE >>>>

# normalize = imagenetMeans TODO
train_transforms = Compose(
        [
            RandomResizedCrop(224),
            RandomHorizontalFlip(),
            ToTensor(),
            Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225))
        ]
    )

# old get item required an index grab too
testdataset = TEST_KITTI(data=X_data, nt=nt) # batched kitti dataset
test_loader = DataLoader(testdataset, batch_size=4, num_workers=8)





#TODO: just split up the dataset fairly 
# maybe just train/test only
# json_data_base = '/afs/crc.nd.edu/user/j/jdulay'
# train_known_known_with_rt_path = os.path.join(json_data_base, "train_known_known_with_rt.json")
# valid_known_known_with_rt_path = os.path.join(json_data_base, "valid_known_known_with_rt.json")


# # valdataset = traindataset
# testdataset = ReactionTimeDataset(json_path=valid_known_known_with_rt_path,
#                                         transform=train_transforms)

# labels = []
# #TODO might need to do this for train idk
# for i in range(len(testdataset)):
#     item = testdataset.data[str(i)]['label']
#     if item not in labels:
#         labels.append(item)
# len(labels)

# test_loader = DataLoader(testdataset, batch_size=16, num_workers=8, shuffle=False, collate_fn=collate_fn)

# >>>>>>>>>



# DATA_DIR = '/media/lei/000F426D0004CCF4/datasets/kitti_data'
# test_file = os.path.join(DATA_DIR, 'X_test.hkl')
# test_sources = os.path.join(DATA_DIR, 'sources_test.hkl')


# kitti_test = KITTI(test_file, test_sources, nt)


if torch.cuda.is_available():
    device = 'cuda'
    print('Using GPU.')
else:
    device = 'cpu'
    print('Using CPU.')

model = PredNet(R_channels, A_channels, output_mode='prediction').to(device)
model.load_state_dict(torch.load('kitti_training.pt'))


dataiter = iter(test_loader)
inputs = dataiter.next()


# print('images are', images)
print('inputs shapes are', inputs.shape)

inputs = Variable(inputs.to(device))
origin = inputs.cpu()[:, nt-1]
print('origin:')
print(type(origin))
print(origin.size())

print('here')
print(origin[0].size())

o_np = origin[1].numpy()
print('numpy shape', o_np.shape)
print('numpy type', o_np.dtype)
print(o_np)

# let's see if we can move the stuff around 
channel_op = np.moveaxis(o_np, 0, -1)


print('numpy shape 2', channel_op.shape)

channel_op = (channel_op * 255).astype(np.uint8)
# print(channel_op)
print('chan op shape', channel_op.shape)

# 1/0

im2 = (o_np[0] * 255).astype(np.uint8)
print(im2)
print(im2.shape)

# im = Image.fromarray((o_np * 255).astype(np.uint8))
im = Image.fromarray(channel_op)



# print('in save_image. im shape is', im.shape)
im.save('origin_sanity.png')


# 1/0


# origin = torchvision.utils.make_grid(origin, nrow=4)
# save_image(origin, 'origin.jpg')
pred = model(inputs)
pred = pred.data.cpu().byte()
print('preds below:')
print(type(pred))
print(pred.size())


# pred = torchvision.utils.make_grid(pred, nrow=4)
p_np = pred[1].numpy()

p_np = np.moveaxis(p_np, 0, -1)

print('pred shape', p_np.shape)
print('numpy type', p_np.dtype)
print(p_np)

# im3 = p_np

im3 = (p_np * 255).astype(np.uint8)
print(im3)
print(im3.shape)
# im = Image.fromarray((o_np * 255).astype(np.uint8))
foo = Image.fromarray(im3)

foo.save('pred.png')

# save_image(pred, 'predicted.jpg')

# print('um sanity check')



2/0


for i, batch in enumerate(test_loader):
    print('DEBUG:')
    
    #unpack inputs 
#     inputs = batch['pixel_values']
#     print('inputs are', inputs)    
    
    # okay, so in the kitti code, each time step is an image 
    # so stack the stuff with blanks
    # or img copies? 
    
    # ...
    
#     inputs = inputs[:,None,:,:,:]
#     print('inputs shape are', inputs.shape)

#     inputs[1] = torch.Tensor([10 for _ in range(10)])
    # add some sort of thing for this 
    # add dim 
    # then expand it to everything 
    
#     print('inputs shape are', inputs.shape)

    
    # maybe see the jeff code for this?
    # or some weird foundation thing
    
    inputs = inputs.permute(0, 1, 4, 2, 3) # batch x time_steps x channel x width x height
    inputs = Variable(inputs.to(device))
    origin = inputs.data.cpu().byte()[:, nt-1]
    print('origin:')
    print(type(origin))
    print(origin.size())

    print('predicted:')
    pred = model(inputs)
    pred = pred.data.cpu().byte()
    print(type(pred))
    print(pred.size())
    origin = torchvision.utils.make_grid(origin, nrow=4)
    pred = torchvision.utils.make_grid(pred, nrow=4)
#     save_image(origin, 'origin.jpg')
#     save_image(pred, 'predicted.jpg')
    break



Using GPU.
inputs shapes are torch.Size([4, 10, 3, 128, 160])
origin:
<class 'torch.Tensor'>
torch.Size([4, 3, 128, 160])
here
torch.Size([3, 128, 160])
numpy shape (3, 128, 160)
numpy type float32
[[[0.41568628 0.41568628 0.41960785 ... 0.65882355 0.7019608  0.7019608 ]
  [0.42745098 0.43529412 0.42745098 ... 0.6509804  0.654902   0.6666667 ]
  [0.43529412 0.45490196 0.43529412 ... 0.6627451  0.67058825 0.65882355]
  ...
  [0.3882353  0.3882353  0.40784314 ... 0.6        0.5764706  0.5647059 ]
  [0.3764706  0.39607844 0.38039216 ... 0.63529414 0.5803922  0.58431375]
  [0.37254903 0.38039216 0.3647059  ... 0.627451   0.5803922  0.58431375]]

 [[0.3647059  0.36862746 0.37254903 ... 0.6156863  0.654902   0.654902  ]
  [0.38431373 0.38039216 0.38039216 ... 0.6117647  0.62352943 0.6509804 ]
  [0.3882353  0.4117647  0.40392157 ... 0.63529414 0.6509804  0.65882355]
  ...
  [0.33333334 0.34117648 0.36078432 ... 0.5686275  0.5372549  0.5372549 ]
  [0.32941177 0.34117648 0.34509805 ... 0.584313

ZeroDivisionError: division by zero