In [None]:
import numpy as np
%matplotlib inline
import matplotlib.pyplot as plt
plt.rcParams["figure.figsize"] =(12,9)
import os
import copy
from IPython.display import clear_output
import torch
from torchvision import transforms
import torch.nn as nn
import torch.utils.model_zoo as model_zoo
from torch.utils.data import DataLoader
from torch.autograd import Variable
import PIL
import torch.optim as optim

In [2]:
# initialize the dataloader and normalize the data 
class Dataset:
    """load the image / encoded object position representation pairs for training and testing data"""
    def __init__(self, path, mode = 'train'):
        self.path=path
        self.mode=mode
    def __getitem__(self, index):
        mode = self.mode
        if mode=='train':
            fname = '/train-%04d.jpg'
        elif mode=='test':
            fname = '/test-%04d.jpg'
            
        if mode=='train':
            fname1 = 'train-comp-%04d.npy'
        elif mode=='test':
            fname1 = 'test-comp-%04d.npy'
        img = PIL.Image.open(self.path+fname%index)
        vect = np.load(self.path+fname1%index)
        transform = transforms.Compose([#transforms.Scale((227,227)),
                                        transforms.ToTensor(),
                                        #transforms.Normalize(mean=[0.5],std=[0.25])
                                       ])
        img = transform(img)
        if mode=='train':
            img.requires_grad=True
        vect = torch.FloatTensor(np.concatenate(vect)) 
        return img, vect 

    def __len__(self):
        return len([f for f in os.listdir(self.path) if f.endswith('.jpg')])

# Initialize dataset iterators and find gpu if available 
train_data = Dataset('./data/training/',mode='train')
test_data = Dataset('./data/testing/',mode='test')
print('data is loaded')
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print('device is '+ str(device))

data is loaded
device is cpu


In [18]:
class Net(nn.Module):
    """ this is lenet adapted to the problem """
    def __init__(self):
        super(Net, self).__init__()
        self.convnet = nn.Sequential(
            nn.Conv2d(1, 6, 7, 1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d((2, 2), 2),
            nn.Conv2d(6, 16, 5, 2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d((2, 2), 2),
            nn.Conv2d(16, 120, 5, 1),
            nn.ReLU(inplace=True)
                                    )
        self.fc = nn.Sequential(
            nn.Linear(120*13**2, 5000),
            nn.ReLU(),
            nn.Linear(5000, 500),
                                )
    def forward(self, img):
        output = self.convnet(img)
        output = output.view(-1, 120*13**2)
        output = self.fc(output)
        return output

In [26]:
model = Net().to(device).train()

In [27]:
# initialize the model 
#model = Net().to(device).train()
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer,verbose=True)
losses = []

# set up the training loop and dataset iterator 
k = 250 #size of batch 
N = 500 #number epochs
b = int(len(train_data)/k) #number of batches
train_loader = DataLoader(train_data , batch_size = k, shuffle = True) #batch data loader

# train the network 
for epoch in range(N): # epoch iterator 
    epoch_loss = 0 # mean loss per epoch 
    for i, (inputs, targets) in enumerate(train_loader): # batch iterator 
        inputs, targets = inputs.to(device), targets.to(device) # batch to gpu
        optimizer.zero_grad() # zero gradients
        outputs = model(inputs) # model prediction
        loss = criterion(outputs,targets)  # loss computation
        loss.backward() # backpropagation
        optimizer.step() # gradient descent 
        epoch_loss+=loss.cpu().data.item() # pull the batch losses 
    epoch_loss /= i
    print('epoch loss: ',round(epoch_loss,2)) # print/store loss
    if epoch%10==0 and epoch!=0:     
        n = epoch
        #torch.save(model,'./partial-trains/%04d-epochs.pt'%n) # save partially trained model 
    losses.append(epoch_loss) # keep the losses 
scheduler.step(epoch_loss) # possibly modify the learning rate 

epoch loss:  457036.04
epoch loss:  381344.06
epoch loss:  152560.11
epoch loss:  114521.01
epoch loss:  102481.54
epoch loss:  76750.71
epoch loss:  67106.5
epoch loss:  50579.29
epoch loss:  47053.96
epoch loss:  40982.19
epoch loss:  36261.44
epoch loss:  34123.05
epoch loss:  33033.52
epoch loss:  31928.17
epoch loss:  31023.7
epoch loss:  30534.11
epoch loss:  29769.26
epoch loss:  28976.41
epoch loss:  28260.13
epoch loss:  27563.08
epoch loss:  26804.57
epoch loss:  25624.59
epoch loss:  24736.27
epoch loss:  23970.75
epoch loss:  23230.22
epoch loss:  22745.13
epoch loss:  22032.01
epoch loss:  21648.23
epoch loss:  21155.74
epoch loss:  20829.88
epoch loss:  20477.07
epoch loss:  20246.27
epoch loss:  20004.05
epoch loss:  19970.47
epoch loss:  19694.88
epoch loss:  19483.43
epoch loss:  19593.57
epoch loss:  19189.34
epoch loss:  19471.2
epoch loss:  19006.5
epoch loss:  18692.39
epoch loss:  18502.38
epoch loss:  18534.54
epoch loss:  18404.55
epoch loss:  18519.74
epoch los

KeyboardInterrupt: 

In [None]:
plt.plot(losses)

In [29]:
x.max()

tensor(1., grad_fn=<MaxBackward1>)

In [37]:
x,y = train_data[0]

In [38]:
transforms.Normalize((0.5, ), (0.5, ))(x).squeeze().detach()

tensor([[-1., -1., -1.,  ..., -1., -1., -1.],
        [-1., -1., -1.,  ..., -1., -1., -1.],
        [-1., -1., -1.,  ..., -1., -1., -1.],
        ...,
        [-1., -1., -1.,  ..., -1., -1., -1.],
        [-1., -1., -1.,  ..., -1., -1., -1.],
        [-1., -1., -1.,  ..., -1., -1., -1.]])