In [1]:
from data import Data
import numpy as np
from matplotlib import pyplot as plt
import itertools
%load_ext autoreload
%autoreload 2
%matplotlib inline

In [2]:
# Build the dataset wrapper once, then pull out the train / dev / test splits.
# NOTE(review): `first` and `x_transpose` are interpreted by the project-local
# Data class; the shapes printed further down suggest x_transpose=(0, 3, 1, 2)
# yields channels-first image arrays -- confirm against data.py.
d = Data(first=300, x_transpose=(0, 3, 1, 2))
X_train, y_train = d.get_train()
X_cross, y_cross = d.get_dev()
X_test, y_test = d.get_test()

loaded 100
loaded 200
loaded 300
loaded 400
loaded 500
loaded 600


In [3]:
# Sanity check: print the shape of every split, features first and labels
# second, one shape per line (train, cross-validation, test).
print(
    *(split.shape for split in (X_train, y_train, X_cross, y_cross, X_test, y_test)),
    sep="\n",
)

(317, 3, 128, 128)
(317,)
(97, 3, 128, 128)
(97,)
(74, 3, 128, 128)
(74,)


In [4]:
import torch
import torch.nn as nn
import torch.optim as optim

from torch.utils.data import DataLoader
from torch.utils.data import sampler
import torch.nn.functional as F
import pandas as pd
from torch.utils.data import Dataset, DataLoader
from torch.autograd import Variable

# Single switch for CUDA execution; the tensor-type aliases below follow it.
USE_GPU = False

# Tensor types that inputs (float) and labels (long) are cast to before they
# are fed to the model. The CUDA variants are only looked up when USE_GPU is set.
dtypeFloat = torch.cuda.FloatTensor if USE_GPU else torch.FloatTensor
dtypeLong = torch.cuda.LongTensor if USE_GPU else torch.LongTensor

In [5]:
def flatten(X):
    """Collapse every dimension after the first into a single one.

    Args:
        X: tensor whose leading dimension is the batch size N.

    Returns:
        A tensor of shape (N, D), where D is the product of the remaining
        dimensions of X.
    """
    N = X.shape[0]
    # reshape (unlike view) also accepts non-contiguous inputs such as the
    # result of permute/transpose; it only copies when it has to.
    return X.reshape(N, -1)

In [6]:
class Flatten(nn.Module):
    """Module wrapper around `flatten` so it can be used as a layer."""

    def forward(self, x):
        # Delegate to the module-level helper; this layer defines no
        # parameters of its own.
        flattened = flatten(x)
        return flattened

In [7]:
class MyCustomDataset(Dataset):
    """Map-style dataset pairing each sample in X with its label in y.

    Args:
        X: indexable collection of samples exposing ``.shape[0]`` (for
            example a numpy array or a tensor).
        y: indexable, sized collection of labels, one per sample.

    Raises:
        ValueError: if X and y do not hold the same number of items.
    """

    def __init__(self, X, y):
        # Fail early: a length mismatch would otherwise only surface as an
        # IndexError part-way through iteration.
        if X.shape[0] != len(y):
            raise ValueError(
                'X and y must have the same length, got %d and %d'
                % (X.shape[0], len(y))
            )
        self.X = X
        self.y = y

    def __getitem__(self, index):
        """Return the ``(sample, label)`` pair stored at `index`."""
        img = self.X[index]
        label = self.y[index]
        return (img, label)

    def __len__(self):
        """Return the number of samples (size of X along its first axis)."""
        return self.X.shape[0]

In [8]:
# Training loader: a SubsetRandomSampler over every training index, so batches
# of 20 are drawn in a random order.
NUM_TRAIN = X_train.shape[0]
train_dataset = MyCustomDataset(X_train, y_train)
loader_train = DataLoader(
    train_dataset,
    batch_size=20,
    sampler=sampler.SubsetRandomSampler(range(NUM_TRAIN)),
)

# Cross-validation loader: no sampler given, so the DataLoader default order is used.
cross_dataset = MyCustomDataset(X_cross, y_cross)
loader_cross = DataLoader(cross_dataset, batch_size=20)

In [9]:
def check_accuracy(loader, model):
    """Evaluate `model` on every batch of `loader` and print the accuracy.

    The model is switched to eval mode and is left in that mode on return.

    Args:
        loader: iterable yielding ``(inputs, labels)`` tensor batches.
        model: module mapping a float input batch to per-class scores of
            shape (batch, num_classes).

    Returns:
        float: fraction of correctly classified samples in [0, 1]; 0.0 when
        the loader yields no samples.
    """
    num_correct = 0
    num_samples = 0
    model.eval()

    # Evaluate on whatever device the model's parameters live on; a model
    # without parameters is evaluated on the CPU.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device('cpu')

    with torch.no_grad():
        for x, y in loader:
            scores = model(x.float().to(device))
            # Predicted class = index of the highest score along dim 1.
            _, preds = scores.max(1)
            # .item() keeps the running total a plain Python int.
            num_correct += (preds == y.long().to(device)).sum().item()
            num_samples += preds.size(0)

    # Guard against an empty loader instead of dividing by zero.
    acc = float(num_correct) / num_samples if num_samples else 0.0
    print('Got %d / %d correct (%.2f)' % (num_correct, num_samples, 100 * acc))
    return acc

In [10]:
def imshow_noax(img, normalize=True):
    """Show an image as uint8 on the current axes and hide the axis labels.

    Args:
        img: 3-D numpy array with the channel axis first; it is transposed
            with ``(1, 2, 0)`` so the channel axis comes last for imshow.
        normalize: when True, linearly rescale the pixel values to
            [0, 255] before the uint8 cast; when False the values are cast
            as they are.
    """
    img = img.transpose(1, 2, 0)
    if normalize:
        img_max, img_min = np.max(img), np.min(img)
        value_range = img_max - img_min
        if value_range > 0:
            img = 255.0 * (img - img_min) / value_range
        else:
            # Constant image: rescaling would divide by zero and yield NaNs,
            # so render it as a uniform black image instead.
            img = np.zeros_like(img, dtype=float)
    plt.imshow(img.astype('uint8'))
    plt.gca().axis('off')

In [14]:
# Logging cadence, counted in batches from the start of each epoch.
print_every = 50
print_acc_every = 150
# When True, plot an input image next to the output of the model's first layer.
show_transformations = False


def _show_stn_transformation(m, x, index=5):
    """Plot one sample of batch `x` beside the output of the first layer of `m`.

    Assumes `m` is indexable (e.g. nn.Sequential) and that its first layer
    returns an image-shaped tensor (such as the STN) -- verify before
    enabling `show_transformations`.
    """
    # Clamp so a batch with fewer than index + 1 samples does not raise.
    index = min(index, x.shape[0] - 1)
    x_ = x[index][None]
    stn = next(m.modules())[0]
    plt.subplot(1, 2, 1)
    imshow_noax(x_.detach().cpu().numpy()[0], normalize=False)
    plt.subplot(1, 2, 2)
    with torch.no_grad():
        stn_out = stn(x_.float().type(dtypeFloat))
    # .cpu() makes the numpy conversion work for CUDA tensors too.
    imshow_noax(stn_out.detach().cpu().numpy()[0], normalize=False)
    plt.show()


def train(m, optimizer, epochs=15):
    """Train `m` on `loader_train` with a cross-entropy loss.

    Whenever the in-epoch batch index is a multiple of `print_every`, the
    current loss is printed and recorded. If that index is also a multiple of
    `print_acc_every`, the accuracies on `loader_train` and `loader_cross`
    are printed too (and, with `show_transformations`, a sample is plotted).

    Args:
        m: model to optimise; called on float input batches.
        optimizer: optimizer already bound to the parameters of `m`.
        epochs: number of full passes over `loader_train`.

    Returns:
        list of float: the recorded loss values, in logging order.
    """
    loss_arr = []
    for e in range(epochs):
        for t, (x, y) in enumerate(loader_train):
            # check_accuracy switches the model to eval mode, so training
            # mode is re-enabled before every step.
            m.train()

            scores = m(x.float().type(dtypeFloat))
            loss = F.cross_entropy(scores, y.long().type(dtypeLong))

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            if t % print_every == 0:
                loss_value = loss.item()
                # Report the epoch as well: t restarts at 0 every epoch, so
                # the batch index alone cannot tell log lines apart.
                print('Epoch %d, iteration %d, loss = %.4f' % (e, t, loss_value))
                loss_arr.append(loss_value)
                if t % print_acc_every == 0:
                    print("train acc:")
                    check_accuracy(loader_train, m)
                    print("cross acc:")
                    check_accuracy(loader_cross, m)

                    # print transformations
                    if show_transformations:
                        _show_stn_transformation(m, x)

    return loss_arr

In [15]:
# Channel widths for the localization network. Only attn_channel_1 and
# attn_channel_2 are used by the layers currently enabled in STN; the
# remaining values are kept for deeper variants of the network.
attn_channel_1 = 4
attn_channel_2 = 10
attn_channel_3 = 10
attn_channel_4 = 10
attn_channel_5 = 8

# https://pytorch.org/tutorials/intermediate/spatial_transformer_tutorial.html
class STN(nn.Module):
    """Spatial transformer: predicts a 2x3 affine matrix per image and resamples the input with it.

    The fully connected head is sized for 3-channel 128x128 inputs: the
    stride-2 convolution halves the spatial size to 64 and the max-pool halves
    it again to 32.
    """

    def __init__(self):
        super(STN, self).__init__()
        self.localization = nn.Sequential(
            nn.Conv2d(3, attn_channel_1, 5, stride=2, padding=2),
            nn.ReLU(),
            nn.Conv2d(attn_channel_1, attn_channel_2, 3, stride=1, padding=1),
            nn.MaxPool2d(2),
            nn.ReLU(),
        )
        # The last enabled convolution emits attn_channel_2 feature maps, so
        # the head must be sized from that constant.
        self._loc_features = attn_channel_2 * 32 * 32
        self.fc_loc = nn.Sequential(
            nn.Linear(self._loc_features, 32),
            nn.ReLU(True),
            nn.Linear(32, 3 * 2)
        )

        # Start from the identity transform: zero weights plus a bias equal to
        # the flattened 2x3 identity affine matrix.
        self.fc_loc[2].weight.data.zero_()
        self.fc_loc[2].bias.data.copy_(torch.tensor([1, 0, 0, 0, 1, 0], dtype=torch.float))

    def forward(self, x):
        """Return `x` resampled through the predicted affine grid (same size as `x`)."""
        xs = self.localization(x)
        # Keep the batch dimension explicit so an unexpected input size fails
        # in the linear layer instead of being silently re-batched by view(-1, ...).
        xs = xs.view(xs.size(0), -1)
        theta = self.fc_loc(xs)
        theta = theta.view(-1, 2, 3)

        # NOTE(review): newer PyTorch releases warn when align_corners is not
        # passed to these two calls; the library default is kept here so the
        # code also runs on older versions -- confirm against the installed one.
        grid = F.affine_grid(theta, x.size())
        x = F.grid_sample(x, grid)

        return x

In [None]:
# Output widths of the seven convolutions, plus optimiser / task settings.
channel_1 = 16
channel_2 = 32
channel_3 = 50
channel_4 = 75
channel_5 = 50
channel_6 = 50
channel_7 = 50
learning_rate = 2.2e-4
num_classes = 2

# Every convolution is padded to preserve the spatial size, so only the four
# 2x2 max-pools shrink the 128x128 input: 128 -> 64 -> 32 -> 16 -> 8.
model = nn.Sequential(
    # STN(),  (spatial transformer front-end, currently disabled)

    # stage 1
    nn.Conv2d(3, channel_1, 5, padding=2),
    nn.ReLU(),
    nn.MaxPool2d(2),

    # stage 2
    nn.Conv2d(channel_1, channel_2, 3, padding=1),
    nn.Conv2d(channel_2, channel_3, 3, padding=1),
    nn.Dropout2d(p=0.5),
    nn.ReLU(),
    nn.MaxPool2d(2),

    # stage 3
    nn.Conv2d(channel_3, channel_4, 3, padding=1),
    nn.Conv2d(channel_4, channel_5, 3, padding=1),
    nn.Dropout2d(p=0.5),
    nn.ReLU(),
    nn.MaxPool2d(2),

    # stage 4
    nn.Conv2d(channel_5, channel_6, 3, padding=1),
    nn.Conv2d(channel_6, channel_7, 3, padding=1),
    nn.Dropout2d(p=0.5),
    nn.MaxPool2d(2),

    # classifier head on the flattened 8x8 feature maps
    Flatten(),
    nn.Linear(channel_7 * 8 * 8, num_classes),
)
if USE_GPU:
    model = model.cuda()

optimizer = optim.Adam(model.parameters(), lr=learning_rate)
loss_arr = train(model, optimizer)

# Plot the losses recorded by train().
plt.rcParams.update({
    'figure.figsize': (10.0, 8.0),  # set default size of plots
    'image.interpolation': 'nearest',
    'image.cmap': 'gray',
})
plt.plot(loss_arr)
plt.show()

Iteration 0, loss = 1.5174
train acc:
Got 154 / 317 correct (48.58)
cross acc:
Got 50 / 97 correct (51.55)
Iteration 0, loss = 0.8996
train acc:
Got 158 / 317 correct (49.84)
cross acc:
Got 50 / 97 correct (51.55)
Iteration 0, loss = 0.6454
train acc:
Got 184 / 317 correct (58.04)
cross acc:
Got 59 / 97 correct (60.82)
Iteration 0, loss = 0.6762
train acc:
Got 209 / 317 correct (65.93)
cross acc:
Got 57 / 97 correct (58.76)
Iteration 0, loss = 0.7299
train acc:
Got 177 / 317 correct (55.84)
cross acc:
Got 48 / 97 correct (49.48)
Iteration 0, loss = 0.6925
train acc:
Got 173 / 317 correct (54.57)
cross acc:
Got 47 / 97 correct (48.45)
Iteration 0, loss = 0.6097
train acc:
Got 205 / 317 correct (64.67)
cross acc:
Got 56 / 97 correct (57.73)
Iteration 0, loss = 0.6291
train acc:
Got 190 / 317 correct (59.94)
cross acc:
Got 49 / 97 correct (50.52)
Iteration 0, loss = 0.6029
train acc:
Got 192 / 317 correct (60.57)
cross acc:
Got 58 / 97 correct (59.79)
Iteration 0, loss = 0.6929
train acc: