In [2]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.autograd import Variable
from torch.utils import data
from torch.utils.data import DataLoader
from torch.utils.data import sampler

from sklearn import model_selection

import numpy as np
import pandas as pd
from pathlib import Path 
from imageio import imread

from skimage.transform import resize
import time

In [3]:
class ChunkSampler(sampler.Sampler):
    """Yield a contiguous run of dataset indices in ascending order.

    Arguments:
        num_samples: how many indices the sampler produces
        start: first index produced (defaults to 0)
    """

    def __init__(self, num_samples, start=0):
        self.start = start
        self.num_samples = num_samples

    def __len__(self):
        # The sampler always reports exactly the number of indices requested.
        return self.num_samples

    def __iter__(self):
        # Indices run over the half-open interval [start, start + num_samples).
        first = self.start
        stop = first + self.num_samples
        return iter(range(first, stop))

# Sample counts handed to ChunkSampler when the training / validation loaders
# are built.
# NOTE(review): the recorded split output shows 16818 training rows, which is
# fewer than NUM_TRAIN -- confirm these values suit this dataset.
NUM_TRAIN, NUM_VAL = 49000, 1000

def process_raw_data(train_folder='../data/imgs/train', test_size=0.25, random_state=20):
    """Load all training images from disk and split them into train/validation sets.

    ``train_folder`` must contain one sub-directory per class. The integer
    label is parsed from the directory name with ``int(name[1:])``, i.e. one
    prefix character followed by digits. Every image must decode to a
    480 x 640 array with 3 channels.

    Args:
        train_folder: directory holding the per-class image folders.
        test_size: fraction of samples reserved for validation.
        random_state: seed forwarded to ``train_test_split``.

    Returns:
        ``(Xtr, Xva, Ytr, Yva)`` where the image arrays have shape
        ``(N, 3, 480, 640)`` (channels first) and the label arrays ``(N,)``.

    Raises:
        ValueError: if no images are found or an image has an unexpected shape.
    """
    started = time.time()
    images = []
    labels = []

    # Sorted traversal: iterdir() order is filesystem dependent, and the seeded
    # split below is only reproducible if the sample order is reproducible.
    for class_dir in sorted(Path(train_folder).iterdir()):
        # Skip stray files and hidden entries (e.g. '.DS_Store').
        if class_dir.name.startswith('.') or not class_dir.is_dir():
            continue
        label = int(class_dir.name[1:])
        for img_path in sorted(class_dir.iterdir()):
            if img_path.name.startswith('.'):
                continue
            images.append(imread(img_path))
            labels.append(label)

    if not images:
        raise ValueError('no images found under %s' % train_folder)

    X = np.array(images)
    if X.ndim != 4 or X.shape[1:] != (480, 640, 3):
        raise ValueError(
            'expected images of shape (480, 640, 3), got array of shape %s' % (X.shape,)
        )
    # imread yields height x width x channel arrays. Moving the channel axis
    # with transpose keeps every pixel intact; reshape(-1, 3, 480, 640) would
    # merely reinterpret the flat buffer and scramble the images.
    X = X.transpose(0, 3, 1, 2)
    Y = np.array(labels)

    Xtr, Xva, Ytr, Yva = model_selection.train_test_split(
        X, Y, test_size=test_size, random_state=random_state
    )
    print('time used', time.time() - started)
    return Xtr, Xva, Ytr, Yva

In [4]:
# Build the train/validation split once, then report the shape of each array.
Xtr, Xva, Ytr, Yva = process_raw_data()
print('\n'.join(
    '{} {}'.format(name, split.shape)
    for name, split in (('Xtr', Xtr), ('Ytr', Ytr), ('Xva', Xva), ('Yva', Yva))
))

time used 676.2898671627045
Xtr (16818, 3, 480, 640)
Ytr (16818,)
Xva (5606, 3, 480, 640)
Yva (5606,)


In [5]:
# Wrap the NumPy splits as tensors and pair images with labels.
tensor_xtr = torch.from_numpy(Xtr)
tensor_ytr = torch.from_numpy(Ytr)
tensor_xva = torch.from_numpy(Xva)
tensor_yva = torch.from_numpy(Yva)

dataset_tr = data.TensorDataset(tensor_xtr, tensor_ytr)
dataset_va = data.TensorDataset(tensor_xva, tensor_yva)

# ChunkSampler emits start..start+n-1 unconditionally, so requesting more
# indices than the dataset holds raises IndexError part-way through an epoch.
# Clamp the requested counts to the real dataset sizes.
loader_tr = DataLoader(
    dataset_tr,
    batch_size=64,
    sampler=ChunkSampler(min(NUM_TRAIN, len(dataset_tr)), 0),
)
loader_va = DataLoader(
    dataset_va,
    batch_size=64,
    sampler=ChunkSampler(min(NUM_VAL, len(dataset_va)), 0),
)



In [7]:
# Two strided conv stages -> max-pool -> two-layer classifier head.
# Feature-map sizes for a 3x480x640 input:
#   conv1 -> 8x236x316, conv2 -> 16x114x154, pool -> 16x57x77
model = nn.Sequential(
    nn.Conv2d(in_channels=3, out_channels=8, kernel_size=10, stride=2),
    nn.ReLU(True),
    nn.BatchNorm2d(num_features=8),
    nn.Conv2d(in_channels=8, out_channels=16, kernel_size=10, stride=2),
    nn.ReLU(True),
    nn.BatchNorm2d(num_features=16),
    nn.MaxPool2d(kernel_size=2, stride=2),
    nn.Flatten(),
    nn.Linear(in_features=16 * 57 * 77, out_features=1024),  # 16*57*77 == 70224
    nn.ReLU(True),
    nn.Linear(in_features=1024, out_features=10),  # one logit per class
)

In [8]:
# Smoke test: push one random batch through the network and confirm that the
# output holds one row of 10 class scores per input image.
x = torch.randn(64, 3, 480, 640)
x_var = x  # legacy alias; the autograd Variable wrapper is no longer needed

# Run in eval mode without autograd so this throwaway batch neither updates
# the BatchNorm running statistics nor builds a backward graph.
was_training = model.training
model.eval()
with torch.no_grad():
    ans = model(x_var)
model.train(was_training)  # restore whichever mode the model was in

assert tuple(ans.size()) == (64, 10), 'unexpected output shape %s' % (tuple(ans.size()),)
np.array(ans.size())

array([64, 10])

In [16]:
# Report the running loss every `print_every` mini-batches.
print_every = 100

def train(model, loss_fn, optimizer, num_epochs = 1, loader = None):
    """Optimise ``model`` for ``num_epochs`` passes over a batch loader.

    Args:
        model: network to train; switched to training mode at every epoch.
        loss_fn: callable taking ``(scores, targets)`` and returning a scalar loss.
        optimizer: optimizer already bound to ``model``'s parameters.
        num_epochs: number of full passes over ``loader``.
        loader: iterable of ``(inputs, targets)`` batches. Defaults to the
            module-level ``loader_tr``.

    Inputs are cast to float and targets to long before the forward pass.
    The loss is printed every ``print_every`` batches. Returns ``None``.
    """
    if loader is None:
        loader = loader_tr

    for epoch in range(num_epochs):
        print('Starting epoch %d / %d' % (epoch + 1, num_epochs))
        model.train()
        for t, (x, y) in enumerate(loader):
            # Tensors track gradients themselves; no Variable wrapper required.
            scores = model(x.float())
            loss = loss_fn(scores, y.long())

            if (t + 1) % print_every == 0:
                # .item() extracts the Python float from the 0-dim loss tensor.
                print('t = %d, loss = %.4f' % (t + 1, loss.item()))

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            
def check_accuracy(model, loader = None):
    """Print the classification accuracy of ``model`` over a batch loader.

    Args:
        model: network to evaluate; it is left in eval mode afterwards.
        loader: iterable of ``(inputs, targets)`` batches. Defaults to the
            module-level ``loader_va``.

    Inputs are cast to float, matching what ``train`` feeds the model. The
    predicted class is the arg-max over dimension 1 of the scores. An empty
    loader reports 0 / 0 correct with an accuracy of 0.00 instead of raising
    ZeroDivisionError. Returns ``None``.
    """
    if loader is None:
        loader = loader_va

    print('Checking accuracy on validation set')
    num_correct = 0
    num_samples = 0
    model.eval()
    # no_grad replaces the removed `volatile=True` flag: no graph is recorded.
    with torch.no_grad():
        for x, y in loader:
            scores = model(x.float())
            _, preds = scores.cpu().max(1)
            # .item() keeps the running count a plain Python int.
            num_correct += (preds == y.cpu()).sum().item()
            num_samples += preds.size(0)
    acc = float(num_correct) / num_samples if num_samples else 0.0
    print('Got %d / %d correct (%.2f)' % (num_correct, num_samples, 100 * acc))

In [None]:
# Cross-entropy over the 10 output scores, optimised with Adadelta for 5 epochs.
loss_fn = nn.CrossEntropyLoss()
optimizer = optim.Adadelta(params=model.parameters(), lr=0.1)
train(model, loss_fn=loss_fn, optimizer=optimizer, num_epochs=5)

Starting epoch 1 / 5
t = 100, loss = 0.3668
