In [183]:
import torch as torch
import torchvision as tv
from torch.utils import data
import pandas as pd
import numpy as np
from PIL import Image
from functools import partial
from ipywidgets import interact, interactive
import ipywidgets as widgets
import matplotlib.pyplot as plt
from tqdm import tqdm

# Data

In [2]:
DATA_PATH = '/home/ubuntu/data/dogbreed'
labels = pd.read_csv(f'{DATA_PATH}/labels.csv')
labels.head()

Unnamed: 0,id,breed
0,000bec180eb18c7604dcecc8fe0dba07,boston_bull
1,001513dfcb2ffafc82cccf4d8bbaba97,dingo
2,001cdf01b096e06d78e9e5112d419397,pekinese
3,00214f311d5d2247d5dfe4fe24b2303d,bluetick
4,0021f9ceb3235effd7fcde7f7538ed62,golden_retriever


In [172]:
class DogsDataset(data.dataset.Dataset):
    
    def __init__(self, data_path, transform=None):
        super().__init__()
        self.labels_df = pd.read_csv(f'{data_path}/labels.csv')
        self.data_path = data_path
        self.transform = transform
        
    def __len__(self):
        return self.labels_df.shape[0]
    
    def __getitem__(self, idx):
        row = self.labels_df.iloc[idx]
        id, breed = row['id'], row['breed']
        #img = tv.transforms.ToTensor()(Image.open(f'{self.data_path}/train/{id}.jpg'))
        img = Image.open(f'{self.data_path}/train/{id}.jpg')
        return self.transform(img), breed

In [4]:
# sanity check
ds = DogsDataset(DATA_PATH)
im, label = next(iter(ds))

In [10]:
def resize_image(target_size, image_path):
    image = Image.open(image_path)
    w, h = image.size
    target_ratio = target_size / min(w, h)
    new_w = np.floor(max(w * target_ratio, target_size))
    new_h = np.floor(max(h * target_ratio, target_size))
    return image.resize((int(new_w), int(new_h)))

def transform_files(file_names, source_dir, dest_dir, tform):
    for name in file_names:
        file = tform(f'{source_dir}/{name}')
        file.save(f'{dest_dir}/{name}')

# Model

In [38]:
# Preprocessing:
# - (1) Rescale images to have shortest side of 340px.
# - (2) Take center 224 x 224 crop.

In [None]:
# (1) Rescale images to have shortest side of 340px
source_dir = '/home/ubuntu/data/dogbreed/train'
dest_dir = '/home/ubuntu/data/dogbreed/tmp/funk/340'
img_names = labels['id'].map(lambda id: id + '.jpg')
transform_files(img_names, source_dir, dest_dir, partial(resize_image, 224))

In [155]:
# (2) Take random 224 x 224 crop
pil_tform = tv.transforms.CenterCrop(224)

In [156]:
# sanity_check
def show_input_image(idx):
    img = pil_tform(Image.open(f'{dest_dir}/{img_names.iloc[idx]}'))
    plt.imshow(img)
    
interact(show_input_image, idx=widgets.IntSlider(min=0, max=img_names.shape[0] -1 , value = 0))

# TODO:
# - scaling, reflections..etc

interactive(children=(IntSlider(value=0, description='idx', max=10221), Output()), _dom_classes=('widget-inter…

<function __main__.show_input_image(idx)>

In [92]:
class SimpleCNN(torch.nn.Module):
    
    def __init__(self):
        super().__init__()
        self.cv_layers = torch.nn.Sequential(torch.nn.Conv2d(3, 8, kernel_size=7, stride=1, padding=3),
                                             torch.nn.ReLU(),
                                             torch.nn.MaxPool2d(kernel_size=2, stride=2),
                                             torch.nn.Conv2d(8, 16, kernel_size=3, stride=1, padding=1),
                                             torch.nn.ReLU(),
                                             torch.nn.MaxPool2d(kernel_size=2, stride=2))
        
        self.fc_layers = torch.nn.Sequential(torch.nn.Linear(16 * 56 * 56, 1028),
                                             torch.nn.ReLU(),
                                             torch.nn.Linear(1028, 2),
                                             torch.nn.ReLU())
    def forward(self, x):
        x = self.cv_layers(x)
        x = x.view(x.size(0), -1)
        return self.fc_layers(x)

In [176]:
# sanity check
model = SimpleCNN()
tensor = (tv.transforms.ToTensor()(tform(Image.open(f'{dest_dir}/{img_names.iloc[0]}')))).unsqueeze(0)
model(torch.autograd.Variable(tensor)).shape

torch.Size([1, 2])

In [190]:
def train_epoch(trainloader, model, criterion, optimizer, print_freq=100):
    ''' run a single epoch of training'''
    model.train()
    for idx, (input, target) in enumerate(trainloader):
        #output = model(input)
        putput = model(torch.autograd.Variable(tensor))
        #loss = criterion(output, target)
        #optimizer.zero_grad()
        #loss.backward()
        #optimizer.step()

def fit(model, trainloader, validationloader, learn_rate, epochs):
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.SGD(model.parameters(), learn_rate, momentum=0.9, weight_decay=1e-4)
    for epoch in tqdm(range(1, epochs + 1)):
        print(f'Epoch: {epoch} \n')
        train_epoch(trainloader, model, criterion, optimizer)
    top1acc = test(validationloader, model, criterion)
    print(top1acc.data)

In [184]:
print(torch.__version__)

0.3.1.post2


In [191]:
epochs = 1
bs = 8
n_work = 1
learn_rate = 1e-3

tfm = tv.transforms.Compose([pil_tform, tv.transforms.ToTensor()])
ds = DogsDataset(DATA_PATH, transform=tfm)
train_size = int(np.floor(len(ds) * 0.8))
test_size = len(ds) - train_size
train_dataset, test_dataset = torch.utils.data.dataset.random_split(ds, (train_size, test_size))
trainloader = torch.utils.data.DataLoader(train_dataset, batch_size=bs, shuffle=True, num_workers=n_work)
validationloader = torch.utils.data.DataLoader(test_dataset, batch_size=bs, shuffle=True, num_workers=n_work)

model = SimpleCNN()
fit(model, trainloader, validationloader, learn_rate, epochs)

#TODO: 
# - update to pytorch 4 + remove explicit Variable calls
# - breed is still a string
# - train on CUDA






  0%|          | 0/1 [00:00<?, ?it/s][A[A[A[A[A

Epoch: 1 



Process Process-12:
Traceback (most recent call last):
  File "/home/ubuntu/src/anaconda3/envs/fastai/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
    self.run()
  File "/home/ubuntu/src/anaconda3/envs/fastai/lib/python3.6/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)
  File "/home/ubuntu/src/anaconda3/envs/fastai/lib/python3.6/site-packages/torch/utils/data/dataloader.py", line 55, in _worker_loop
  File "/home/ubuntu/src/anaconda3/envs/fastai/lib/python3.6/site-packages/torch/utils/data/dataloader.py", line 55, in <listcomp>
  File "/home/ubuntu/src/anaconda3/envs/fastai/lib/python3.6/site-packages/torch/utils/data/dataset.py", line 99, in __getitem__
    self.dataset = dataset
  File "<ipython-input-172-97a7e7bcebac>", line 17, in __getitem__
    return self.transform(img), breed
  File "/home/ubuntu/src/anaconda3/envs/fastai/lib/python3.6/site-packages/torchvision-0.1.9-py3.6.egg/torchvision/transforms.py", line 34,

RuntimeError: DataLoader worker (pid 13864) exited unexpectedly with exit code 1.