In [140]:
import torch
import itertools
import torchvision as tv
import pandas as pd
import numpy as np
import ipywidgets as widgets
import matplotlib.pyplot as plt
from torch.utils.data import dataset, DataLoader, Dataset
from PIL import Image
from functools import partial
from ipywidgets import interact, interactive
from tqdm import tqdm_notebook as tqdm

In [165]:
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# Data

In [12]:
DATA_PATH = '/home/ubuntu/data/dogbreed'
pd.read_csv(f'{DATA_PATH}/labels.csv').head()

Unnamed: 0,id,breed
0,000bec180eb18c7604dcecc8fe0dba07,boston_bull
1,001513dfcb2ffafc82cccf4d8bbaba97,dingo
2,001cdf01b096e06d78e9e5112d419397,pekinese
3,00214f311d5d2247d5dfe4fe24b2303d,bluetick
4,0021f9ceb3235effd7fcde7f7538ed62,golden_retriever


In [23]:
class DogsDataset(Dataset):
    
    def __init__(self, labels_df, data_path, transform=None):
        super().__init__()
        self.labels_df = labels_df
        self.data_path = data_path
        self.transform = transform
        
    def __len__(self):
        return self.labels_df.shape[0]
    
    def __getitem__(self, idx):
        row = self.labels_df.iloc[idx]
        id, breed = row['id'], row['breed']
        img = Image.open(f'{self.data_path}/train/{id}.jpg')
        if self.transform:
            img = self.transform(img)
        return img, breed

In [29]:
# sanity check
next(iter(DogsDataset(pd.read_csv(f'{DATA_PATH}/labels.csv'), DATA_PATH)))

(<PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=500x375 at 0x7FEB82D1C470>,
 'boston_bull')

In [27]:
def gen_split_indexes(train_size=0.8):
    idxs = range(0, labels.shape[0])
    perm = np.random.permutation(idxs)
    split_idx = int(0.8 * len(perm))
    return perm[:split_idx], perm[split_idx:]

In [28]:
def resize_image(target_size, image_path):
    image = Image.open(image_path)
    w, h = image.size
    target_ratio = target_size / min(w, h)
    new_w = np.floor(max(w * target_ratio, target_size))
    new_h = np.floor(max(h * target_ratio, target_size))
    return image.resize((int(new_w), int(new_h)))

def transform_files(file_names, source_dir, dest_dir, tform):
    for name in file_names:
        file = tform(f'{source_dir}/{name}')
        file.save(f'{dest_dir}/{name}')

In [149]:
labels_df = pd.read_csv(f'{DATA_PATH}/labels.csv')
breed_lookup = {breed : idx for idx, breed in enumerate(sorted(labels_df['breed'].unique()))}
labels_df['breed'] = labels_df['breed'].map(breed_lookup)
train_idxs, val_idxs = gen_split_indexes(labels_df.shape[0])

In [42]:
# sanity check
len(train_idxs) + len(val_idxs) == labels_df.shape[0]

True

# Model 1

In [38]:
# Preprocessing:
# - (1) Rescale images to have shortest side of 340px.
# - (2) Take center 224 x 224 crop.

In [62]:
# (1) Rescale images to have shortest side of 340px
source_dir = '/home/ubuntu/data/dogbreed/train'
dest_dir = '/home/ubuntu/data/dogbreed/tmp/funk/340'
img_names = labels['id'].map(lambda id: id + '.jpg')
transform_files(img_names, source_dir, dest_dir, partial(resize_image, 224))

In [46]:
# (2) Take random 224 x 224 crop
pil_tform = tv.transforms.CenterCrop(224)

In [69]:
# sanity_check
def show_input_image(idx):
    img = pil_tform(Image.open(f'{dest_dir}/{img_names.iloc[idx]}'))
    plt.imshow(img)
    
interact(show_input_image, idx=widgets.IntSlider(min=0, max=img_names.shape[0] -1 , value = 0))

# TODO:
# - scaling, reflections..etc

interactive(children=(IntSlider(value=0, description='idx', max=10221), Output()), _dom_classes=('widget-inter…

<function __main__.show_input_image(idx)>

In [92]:
class SimpleCNN(torch.nn.Module):
    
    def __init__(self):
        super().__init__()
        self.cv_layers = torch.nn.Sequential(torch.nn.Conv2d(3, 8, kernel_size=7, stride=1, padding=3),
                                             torch.nn.ReLU(),
                                             torch.nn.MaxPool2d(kernel_size=2, stride=2),
                                             torch.nn.Conv2d(8, 16, kernel_size=3, stride=1, padding=1),
                                             torch.nn.ReLU(),
                                             torch.nn.MaxPool2d(kernel_size=2, stride=2))
        
        self.fc_layers = torch.nn.Sequential(torch.nn.Linear(16 * 56 * 56, 1028),
                                             torch.nn.ReLU(),
                                             torch.nn.Linear(1028, 120),
                                             torch.nn.ReLU())
    def forward(self, x):
        x = self.cv_layers(x)
        x = x.view(x.size(0), -1)
        return self.fc_layers(x)

In [93]:
# sanity check
model = SimpleCNN()
tensor = (tv.transforms.ToTensor()(pil_tform(Image.open(f'{dest_dir}/{img_names.iloc[0]}')))).unsqueeze(0)
model(tensor).shape

torch.Size([1, 120])

In [248]:
def train_epoch(trainloader, model, criterion, optimizer, print_freq=100):
    ''' run a single epoch of training'''
    model.train()
    for idx, (input, target) in enumerate(tqdm(trainloader)):
        output = model(input.to(device))
        loss = criterion(output, target.to(device))
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

def accuracy(output, target):
    _, preds = output.max(1)
    return (preds == target).double().mean()

def evaluate(validationloader, model):
    '''evaluate the model'''
    model.eval()
    correct_cum = n = 0
    for idx, (input, target) in enumerate(validationloader):
        output = model(input.to(device))
        _, preds = output.max(1)
        correct_cum += (preds == target.to(device)).double().sum()
        n += len(target)
    return (correct_cum / n).item()
        
def fit(model, trainloader, validationloader, learn_rate, epochs):
    criterion = torch.nn.CrossEntropyLoss().to(device)
    optimizer = torch.optim.SGD(model.parameters(), learn_rate, momentum=0.9, weight_decay=1e-4)
    for epoch in range(1, epochs + 1):
        train_epoch(trainloader, model, criterion, optimizer)
        val_acc = evaluate(validationloader, model)
        train_acc = evaluate(trainloader, model)
        print(f' train accuracy: {train_acc} \n validation accuracy: {val_acc}')

In [249]:
evaluate(validationloader, model)

0.03129584352078239

In [251]:
epochs = 5
bs = 8
n_work = 1
learn_rate = 1e-3

train_labels_df = labels_df.iloc[train_idxs].reset_index(drop=True)
val_labels_df = labels_df.iloc[val_idxs].reset_index(drop=True)
tfm = tv.transforms.Compose([pil_tform, tv.transforms.ToTensor()])
train_ds = DogsDataset(train_labels_df, DATA_PATH, transform=tfm)
val_ds = DogsDataset(val_labels_df, DATA_PATH, transform=tfm)

trainloader = DataLoader(train_ds, batch_size=bs, shuffle=True, num_workers=n_work, pin_memory=True)
validationloader = DataLoader(val_ds, batch_size=bs, shuffle=True, num_workers=n_work, pin_memory=True)

model = SimpleCNN().to(device)
fit(model, trainloader, validationloader, learn_rate, epochs)

HBox(children=(IntProgress(value=0, max=1023), HTML(value='')))

 train accuracy: 0.011617952794423384 
 validation accuracy: 0.007823960880195598


HBox(children=(IntProgress(value=0, max=1023), HTML(value='')))

 train accuracy: 0.015531368472544944 
 validation accuracy: 0.011246943765281172


HBox(children=(IntProgress(value=0, max=1023), HTML(value='')))

 train accuracy: 0.0298397945456769 
 validation accuracy: 0.019559902200488997


HBox(children=(IntProgress(value=0, max=1023), HTML(value='')))

 train accuracy: 0.038889568301333007 
 validation accuracy: 0.027383863080684592


HBox(children=(IntProgress(value=0, max=1023), HTML(value='')))

 train accuracy: 0.058212058212058215 
 validation accuracy: 0.035207823960880194


# Model 2

In [None]:
# ------
# POSSIBILITES
# - transfer learning tutorial, fast.ai lesson + implement 
# - JH's training tricks
# - LR finder