In [141]:
import torch
import itertools
import bcolz
import torchvision as tv
import pandas as pd
import numpy as np
import ipywidgets as widgets
import matplotlib.pyplot as plt
from torch.utils.data import dataset, DataLoader, Dataset
from PIL import Image
from functools import partial
from ipywidgets import interact, interactive
from tqdm import tqdm_notebook as tqdm
from tqdm import tnrange
from resnext_101_64x4d import resnext_101_64x4d

In [3]:
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# Data

In [2]:
RAW_DATA_DIR = '/home/ubuntu/data/dogbreed/'
pd.read_csv(f'{RAW_DATA_DIR}/labels.csv').head()

Unnamed: 0,id,breed
0,000bec180eb18c7604dcecc8fe0dba07,boston_bull
1,001513dfcb2ffafc82cccf4d8bbaba97,dingo
2,001cdf01b096e06d78e9e5112d419397,pekinese
3,00214f311d5d2247d5dfe4fe24b2303d,bluetick
4,0021f9ceb3235effd7fcde7f7538ed62,golden_retriever


In [4]:
class DogsDataset(Dataset):
    
    def __init__(self, labels_df, data_path, transform=None):
        super().__init__()
        self.labels_df = labels_df
        self.data_path = data_path
        self.transform = transform
        
    def __len__(self):
        return self.labels_df.shape[0]
    
    def __getitem__(self, idx):
        row = self.labels_df.iloc[idx]
        id, breed = row['id'], row['breed']
        img = Image.open(f'{self.data_path}/{id}.jpg')
        if self.transform:
            img = self.transform(img)
        return img, breed

In [5]:
# sanity check
next(iter(DogsDataset(pd.read_csv(f'{RAW_DATA_DIR}/labels.csv'), f'{RAW_DATA_DIR}/train')))

(<PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=500x375 at 0x7FF3E8D7B2E8>,
 'boston_bull')

In [5]:
def gen_split_indexes(size, train_size=0.8):
    idxs = range(0, size)
    perm = np.random.permutation(idxs)
    split_idx = int(0.8 * len(perm))
    return perm[:split_idx], perm[split_idx:]

In [6]:
def resize_image(target_size, image_path):
    image = Image.open(image_path)
    w, h = image.size
    target_ratio = target_size / min(w, h)
    new_w = np.floor(max(w * target_ratio, target_size))
    new_h = np.floor(max(h * target_ratio, target_size))
    return image.resize((int(new_w), int(new_h)))

def transform_files(file_names, source_dir, dest_dir, tform):
    for name in file_names:
        file = tform(f'{source_dir}/{name}')
        file.save(f'{dest_dir}/{name}')

In [7]:
labels_df = pd.read_csv(f'{RAW_DATA_DIR}/labels.csv')
breed_lookup = {breed : idx for idx, breed in enumerate(sorted(labels_df['breed'].unique()))}
labels_df['breed'] = labels_df['breed'].map(breed_lookup)
train_idxs, val_idxs = gen_split_indexes(labels_df.shape[0])

In [8]:
# sanity check
len(train_idxs) + len(val_idxs) == labels_df.shape[0]

True

# Util

In [10]:
def num_features(m):
    c= m if isinstance(m, (list, tuple)) else list(m.children())
    if len(c)==0: return None
    for l in reversed(c):
        if hasattr(l, 'num_features'): return l.num_features
        res = num_features(l)
        if res is not None: return res

In [11]:
class AdaptiveConcatPool2d(torch.nn.Module):
    
    def __init__(self, sz=None):
        super().__init__()
        sz = sz or (1,1)
        self.ap = torch.nn.AdaptiveAvgPool2d(sz)
        self.mp = torch.nn.AdaptiveMaxPool2d(sz)
        
    def forward(self, x):
        return torch.cat([self.mp(x), self.ap(x)], 1)

class Lambda(torch.nn.Module):
    
    def __init__(self, f):
        super().__init__(); self.f=f
        
    def forward(self, x):
        return self.f(x)

class Flatten(torch.nn.Module):
    def __init__(self):
        super().__init__()
        
    def forward(self, x):
        return x.view(x.size(0), -1)


In [12]:
def cond_init(m, init_fn):
    if not isinstance(m, (torch.nn.BatchNorm1d, torch.nn.BatchNorm2d, torch.nn.BatchNorm3d)):
        if hasattr(m, 'weight'): init_fn(m.weight)
        if hasattr(m, 'bias') and hasattr(m.bias, 'data'): m.bias.data.fill_(0.)

In [52]:
class ArrayDataset(Dataset):
    
    def __init__(self, x, y):
        super().__init__()
        self.x = x
        self.y = y
        
    def __len__(self):
        return self.x.shape[0]
    
    def __getitem__(self, idx):
        return self.x[idx], self.y[idx]

# Model 1

In [None]:
def train_epoch(trainloader, model, criterion, optimizer):
    ''' run a single epoch of training'''
    for idx, (input, target) in enumerate(tqdm(trainloader)):
        output = model(input.to(device))
        loss = criterion(output, target.to(device))
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

def evaluate(validationloader, model, criterion):
    '''evaluate the model'''
    model.eval()
    loss_cum = loss_n = correct_cum = correct_n = 0
    for idx, (input, target) in enumerate(tqdm(validationloader)):
        output = model(input.to(device))
        _, preds = output.max(1)
        correct_cum += (preds == target.to(device)).double().sum()
        correct_n += len(target)
        loss_cum += criterion(output, target.to(device)).data
        loss_n +=1
    return (loss_cum / loss_n).item(), (correct_cum / correct_n).item()
        
def fit(model, trainloader, validationloader, learn_rate, epochs):
    criterion = torch.nn.CrossEntropyLoss().to(device)
    optimizer = torch.optim.SGD(model.parameters(), learn_rate, momentum=0.9, weight_decay=1e-4)
    for epoch in range(1, epochs + 1):
        print(f'EPOCH: {epoch}')
        train_epoch(trainloader, model, criterion, optimizer)
        val_loss, val_acc = evaluate(validationloader, model, criterion)
        train_loss, train_acc = evaluate(trainloader, model, criterion)
        print(f'val_loss {val_loss}')
        print(f'trn_loss {train_loss}')
        print(f'val_acc {val_acc}')
        print(f'trn_acc {train_acc}')

In [42]:
class SimpleCNN(torch.nn.Module):
    
    def __init__(self):
        super().__init__()
        self.cv_layers = torch.nn.Sequential(torch.nn.Conv2d(3, 8, kernel_size=7, stride=1, padding=3),
                                             torch.nn.ReLU(),
                                             torch.nn.MaxPool2d(kernel_size=2, stride=2),
                                             torch.nn.Conv2d(8, 16, kernel_size=3, stride=1, padding=1),
                                             torch.nn.ReLU(),
                                             torch.nn.MaxPool2d(kernel_size=2, stride=2))
        
        self.fc_layers = torch.nn.Sequential(torch.nn.Linear(16 * 56 * 56, 1028),
                                             torch.nn.ReLU(),
                                             torch.nn.Linear(1028, 120),
                                             torch.nn.ReLU())
    def forward(self, x):
        x = self.cv_layers(x)
        x = x.view(x.size(0), -1)
        return self.fc_layers(x)

In [45]:
DATA_PATH = '/home/ubuntu/data/dogbreed/tmp/funk/340'

epochs = 5
bs = 8
n_work = 1
learn_rate = 1e-3

train_labels_df = labels_df.iloc[train_idxs].reset_index(drop=True)
val_labels_df = labels_df.iloc[val_idxs].reset_index(drop=True)
tfm = tv.transforms.Compose([pil_tform, tv.transforms.ToTensor()])
train_ds = DogsDataset(train_labels_df, DATA_PATH, transform=tfm)
val_ds = DogsDataset(val_labels_df, DATA_PATH, transform=tfm)

trainloader = DataLoader(train_ds, batch_size=bs, shuffle=True, num_workers=n_work, pin_memory=True)
validationloader = DataLoader(val_ds, batch_size=bs, shuffle=True, num_workers=n_work, pin_memory=True)

model = SimpleCNN().to(device)
fit(model, trainloader, validationloader, learn_rate, epochs)

EPOCH: {epoch}


HBox(children=(IntProgress(value=0, max=1023), HTML(value='')))

HBox(children=(IntProgress(value=0, max=256), HTML(value='')))

HBox(children=(IntProgress(value=0, max=1023), HTML(value='')))

validation accuracy 0.011735941320293397
train accuracy 0.01491989727283845
EPOCH: {epoch}


HBox(children=(IntProgress(value=0, max=1023), HTML(value='')))

HBox(children=(IntProgress(value=0, max=256), HTML(value='')))

HBox(children=(IntProgress(value=0, max=1023), HTML(value='')))

validation accuracy 0.020537897310513444
train accuracy 0.03277485630426807
EPOCH: {epoch}


HBox(children=(IntProgress(value=0, max=1023), HTML(value='')))

HBox(children=(IntProgress(value=0, max=256), HTML(value='')))

HBox(children=(IntProgress(value=0, max=1023), HTML(value='')))

validation accuracy 0.02689486552567237
train accuracy 0.04842851901675431
EPOCH: {epoch}


HBox(children=(IntProgress(value=0, max=1023), HTML(value='')))

HBox(children=(IntProgress(value=0, max=256), HTML(value='')))

HBox(children=(IntProgress(value=0, max=1023), HTML(value='')))

validation accuracy 0.035207823960880194
train accuracy 0.05552158493334964
EPOCH: {epoch}


HBox(children=(IntProgress(value=0, max=1023), HTML(value='')))

HBox(children=(IntProgress(value=0, max=256), HTML(value='')))

HBox(children=(IntProgress(value=0, max=1023), HTML(value='')))

validation accuracy 0.04156479217603912
train accuracy 0.08560596795890914


# Model 2

In [None]:
def train_epoch(trainloader, model, criterion, optimizer):
    ''' run a single epoch of training'''
    for idx, (input, target) in enumerate(tqdm(trainloader)):
        output = model(input.to(device))
        loss = criterion(output, target.to(device))
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

def evaluate(validationloader, model, criterion):
    '''evaluate the model'''
    model.eval()
    loss_cum = loss_n = correct_cum = correct_n = 0
    for idx, (input, target) in enumerate(tqdm(validationloader)):
        output = model(input.to(device))
        _, preds = output.max(1)
        correct_cum += (preds == target.to(device)).double().sum()
        correct_n += len(target)
        loss_cum += criterion(output, target.to(device)).data
        loss_n +=1
    return (loss_cum / loss_n).item(), (correct_cum / correct_n).item()
        
def fit(model, trainloader, validationloader, learn_rate, epochs):
    criterion = torch.nn.CrossEntropyLoss().to(device)
    optimizer = torch.optim.SGD(model.parameters(), learn_rate, momentum=0.9, weight_decay=1e-4)
    for epoch in range(1, epochs + 1):
        print(f'EPOCH: {epoch}')
        train_epoch(trainloader, model, criterion, optimizer)
        val_loss, val_acc = evaluate(validationloader, model, criterion)
        train_loss, train_acc = evaluate(trainloader, model, criterion)
        print(f'val_loss {val_loss}')
        print(f'trn_loss {train_loss}')
        print(f'val_acc {val_acc}')
        print(f'trn_acc {train_acc}')

In [52]:
DATA_PATH = '/home/ubuntu/data/dogbreed/tmp/funk/340'
pil_tform = tv.transforms.CenterCrop(224)

epochs = 5
bs = 8
n_work = 1
learn_rate = 1e-3

train_labels_df = labels_df.iloc[train_idxs].reset_index(drop=True)
val_labels_df = labels_df.iloc[val_idxs].reset_index(drop=True)
tfm = tv.transforms.Compose([pil_tform, tv.transforms.ToTensor()])
train_ds = DogsDataset(train_labels_df, DATA_PATH, transform=tfm)
val_ds = DogsDataset(val_labels_df, DATA_PATH, transform=tfm)

trainloader = DataLoader(train_ds, batch_size=bs, shuffle=True, num_workers=n_work, pin_memory=True)
validationloader = DataLoader(val_ds, batch_size=bs, shuffle=True, num_workers=n_work, pin_memory=True)

model = tv.models.resnet18(pretrained=True)
for param in model.parameters():
    param.requires_grad = False

num_fx = model.fc.in_features
model.fc = torch.nn.Linear(num_fx, 120)
fit(model.to(device), trainloader, validationloader, learn_rate, epochs)

EPOCH: 1


HBox(children=(IntProgress(value=0, max=1023), HTML(value='')))

HBox(children=(IntProgress(value=0, max=256), HTML(value='')))

HBox(children=(IntProgress(value=0, max=1023), HTML(value='')))

validation accuracy 0.5075794621026894
train accuracy 0.5554604378133791
EPOCH: 2


HBox(children=(IntProgress(value=0, max=1023), HTML(value='')))

HBox(children=(IntProgress(value=0, max=256), HTML(value='')))

HBox(children=(IntProgress(value=0, max=1023), HTML(value='')))

validation accuracy 0.6102689486552567
train accuracy 0.701968937263055
EPOCH: 3


HBox(children=(IntProgress(value=0, max=1023), HTML(value='')))

HBox(children=(IntProgress(value=0, max=256), HTML(value='')))

HBox(children=(IntProgress(value=0, max=1023), HTML(value='')))

validation accuracy 0.6650366748166259
train accuracy 0.7715543597896539
EPOCH: 4


HBox(children=(IntProgress(value=0, max=1023), HTML(value='')))

HBox(children=(IntProgress(value=0, max=256), HTML(value='')))

HBox(children=(IntProgress(value=0, max=1023), HTML(value='')))

validation accuracy 0.6767726161369193
train accuracy 0.8106885165708695
EPOCH: 5


HBox(children=(IntProgress(value=0, max=1023), HTML(value='')))

HBox(children=(IntProgress(value=0, max=256), HTML(value='')))

HBox(children=(IntProgress(value=0, max=1023), HTML(value='')))

validation accuracy 0.671882640586797
train accuracy 0.838693897517427


In [51]:
DATA_PATH = '/home/ubuntu/data/dogbreed/tmp/funk/340'
pil_tform = tv.transforms.CenterCrop(224)

epochs = 5
bs = 8
n_work = 1
learn_rate = 1e-2

train_labels_df = labels_df.iloc[train_idxs].reset_index(drop=True)
val_labels_df = labels_df.iloc[val_idxs].reset_index(drop=True)
tfm = tv.transforms.Compose([pil_tform, tv.transforms.ToTensor()])
train_ds = DogsDataset(train_labels_df, DATA_PATH, transform=tfm)
val_ds = DogsDataset(val_labels_df, DATA_PATH, transform=tfm)

trainloader = DataLoader(train_ds, batch_size=bs, shuffle=True, num_workers=n_work, pin_memory=True)
validationloader = DataLoader(val_ds, batch_size=bs, shuffle=True, num_workers=n_work, pin_memory=True)

model = tv.models.resnet18(pretrained=True)
for param in model.parameters():
    param.requires_grad = False

num_fx = model.fc.in_features
model.fc = torch.nn.Linear(num_fx, 120)
fit(model.to(device), trainloader, validationloader, learn_rate, epochs)

EPOCH: 1


HBox(children=(IntProgress(value=0, max=1023), HTML(value='')))

HBox(children=(IntProgress(value=0, max=256), HTML(value='')))

HBox(children=(IntProgress(value=0, max=1023), HTML(value='')))

validation accuracy 0.5349633251833741
train accuracy 0.6114711997064939
EPOCH: 2


HBox(children=(IntProgress(value=0, max=1023), HTML(value='')))

HBox(children=(IntProgress(value=0, max=256), HTML(value='')))

HBox(children=(IntProgress(value=0, max=1023), HTML(value='')))

validation accuracy 0.6009779951100244
train accuracy 0.7365782071664425
EPOCH: 3


HBox(children=(IntProgress(value=0, max=1023), HTML(value='')))

HBox(children=(IntProgress(value=0, max=256), HTML(value='')))

HBox(children=(IntProgress(value=0, max=1023), HTML(value='')))

validation accuracy 0.5731051344743275
train accuracy 0.7541885777179895
EPOCH: 4


HBox(children=(IntProgress(value=0, max=1023), HTML(value='')))

HBox(children=(IntProgress(value=0, max=256), HTML(value='')))

HBox(children=(IntProgress(value=0, max=1023), HTML(value='')))

validation accuracy 0.5716381418092908
train accuracy 0.7880640821817293
EPOCH: 5


HBox(children=(IntProgress(value=0, max=1023), HTML(value='')))

HBox(children=(IntProgress(value=0, max=256), HTML(value='')))

HBox(children=(IntProgress(value=0, max=1023), HTML(value='')))

validation accuracy 0.6078239608801955
train accuracy 0.8728139904610493


In [50]:
DATA_PATH = '/home/ubuntu/data/dogbreed/tmp/funk/340'
tfm = tv.transforms.Compose([
    tv.transforms.RandomRotation(10),
    tv.transforms.ColorJitter(0.05,0.05,0.05,0.05),
    tv.transforms.RandomHorizontalFlip(0.5),
    tv.transforms.CenterCrop(224),
    tv.transforms.ToTensor(),
    tv.transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])])

epochs = 5
bs = 8
n_work = 1
learn_rate = 1e-3

train_labels_df = labels_df.iloc[train_idxs].reset_index(drop=True)
val_labels_df = labels_df.iloc[val_idxs].reset_index(drop=True)
train_ds = DogsDataset(train_labels_df, DATA_PATH, transform=tfm)
val_ds = DogsDataset(val_labels_df, DATA_PATH, transform=tfm)

trainloader = DataLoader(train_ds, batch_size=bs, shuffle=True, num_workers=n_work, pin_memory=True)
validationloader = DataLoader(val_ds, batch_size=bs, shuffle=True, num_workers=n_work, pin_memory=True)

model = tv.models.resnet18(pretrained=True)
for param in model.parameters():
    param.requires_grad = False

num_fx = model.fc.in_features
model.fc = torch.nn.Linear(num_fx, 120)
fit(model.to(device), trainloader, validationloader, learn_rate, epochs)

EPOCH: 1


HBox(children=(IntProgress(value=0, max=1023), HTML(value='')))

HBox(children=(IntProgress(value=0, max=256), HTML(value='')))

HBox(children=(IntProgress(value=0, max=1023), HTML(value='')))

validation accuracy 0.45232273838630804
train accuracy 0.5109453344747462
EPOCH: 2


HBox(children=(IntProgress(value=0, max=1023), HTML(value='')))

HBox(children=(IntProgress(value=0, max=256), HTML(value='')))

HBox(children=(IntProgress(value=0, max=1023), HTML(value='')))

validation accuracy 0.6322738386308068
train accuracy 0.6855815091109209
EPOCH: 3


HBox(children=(IntProgress(value=0, max=1023), HTML(value='')))

HBox(children=(IntProgress(value=0, max=256), HTML(value='')))

HBox(children=(IntProgress(value=0, max=1023), HTML(value='')))

validation accuracy 0.6342298288508557
train accuracy 0.7250825486119604
EPOCH: 4


HBox(children=(IntProgress(value=0, max=1023), HTML(value='')))

HBox(children=(IntProgress(value=0, max=256), HTML(value='')))

HBox(children=(IntProgress(value=0, max=1023), HTML(value='')))

validation accuracy 0.6645476772616137
train accuracy 0.7574905221964046
EPOCH: 5


HBox(children=(IntProgress(value=0, max=1023), HTML(value='')))

HBox(children=(IntProgress(value=0, max=256), HTML(value='')))

HBox(children=(IntProgress(value=0, max=1023), HTML(value='')))

validation accuracy 0.6640586797066014
train accuracy 0.7726550079491257


In [129]:
model = resnext_101_64x4d()
model.load_state_dict(torch.load('/home/ubuntu/data/weights/resnext_101_64x4d.pth'))

front_layers = list(model.children())[:8]
nf = num_features(front_layers) * 2
front_layers += [AdaptiveConcatPool2d(), Flatten()]
back_layers = [torch.nn.BatchNorm1d(num_features=nf),
               torch.nn.Dropout(p=0.25),
               torch.nn.Linear(in_features=nf, out_features=512),
               torch.nn.ReLU(),
               
               torch.nn.BatchNorm1d(num_features=512),
               torch.nn.Dropout(p=0.25),
               torch.nn.Linear(in_features=512, out_features=120),
               torch.nn.LogSoftmax()]

front_model = torch.nn.Sequential(*front_layers)
back_model = torch.nn.Sequential(*back_layers)

for param in front_model.parameters():
    param.requires_grad = False
    
model = torch.nn.Sequential(front_model, back_model)

In [125]:
DATA_PATH = '/home/ubuntu/data/dogbreed/tmp/funk/340'
tfm = tv.transforms.Compose([
    tv.transforms.RandomRotation(10),
    tv.transforms.ColorJitter(0.05,0.05,0.05,0.05),
    tv.transforms.RandomHorizontalFlip(0.5),
    tv.transforms.CenterCrop(224),
    tv.transforms.ToTensor(),
    tv.transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])])

epochs = 5
bs = 32
n_work = 1
learn_rate = 1e-3

train_labels_df = labels_df.iloc[train_idxs].reset_index(drop=True)
val_labels_df = labels_df.iloc[val_idxs].reset_index(drop=True)
train_ds = DogsDataset(train_labels_df, DATA_PATH, transform=tfm)
val_ds = DogsDataset(val_labels_df, DATA_PATH, transform=tfm)

trainloader = DataLoader(train_ds, batch_size=bs, shuffle=True, num_workers=n_work, pin_memory=True)
validationloader = DataLoader(val_ds, batch_size=bs, shuffle=True, num_workers=n_work, pin_memory=True)

fit(model.to(device), trainloader, validationloader, learn_rate, epochs)

EPOCH: 1


HBox(children=(IntProgress(value=0, max=256), HTML(value='')))

HBox(children=(IntProgress(value=0, max=64), HTML(value='')))

HBox(children=(IntProgress(value=0, max=256), HTML(value='')))

validation accuracy 0.8166259168704155
train accuracy 0.856304268068974
EPOCH: 2


HBox(children=(IntProgress(value=0, max=256), HTML(value='')))

HBox(children=(IntProgress(value=0, max=64), HTML(value='')))

HBox(children=(IntProgress(value=0, max=256), HTML(value='')))

validation accuracy 0.8308068459657701
train accuracy 0.876360523419347
EPOCH: 3


HBox(children=(IntProgress(value=0, max=256), HTML(value='')))

HBox(children=(IntProgress(value=0, max=64), HTML(value='')))

HBox(children=(IntProgress(value=0, max=256), HTML(value='')))

validation accuracy 0.8381418092909535
train accuracy 0.8878561819738291
EPOCH: 4


HBox(children=(IntProgress(value=0, max=256), HTML(value='')))

HBox(children=(IntProgress(value=0, max=64), HTML(value='')))

HBox(children=(IntProgress(value=0, max=256), HTML(value='')))

validation accuracy 0.8405867970660146
train accuracy 0.8991072520484286
EPOCH: 5


HBox(children=(IntProgress(value=0, max=256), HTML(value='')))

HBox(children=(IntProgress(value=0, max=64), HTML(value='')))

HBox(children=(IntProgress(value=0, max=256), HTML(value='')))

validation accuracy 0.8376528117359412
train accuracy 0.9059557294851412


# Model 3

In [None]:
def train_epoch(trainloader, model, criterion, optimizer):
    ''' run a single epoch of training'''
    for idx, (input, target) in enumerate(tqdm(trainloader)):
        output = model(input.to(device))
        loss = criterion(output, target.to(device))
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

def evaluate(validationloader, model, criterion):
    '''evaluate the model'''
    model.eval()
    loss_cum = loss_n = correct_cum = correct_n = 0
    for idx, (input, target) in enumerate(tqdm(validationloader)):
        output = model(input.to(device))
        _, preds = output.max(1)
        correct_cum += (preds == target.to(device)).double().sum()
        correct_n += len(target)
        loss_cum += criterion(output, target.to(device)).data
        loss_n +=1
    return (loss_cum / loss_n).item(), (correct_cum / correct_n).item()
        
def fit(model, trainloader, validationloader, learn_rate, epochs):
    criterion = torch.nn.CrossEntropyLoss().to(device)
    optimizer = torch.optim.SGD(model.parameters(), learn_rate, momentum=0.9, weight_decay=1e-4)
    for epoch in range(1, epochs + 1):
        print(f'EPOCH: {epoch}')
        train_epoch(trainloader, model, criterion, optimizer)
        val_loss, val_acc = evaluate(validationloader, model, criterion)
        train_loss, train_acc = evaluate(trainloader, model, criterion)
        print(f'val_loss {val_loss}')
        print(f'trn_loss {train_loss}')
        print(f'val_acc {val_acc}')
        print(f'trn_acc {train_acc}')

In [83]:
model = resnext_101_64x4d()
model.load_state_dict(torch.load('/home/ubuntu/data/weights/resnext_101_64x4d.pth'))

front_layers = list(model.children())[:8]
nf = num_features(front_layers) * 2
front_layers += [AdaptiveConcatPool2d(), Flatten()]
back_layers = [torch.nn.BatchNorm1d(num_features=nf),
               torch.nn.Dropout(p=0.5),
               torch.nn.Linear(in_features=nf, out_features=512),
               torch.nn.ReLU(),
               
               torch.nn.BatchNorm1d(num_features=512),
               torch.nn.Dropout(p=0.5),
               torch.nn.Linear(in_features=512, out_features=120),
               torch.nn.LogSoftmax()]

front_model = torch.nn.Sequential(*front_layers)
for param in front_model.parameters():
    param.requires_grad = False
    
back_model = torch.nn.Sequential(*back_layers)
back_model.apply(lambda submodule: cond_init(submodule, torch.nn.init.kaiming_normal_))

    
model = torch.nn.Sequential(front_model, back_model)

In [40]:
DATA_PATH = '/home/ubuntu/data/dogbreed/tmp/funk/340'
tfm = tv.transforms.Compose([
    tv.transforms.RandomRotation(10),
    tv.transforms.ColorJitter(0.05,0.05,0.05,0.05),
    tv.transforms.RandomHorizontalFlip(0.5),
    tv.transforms.CenterCrop(224),
    tv.transforms.ToTensor(),
    tv.transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])])

epochs = 2
bs = 256 #512 -> CUDA OUT OF MEMORY, JH -> 54
n_work = 4
learn_rate = 1e-2

train_labels_df = labels_df.iloc[train_idxs].reset_index(drop=True)
val_labels_df = labels_df.iloc[val_idxs].reset_index(drop=True)
train_ds = DogsDataset(train_labels_df, DATA_PATH, transform=tfm)
val_ds = DogsDataset(val_labels_df, DATA_PATH, transform=tfm)

trainloader = DataLoader(train_ds, batch_size=bs, shuffle=True, num_workers=n_work, pin_memory=True)
validationloader = DataLoader(val_ds, batch_size=bs, shuffle=True, num_workers=n_work, pin_memory=True)

fit(model.to(device), trainloader, validationloader, learn_rate, epochs)

#58, 75

EPOCH: 1


HBox(children=(IntProgress(value=0, max=32), HTML(value='')))

HBox(children=(IntProgress(value=0, max=8), HTML(value='')))

HBox(children=(IntProgress(value=0, max=32), HTML(value='')))

val_loss 0.8843400478363037
trn_loss 0.7078558802604675
val_acc 0.7985330073349632
trn_acc 0.8517793811911459
EPOCH: 2


HBox(children=(IntProgress(value=0, max=32), HTML(value='')))

HBox(children=(IntProgress(value=0, max=8), HTML(value='')))

HBox(children=(IntProgress(value=0, max=32), HTML(value='')))

val_loss 0.7485789060592651
trn_loss 0.5386099219322205
val_acc 0.8014669926650366
trn_acc 0.8761159349394644


# Model 4

In [202]:
def train_epoch(trainloader, model, criterion, optimizer):
    ''' run a single epoch of training'''
    t = tqdm(iter(trainloader), leave=False, total=len(trainloader), miniters=0, desc='training')
    for input, target in t:
        output = model(input.to(device))
        loss = criterion(output, target.to(device))
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

def evaluate(validationloader, model, criterion):
    '''evaluate the model'''
    model.eval()
    loss_cum = loss_n = correct_cum = correct_n = 0
    t = tqdm(iter(validationloader), leave=False, total=len(validationloader), miniters=0, desc='validation')
    for input, target in t:
        output = model(input.to(device))
        _, preds = output.max(1)
        correct_cum += (preds == target.to(device)).double().sum()
        correct_n += len(target)
        loss_cum += criterion(output, target.to(device)).data
        loss_n +=1
    return (loss_cum / loss_n).item(), (correct_cum / correct_n).item()
        
def fit(model, trainloader, validationloader, learn_rate, epochs):
    criterion = torch.nn.CrossEntropyLoss().to(device)
    optimizer = torch.optim.SGD(model.parameters(), learn_rate, momentum=0.9, weight_decay=1e-4)
    for epoch in tqdm(range(1, epochs + 1), desc='epoch'):
        train_epoch(trainloader, model, criterion, optimizer)
        val_loss, val_acc = evaluate(validationloader, model, criterion)
        trn_loss, trn_acc = evaluate(trainloader, model, criterion)
        # print message
        headings = ['epoch', 'trn_loss', 'val_loss', 'trn_acc', 'val_acc']
        stats = [epoch] + [np.round(x, 6) for x in [trn_loss, val_loss, trn_acc, val_acc]]
        layout = '{!s:10}' * len(headings)
        if epoch == 1:
            print(layout.format(*headings))
        print(layout.format(*stats))

In [211]:
model = resnext_101_64x4d()
model.load_state_dict(torch.load('/home/ubuntu/data/weights/resnext_101_64x4d.pth'))

front_layers = list(model.children())[:8]
nf = num_features(front_layers) * 2
front_layers += [AdaptiveConcatPool2d(), Flatten()]
back_layers = [torch.nn.BatchNorm1d(num_features=nf),
               torch.nn.Dropout(p=0.5),
               torch.nn.Linear(in_features=nf, out_features=512),
               torch.nn.ReLU(),
               
               torch.nn.BatchNorm1d(num_features=512),
               torch.nn.Dropout(p=0.5),
               torch.nn.Linear(in_features=512, out_features=120),
               torch.nn.LogSoftmax()]

front_model = torch.nn.Sequential(*front_layers)
for param in front_model.parameters():
    param.requires_grad = False
    
back_model = torch.nn.Sequential(*back_layers)
_ = back_model.apply(lambda submodule: cond_init(submodule, torch.nn.init.kaiming_normal_))


In [212]:
# Precompute activations
DATA_PATH = '/home/ubuntu/data/dogbreed/tmp/funk/340'
tfm = tv.transforms.Compose([
    tv.transforms.CenterCrop(224),
    tv.transforms.ToTensor(),
    tv.transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])])

bs = 128 #512 -> CUDA OUT OF MEMORY, JH -> 54
n_work = 4

train_labels_df = labels_df.iloc[train_idxs].reset_index(drop=True)
val_labels_df = labels_df.iloc[val_idxs].reset_index(drop=True)
train_ds = DogsDataset(train_labels_df, DATA_PATH, transform=tfm)
val_ds = DogsDataset(val_labels_df, DATA_PATH, transform=tfm)
trainloader = DataLoader(train_ds, batch_size=bs, num_workers=n_work, pin_memory=True)
validationloader = DataLoader(val_ds, batch_size=bs, num_workers=n_work, pin_memory=True)


model = front_model.to(device)

path = '/home/ubuntu/data/dogbreed/tmp/funk/precompute'
#bc = bcolz.carray(np.zeros((0, 4096), dtype=np.float32), rootdir=path, mode='w', chunklen=1)
#for idx, (input, target) in enumerate(tqdm(trainloader)):
#    output = model(input.to(device))
#    bc.append(output)
#    bc.flush()
    
path = '/home/ubuntu/data/dogbreed/tmp/funk/precompute_val'
#bc = bcolz.carray(np.zeros((0, 4096), dtype=np.float32), rootdir=path, mode='w', chunklen=1)
#for idx, (input, target) in enumerate(tqdm(validationloader)):
#    output = model(input.to(device))
#    bc.append(output)
#    bc.flush()

In [215]:
# CAREFUL: this is order dependent - depends on the train indexes used when the
# precompute file was written.

trn = bcolz.open('/home/ubuntu/data/dogbreed/tmp/funk/precompute')
val = bcolz.open('/home/ubuntu/data/dogbreed/tmp/funk/precompute_val')

trn_ds = ArrayDataset(trn, train_labels_df['breed'].values)
val_ds = ArrayDataset(val, val_labels_df['breed'].values)

trainloader = DataLoader(trn_ds, batch_size=bs, num_workers=n_work, pin_memory=True)
validationloader = DataLoader(val_ds, batch_size=bs, num_workers=n_work, pin_memory=True)

epochs = 5
learn_rate = 1e-2

fit(back_model.to(device), trainloader, validationloader, learn_rate, epochs)

HBox(children=(IntProgress(value=0, description='epoch', max=5, style=ProgressStyle(description_width='initial…

HBox(children=(IntProgress(value=0, description='training', max=64, style=ProgressStyle(description_width='ini…

HBox(children=(IntProgress(value=0, description='validation', max=16, style=ProgressStyle(description_width='i…

HBox(children=(IntProgress(value=0, description='validation', max=64, style=ProgressStyle(description_width='i…

epoch     trn_loss  val_loss  trn_acc   val_acc   
1         0.064953  0.508096  0.997554  0.845966  


HBox(children=(IntProgress(value=0, description='training', max=64, style=ProgressStyle(description_width='ini…

HBox(children=(IntProgress(value=0, description='validation', max=16, style=ProgressStyle(description_width='i…

HBox(children=(IntProgress(value=0, description='validation', max=64, style=ProgressStyle(description_width='i…

2         0.056065  0.507651  0.99841   0.845477  


HBox(children=(IntProgress(value=0, description='training', max=64, style=ProgressStyle(description_width='ini…

HBox(children=(IntProgress(value=0, description='validation', max=16, style=ProgressStyle(description_width='i…

HBox(children=(IntProgress(value=0, description='validation', max=64, style=ProgressStyle(description_width='i…

3         0.049051  0.507737  0.998899  0.847433  


HBox(children=(IntProgress(value=0, description='training', max=64, style=ProgressStyle(description_width='ini…

HBox(children=(IntProgress(value=0, description='validation', max=16, style=ProgressStyle(description_width='i…

HBox(children=(IntProgress(value=0, description='validation', max=64, style=ProgressStyle(description_width='i…

4         0.043412  0.508225  0.999389  0.847433  


HBox(children=(IntProgress(value=0, description='training', max=64, style=ProgressStyle(description_width='ini…

HBox(children=(IntProgress(value=0, description='validation', max=16, style=ProgressStyle(description_width='i…

HBox(children=(IntProgress(value=0, description='validation', max=64, style=ProgressStyle(description_width='i…

5         0.038824  0.508888  0.999511  0.848411  


In [None]:
# HIT LIST
# (1) Loss - is yours correct? esp. traning losses - check the OS imagenet train loop.
# (2) Match fast.ai default performance
# (3) Implement train loop bells and whistles
# (4) Composable training loop  (boot task interface?)