In [2]:
# Reload imported modules automatically before each cell executes, so edits to
# the project's lib/ modules are picked up without restarting the kernel.
%reload_ext autoreload
%autoreload 2

In [3]:
import lib.data_loaders as data_loaders
import lib.data_transformers as data_transformers
import lib.datasets as ds
import lib.models_repo as models_repo
import lib.optimizer_repo as optimizer_repo
import lib.scheduler_repo as scheduler_repo
import lib.trainer as trainer
import lib.model_saver as model_saver

In [4]:
from torchvision.datasets.folder import ImageFolder

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt


In [None]:
# Render matplotlib figures inline in the notebook output.
%matplotlib inline

In [None]:
# Model / input settings shared by the cells below.
batch_size = 128
img_size = 340
num_classes = 120

# Locations of the dataset files on the local disk.
sample_submission_file = "/home/as/datasets/kaggle.dog.breed/sample_submission.csv"
csv_file = "/home/as/datasets/kaggle.dog.breed/labels.csv"
data_path = "/home/as/datasets/kaggle.dog.breed/train"

In [None]:
norm = data_transformers.resnet_normaliser

# Two training transformers that differ only in the third argument
# (False, then True -- presumably the augmentation switch; confirm in lib.data_transformers).
trans, trans_aug = [
    data_transformers.get_transformer(img_size, norm, flag)
    for flag in (False, True)
]
# Separate transformer used for validation / test images.
trans_valid = data_transformers.get_test_valid_transformer(img_size, norm)

In [None]:
# Fixed seed so the train/validation split is identical on every execution.
# With an unseeded split, re-running this cell moves rows between the two sets,
# which makes results incomparable and lets validation rows leak into training.
# A local RandomState is used so the global numpy random state is left alone.
split_seed = 42
split_rng = np.random.RandomState(split_seed)

labels = pd.read_csv(csv_file)
# Each row lands in the training set with probability 0.75, otherwise in validation.
mask = split_rng.rand(len(labels)) < 0.75
labels_train = labels[mask].values
labels_valid = labels[~mask].values

# Class names are the sample submission's column names, minus the first column.
sub = pd.read_csv(sample_submission_file)
breeds = list(sub)[1:]

# The training rows are wrapped twice (with `trans` and with `trans_aug`);
# the validation rows use `trans_valid`.
train_images = ds.DatasetFromCSV(labels_train, breeds, data_path, 'jpg', transform=trans)
train_images_aug = ds.DatasetFromCSV(labels_train, breeds, data_path, 'jpg', transform=trans_aug)
valid_images = ds.DatasetFromCSV(labels_valid, breeds, data_path, 'jpg', transform=trans_valid)

In [None]:
# Number of breed columns taken from the sample submission header.
len(breeds)

In [None]:
# Report how many rows ended up in each split.
for label, dataset in (('Number of train instances', train_images),
                       ('Number of valid instances', valid_images)):
    print(label, len(dataset))

In [None]:
# Show the `classes` attribute exposed by the training dataset object.
print('Classes', train_images.classes)

In [None]:
# One loader per dataset, all created with the same batch size.
train_loader, train_loader_aug, valid_loader = [
    data_loaders.get_data_loader(dataset, batch_size)
    for dataset in (train_images, train_images_aug, valid_images)
]

In [None]:
# Disabled experiment: the `if False:` guard means this cell never executes.
# It runs the vanilla ResNet with SGD and a step scheduler on `train_loader`.
# NOTE(review): the training cell further down unpacks trainer.experiment()'s
# return value into six names, so `best_model` here would presumably receive
# that whole return value rather than a model -- confirm before re-enabling.
if False:
    model = models_repo.resnet_vanilla(num_classes)
    criteria, optimizer = optimizer_repo.sgd(model, 0.01)
    scheduler = scheduler_repo.step_lr(optimizer)

    best_model = trainer.experiment(model, criteria, optimizer, scheduler, train_loader, valid_loader, 10)

In [None]:
# Disabled experiment: the `if False:` guard means this cell never executes.
# Same setup as the vanilla ResNet run, but fed from `train_loader_aug`.
# NOTE(review): the training cell further down unpacks trainer.experiment()'s
# return value into six names, so `best_model` here would presumably receive
# that whole return value rather than a model -- confirm before re-enabling.
if False:
    model = models_repo.resnet_vanilla(num_classes)
    criteria, optimizer = optimizer_repo.sgd(model, 0.01)
    scheduler = scheduler_repo.step_lr(optimizer)

    best_model = trainer.experiment(model, criteria, optimizer, scheduler, train_loader_aug, valid_loader, 10)

In [None]:
# Build the extended ResNet once with debug=True (presumably to print layer /
# freezing details -- confirm in lib.models_repo). The training cell builds the
# model again without the flag, so this instance is only used for inspection.
model, params_to_optimize = models_repo.resnet_extra_layers(num_classes, top_layers_to_freeze=7, debug=True)

In [None]:
# Main training run: extended ResNet with top_layers_to_freeze=7, SGD restricted
# to `params_to_optimize`, a step scheduler, fed from the augmented loader.
model, params_to_optimize = models_repo.resnet_extra_layers(num_classes, top_layers_to_freeze=7)
criteria, optimizer = optimizer_repo.sgd(model, 0.01, params_to_optimize=params_to_optimize)
scheduler = scheduler_repo.step_lr(optimizer, 20, 0.01)

ret = trainer.experiment(model, criteria, optimizer, scheduler, train_loader_aug, valid_loader, 100)

# The checkpoint, eval and prediction cells all read `best_model`, but the only
# other assignments to that name sit inside `if False:` cells that never run.
# Bind the returned model to `best_model` here so a fresh "Run All" works;
# `model` is kept as an alias for the same object.
best_model, train_loss_trend, val_loss_trend, best_epoch, best_accuracy, best_loss = ret
model = best_model

In [None]:
# Line plot of the training loss values returned by the training run.
train_fig, train_ax = plt.subplots()
train_ax.plot(train_loss_trend)
plt.show()

In [None]:
# Line plot of the validation loss values returned by the training run.
val_fig, val_ax = plt.subplots()
val_ax.plot(val_loss_trend)
plt.show()

In [None]:
# Persist the model together with the epoch, accuracy, loss and optimizer that
# belong to it. 'resnet34' and 'dog.breed' are passed as plain labels
# (presumably architecture and dataset names -- confirm in lib.model_saver).
# `f` keeps whatever save_checkpoint returns (presumably the checkpoint path).
f = model_saver.save_checkpoint(best_epoch, 'resnet34', best_model, best_accuracy, 
                                best_loss, optimizer, 'dog.breed')

In [None]:
# Switch the model to evaluation mode before running predictions
# (assumes best_model is a torch nn.Module -- its type is not visible here).
best_model.eval()

In [None]:
from PIL import Image
import torch
import torch.autograd as autograd
import os

In [None]:
def pred(img_path):
    """Return the class probabilities predicted for a single image file.

    The image is loaded as RGB, passed through the notebook-level
    ``trans_valid`` transformer and fed to the notebook-level ``best_model``
    on the GPU.

    Args:
        img_path: path of the image file to classify.

    Returns:
        numpy array holding the softmax of the model output for the image.
    """
    img = Image.open(img_path).convert('RGB')
    img = trans_valid(img)
    # Add a leading dimension so the single image is passed as a batch of one.
    img = autograd.Variable(torch.unsqueeze(img, 0).cuda())
    bm = best_model.cuda()
    output = bm(img)
    arr = output.data.cpu().numpy()[0]
    # Softmax with the maximum subtracted first: mathematically identical, but
    # np.exp can no longer overflow to inf (which turned the result into nan).
    exp_scores = np.exp(arr - np.max(arr))
    return exp_scores / np.sum(exp_scores)


In [None]:
# Write the submission file: one row per test image holding the image id
# (file name without '.jpg') followed by the predicted probability per class.
test_dir = '/home/as/datasets/kaggle.dog.breed/test'

# `with` closes the file even when a prediction raises part-way through.
with open('/tmp/submission.csv', 'w') as submission_file:
    # Header row copied from the sample submission's column names, so the
    # output has the same layout as the sample file.
    # NOTE(review): assumes the model's output order matches the breed column
    # order of the sample submission -- confirm against ds.DatasetFromCSV.
    submission_file.write(','.join(list(sub)) + '\n')

    # Sorted for a deterministic row order; os.listdir order is arbitrary.
    for file in sorted(os.listdir(test_dir)):
        # Skip anything that is not a .jpg; pred() cannot open other entries.
        if not file.endswith('.jpg'):
            continue
        arr = pred(os.path.join(test_dir, file))
        image_id = file[:-len('.jpg')]
        submission_file.write(image_id + ',' + ','.join(str(a) for a in arr) + '\n')

In [None]:
# Probabilities of the last image handled by the submission loop
# (`arr` is the loop variable left over from that cell).
arr

In [None]:
# Inspect the prediction still held in `arr`. The raw network output is a local
# of pred() and is not available at notebook level, so everything is derived
# from the probabilities; softmax leaves the arg-max position unchanged.
# Displays (class index, breed name, probability) for the most likely class.
predicted_idx = int(np.argmax(arr))
predicted_idx, breeds[predicted_idx], arr[predicted_idx]

In [None]:
# Collect the height and width of every file in the training image folder.
h = []
w = []

for file in os.listdir(data_path):
    p = os.path.join(data_path, file)
    # Image.open only reads the file header, which already carries the size, so
    # the pixel data is not decoded; the `with` block closes the file again.
    with Image.open(p) as img:
        # PIL reports size as (width, height) -- index 0 is the width.
        width, height = img.size
    h.append(height)
    w.append(width)



In [None]:
import numpy as np
# Difference between the configured input size and the smallest value collected
# in `h` and in `w` (positive when that smallest value is below img_size).
# Uses img_size from the config cell instead of repeating the literal 340.
print(img_size - np.min(h))
print(img_size - np.min(w))

In [None]:
import random

In [None]:
# random.randint(a, b) needs a <= b; (0, -1) is an empty range and raises
# ValueError. Catch and show the error so this demonstration does not abort a
# "Restart & Run All" of the notebook.
randint_error = None
try:
    random.randint(0, -1)
except ValueError as err:
    randint_error = err
    print('random.randint(0, -1) raised ValueError:', err)