In [1]:
import os
import csv

import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
from torch.autograd import Variable
from torch.utils.data import Dataset, DataLoader

import pandas as pd
from skimage import io, transform
import numpy as np

from torchvision import datasets, models, transforms, utils

from data_utils import DogBreeds
from model_helpers import train_model, test_model, get_model_predictions
from PIL import Image, ImageFilter

In [2]:
# True when a CUDA device is visible to PyTorch; later cells use this flag to
# decide whether the model is moved to the GPU.
use_gpu = torch.cuda.is_available()

# Directory the notebook is running from; passed to DogBreeds as the base path
# and used to build the submission CSV location.
BASE_PATH = os.getcwd()

In [3]:
def rotate_image(image, max_angle=15, p=0.5):
    """Randomly rotate an image as a data-augmentation step.

    Args:
        image: Any object exposing ``rotate(angle)`` (e.g. a ``PIL.Image``).
        max_angle: The rotation angle is drawn uniformly from
            ``[-max_angle, max_angle)``. Defaults to 15.
        p: Probability of applying the rotation. ``0.0`` never rotates,
            ``1.0`` always rotates. Defaults to 0.5.

    Returns:
        The result of ``image.rotate(angle)`` when the rotation is applied,
        otherwise the input ``image`` object itself (not a copy).
    """
    # Draw order (gate first, then angle) is kept so seeded runs reproduce.
    if np.random.uniform() < p:
        return image.rotate(np.random.uniform(-max_angle, max_angle))
    return image

def blur_image(image, radius=2, p=0.5):
    """Randomly apply a Gaussian blur as a data-augmentation step.

    Args:
        image: Any object exposing ``filter(kernel)`` (e.g. a ``PIL.Image``).
        radius: Radius passed to ``ImageFilter.GaussianBlur``. Defaults to 2.
        p: Probability of applying the blur. ``0.0`` never blurs,
            ``1.0`` always blurs. Defaults to 0.5.

    Returns:
        The result of ``image.filter(...)`` when the blur is applied,
        otherwise the input ``image`` object itself (not a copy).
    """
    if np.random.uniform() < p:
        return image.filter(ImageFilter.GaussianBlur(radius=radius))
    return image

In [4]:
# Per-channel mean / std handed to Normalize. These are the statistics the
# torchvision documentation lists for its ImageNet-pretrained models.
IMAGENET_MEAN = [0.485, 0.456, 0.406]
IMAGENET_STD = [0.229, 0.224, 0.225]

# Newer torchvision releases expose these transforms as RandomResizedCrop /
# Resize and deprecate the RandomSizedCrop / Scale spellings. Prefer the new
# names when present and fall back to the old ones so the notebook runs on
# either generation of the library.
_RandomResizedCrop = getattr(transforms, 'RandomResizedCrop', None) or transforms.RandomSizedCrop
_Resize = getattr(transforms, 'Resize', None) or transforms.Scale

# Preprocessing pipelines keyed by split:
#   'train' - random crop / flip / rotate / blur augmentation, then normalize.
#   'val'   - deterministic resize + center crop, then normalize (also used
#             for the held-out test split).
data_transforms = {
    'train': transforms.Compose([
        _RandomResizedCrop(224),
        transforms.RandomHorizontalFlip(),
        # Pass the functions directly; wrapping them in a lambda adds nothing.
        transforms.Lambda(rotate_image),
        transforms.Lambda(blur_image),
        transforms.ToTensor(),
        transforms.Normalize(IMAGENET_MEAN, IMAGENET_STD)
    ]),
    'val': transforms.Compose([
        _Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize(IMAGENET_MEAN, IMAGENET_STD)
    ]),
}

In [5]:
# One DogBreeds dataset per split, all read from the same labels file.
# Only the training split gets the augmenting transform; validation and test
# share the deterministic 'val' pipeline.
dogs_train = DogBreeds(
    'labels.csv',
    BASE_PATH,
    transform=data_transforms['train'],
    data_split='train',
)
dogs_valid = DogBreeds(
    'labels.csv',
    BASE_PATH,
    transform=data_transforms['val'],
    data_split='valid',
)
dogs_test = DogBreeds(
    'labels.csv',
    BASE_PATH,
    transform=data_transforms['val'],
    data_split='test',
)

In [6]:
# Settings shared by every loader in this cell.
_loader_kwargs = dict(batch_size=16, num_workers=4)

# Only the training loader shuffles; evaluation order stays fixed.
train_dataloader = DataLoader(dogs_train, shuffle=True, **_loader_kwargs)
valid_dataloader = DataLoader(dogs_valid, shuffle=False, **_loader_kwargs)
test_dataloader = DataLoader(dogs_test, shuffle=False, **_loader_kwargs)

# Split name -> DataLoader, as passed to train_model / test_model.
# NOTE: this rebinds `datasets`, shadowing the `torchvision.datasets` module
# imported at the top of the notebook. The name is kept because later cells
# pass it to the helpers.
datasets = dict(
    train=train_dataloader,
    valid=valid_dataloader,
    test=test_dataloader,
)

# Split name -> number of samples in the underlying dataset.
dataset_sizes = {
    split_name: len(split_data)
    for split_name, split_data in (
        ('train', dogs_train),
        ('valid', dogs_valid),
        ('test', dogs_test),
    )
}

In [7]:
# Start from a pretrained ResNet-18 and replace its final fully-connected
# layer with a fresh 120-way linear head (presumably one output per breed
# label - confirm against the label set).
model_ft = models.resnet18(pretrained=True)
num_ftrs = model_ft.fc.in_features
model_ft.fc = nn.Linear(in_features=num_ftrs, out_features=120)

# Move to the GPU only when one was detected.
model_ft = model_ft.cuda() if use_gpu else model_ft

criterion = nn.CrossEntropyLoss()

# Every parameter of the network is handed to the optimizer (full fine-tune,
# not just the new head).
optimizer_ft = optim.SGD(
    model_ft.parameters(),
    lr=0.001,
    momentum=0.9,
)

# Multiply the learning rate by 0.1 every 7 epochs.
exp_lr_scheduler = lr_scheduler.StepLR(
    optimizer_ft,
    step_size=7,
    gamma=0.1,
)

In [8]:
# Checkpoint location; built from BASE_PATH like the other notebook paths.
MODEL_SAVE_PATH = os.path.join(BASE_PATH, 'finetuned_resnet18')

# Reuse a previously fine-tuned model when one exists on disk, otherwise train
# from the pretrained network and cache the result.
if os.path.exists(MODEL_SAVE_PATH):
    print("Loading fine tuned model...")
    # SECURITY: torch.load unpickles the file, which can execute arbitrary
    # code. Only load checkpoints you created or otherwise trust.
    # Without map_location, a checkpoint saved from a GPU fails to load on a
    # CPU-only machine; remap storages to CPU in that case.
    model_ft = torch.load(
        MODEL_SAVE_PATH,
        map_location=None if use_gpu else (lambda storage, location: storage),
    )
    # A checkpoint saved on CPU must still be moved when a GPU is in use.
    if use_gpu:
        model_ft = model_ft.cuda()
    print("loaded")
else:
    print("Training model")
    model_ft = train_model(model_ft, criterion, optimizer_ft,
                           exp_lr_scheduler, datasets, dataset_sizes,
                           use_gpu, num_epochs=25)
    torch.save(model_ft, MODEL_SAVE_PATH)

Loading fine tuned model...
loaded


In [9]:
# Reload the cached fine-tuned model from disk for evaluation.
# SECURITY: torch.load unpickles the file; only load trusted checkpoints.
# map_location lets a GPU-saved checkpoint load on a CPU-only machine.
ft_resnet18 = torch.load(
    MODEL_SAVE_PATH,
    map_location=None if use_gpu else (lambda storage, location: storage),
)
# A checkpoint saved on CPU must still be moved when a GPU is in use.
if use_gpu:
    ft_resnet18 = ft_resnet18.cuda()

# Single evaluation pass; the optimizer and scheduler are passed only because
# the helper's signature takes them.
test_model(ft_resnet18, criterion, optimizer_ft,
           exp_lr_scheduler, datasets, dataset_sizes,
           use_gpu, num_epochs=1)

test Loss: 0.0421 Acc: 0.8113


In [10]:
# Locations used to build the submission, all rooted at BASE_PATH.
SUBMISSION_FILES = os.path.join(BASE_PATH, 'test')  # directory of images to predict
SUBMISSION_IMAGE_LABELS = os.path.join(BASE_PATH, 'submission_images.csv')
SUBMISSION_CSV_PATH = os.path.join(BASE_PATH, 'submission.csv')  # output file

# Image ids = file names without their extension. os.path.splitext handles
# any extension length, unlike slicing off a fixed four characters.
# NOTE(review): os.listdir order is arbitrary and is preserved as-is here;
# presumably it must line up with the row order of submission_images.csv -
# confirm against get_model_predictions.
list_submission_image_names = [
    os.path.splitext(file_name)[0] for file_name in os.listdir(SUBMISSION_FILES)
]

In [11]:
# Submission images get the same deterministic preprocessing as validation and
# test. Reusing the 'val' pipeline instead of a copy keeps the two from
# drifting apart.
submission_transform = data_transforms['val']

submission_data = DogBreeds(
    'submission_images.csv',
    BASE_PATH,
    transform=submission_transform,
    data_split='submission',
)
# One image per batch, in file order.
submission_dataloader = DataLoader(
    submission_data,
    batch_size=1,
    shuffle=False,
    num_workers=4,
)

In [12]:
# Run the fine-tuned model over the submission loader. The helper receives the
# training set's label list, the output CSV path and the image-name list.
get_model_predictions(
    ft_resnet18,
    optimizer_ft,
    dogs_train.unique_labels,
    SUBMISSION_CSV_PATH,
    submission_dataloader,
    list_submission_image_names,
    use_gpu,
    num_epochs=1,
)