In [None]:
import os

import numpy as np
import pandas as pd
import tensorflow as tf

import torch
import torchvision
from torchvision import transforms, models
import matplotlib.pyplot as plt
import numpy as np
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import sklearn

# Run on the first CUDA GPU when PyTorch can see one, otherwise on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
print(device)

In [None]:
# --- Notebook-wide constants ---

# Image geometry used by the torchvision transforms
IM_SIZE = 256       # target size passed to Resize and RandomCrop
IM_PADDING = 16     # padding applied by RandomCrop before cropping

# Data loading
BATCH_SIZE = 128    # batch size of the train / val / test DataLoaders
VAL_SPLIT = 0.1     # fraction of the training images held out for validation

# Model head
N_CLASS = 555       # output width of the replacement fully connected layer

## Clean and format data

In [None]:
def get_bird128_data(seed=None):
    """Build the train / validation / test DataLoaders for the birds dataset.

    The images under ``train/`` are shuffled and split: the first
    ``VAL_SPLIT`` fraction of the shuffled indices becomes the validation
    set, the remainder the training set.  Training images are randomly
    augmented; validation and test images go through a deterministic
    resize + center crop so that every tensor has the same
    ``IM_SIZE x IM_SIZE`` shape and can be stacked into a batch.

    Args:
        seed: Optional integer seed for the train/validation shuffle.  With
            the default ``None`` the global NumPy random state is used, so
            the split differs between runs unless NumPy was seeded earlier.

    Returns:
        dict with the keys ``'train'``, ``'val'`` and ``'test'``, each mapping
        to a ``torch.utils.data.DataLoader``.
    """
    train_root = '../input/birds-22wi/birds/train/'
    test_root = '../input/birds-22wi/birds/test/'

    # Augmenting pipeline, used for the training samples only.
    transform_train = transforms.Compose([
        transforms.Resize(IM_SIZE),
        transforms.RandomCrop(IM_SIZE, padding=IM_PADDING, padding_mode='edge'),
        transforms.RandomHorizontalFlip(),    # Flip 50% of images along y-axis
        transforms.ToTensor(),
        transforms.Normalize(0, 1),           # mean 0 / std 1: values are left unchanged
    ])

    # Deterministic pipeline for validation and test.  The fixed output size
    # is what allows batch_size > 1: the default collate function stacks the
    # image tensors and therefore needs them to share one shape.
    transform_eval = transforms.Compose([
        transforms.Resize(IM_SIZE),
        transforms.CenterCrop(IM_SIZE),
        transforms.ToTensor(),
        transforms.Normalize(0, 1),
    ])

    # Two views of the same training folder: ImageFolder lists the files in a
    # deterministic order, so an index refers to the same image in both views.
    trainset = torchvision.datasets.ImageFolder(root=train_root, transform=transform_train)
    valset = torchvision.datasets.ImageFolder(root=train_root, transform=transform_eval)
    testset = torchvision.datasets.ImageFolder(root=test_root, transform=transform_eval)

    n = len(trainset)
    indices = list(range(n))
    rng = np.random if seed is None else np.random.RandomState(seed)
    rng.shuffle(indices)
    split_idx = int(np.floor(VAL_SPLIT * n))

    train_indices = indices[split_idx:]
    val_indices = indices[:split_idx]

    train_sampler = torch.utils.data.SubsetRandomSampler(train_indices)
    val_sampler = torch.utils.data.SubsetRandomSampler(val_indices)

    trainloader = torch.utils.data.DataLoader(trainset, batch_size=BATCH_SIZE, sampler=train_sampler, num_workers=2)
    valloader = torch.utils.data.DataLoader(valset, batch_size=BATCH_SIZE, sampler=val_sampler, num_workers=2)
    # No shuffling at test time, so batch order follows testset.samples.
    testloader = torch.utils.data.DataLoader(testset, batch_size=BATCH_SIZE, shuffle=False, num_workers=2)

    return {'train': trainloader, 'val': valloader, 'test': testloader}

# Build the loaders once; `data` maps 'train' / 'val' / 'test' to their DataLoader.
data = get_bird128_data()

In [None]:
# Peek at one training batch.  The built-in next() is used because DataLoader
# iterators in recent PyTorch releases no longer expose a `.next()` method.
print(next(iter(data['train'])))

In [None]:
# Pull a single batch from the validation loader and show it.
val_iter = iter(data['val'])
test_batch = next(val_iter)
print(test_batch)

In [None]:
# Take one training batch and keep its first 8 images for a visual check.
# next(dataiter) replaces dataiter.next(), which recent PyTorch releases
# no longer provide on DataLoader iterators.
dataiter = iter(data['train'])
images, labels = next(dataiter)
images = images[:8]
print(images.size())

def imshow(img):
    """Show an image tensor with matplotlib.

    The tensor's axes are reordered from (0, 1, 2) to (1, 2, 0) before
    plotting, i.e. the first axis is moved to the end.
    """
    pixels = img.numpy().transpose(1, 2, 0)
    plt.imshow(pixels)
    plt.show()

# Tile the sample images into one grid and display it
grid = torchvision.utils.make_grid(images)
imshow(grid)

# Print the label of each displayed image, right-aligned in 9 columns
label_strs = ['%9s' % labels[j] for j in range(8)]
print("Labels:" + ' '.join(label_strs))

# Flatten everything after the batch dimension and compare the two shapes
flat = torch.flatten(images, 1)
print(images.size())
print(flat.size())

## Model setup

In [None]:
def _set_lr(optimizer, lr):
    """Announce ``lr`` and assign it to every parameter group of ``optimizer``."""
    print ("Learning rate: %f"% lr)
    for g in optimizer.param_groups:
        g['lr'] = lr


def train(net, dataloader, epochs=1, start_epoch=0, lr=0.01, momentum=0.9, decay=0.0005,
          verbose=1, print_every=10, state=None, schedule=None, checkpoint_path=None):
    """Train ``net`` with SGD and cross-entropy loss.

    Args:
        net: the module to optimize; it is moved to the module-level
            ``device`` and put into training mode.
        dataloader: iterable yielding ``(inputs, labels)`` batches.
        epochs: epoch index at which training stops (exclusive).
        start_epoch: first epoch index to run; overridden by
            ``state['epoch']`` when ``state`` is given.
        lr, momentum, decay: learning rate, momentum and weight decay
            passed to ``optim.SGD``.
        verbose: when truthy, print the running mean loss every
            ``print_every`` mini-batches.
        print_every: number of mini-batches per progress report.
        state: optional checkpoint dict with the keys ``'net'``,
            ``'optimizer'``, ``'epoch'`` and ``'losses'`` (the format this
            function saves) from which training resumes.
        schedule: optional dict mapping an epoch index to the learning rate
            that takes effect from that epoch on.  ``None`` means no schedule.
        checkpoint_path: optional path prefix; after every epoch a checkpoint
            is saved to ``checkpoint_path + 'checkpoint-<epoch>.pkl'``.

    Returns:
        list of per-batch loss values (floats), including those restored
        from ``state`` when resuming.
    """
    # A None default avoids sharing one mutable dict between calls.
    if schedule is None:
        schedule = {}

    net.to(device)
    net.train()
    losses = []
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(net.parameters(), lr=lr, momentum=momentum, weight_decay=decay)

    # Load previous training state
    if state:
        net.load_state_dict(state['net'])
        optimizer.load_state_dict(state['optimizer'])
        start_epoch = state['epoch']
        # Copy, so appending below does not modify the caller's checkpoint.
        losses = list(state['losses'])

    # Fast forward lr schedule through already trained epochs
    for epoch in range(start_epoch):
        if epoch in schedule:
            _set_lr(optimizer, schedule[epoch])

    for epoch in range(start_epoch, epochs):
        sum_loss = 0.0

        # Update learning rate when scheduled
        if epoch in schedule:
            _set_lr(optimizer, schedule[epoch])

        for i, batch in enumerate(dataloader, 0):
            inputs, labels = batch[0].to(device), batch[1].to(device)

            optimizer.zero_grad()

            outputs = net(inputs)
            loss = criterion(outputs, labels)
            loss.backward()   # compute gradients of the loss w.r.t. the parameters
            optimizer.step()  # apply one SGD update

            batch_loss = loss.item()
            losses.append(batch_loss)
            sum_loss += batch_loss

            # Report the mean loss of the last `print_every` mini-batches
            if i % print_every == print_every-1:
                if verbose:
                    print('[%d, %5d] loss: %.3f' % (epoch, i + 1, sum_loss / print_every))
                sum_loss = 0.0

        if checkpoint_path:
            # 'epoch' stores the index of the next epoch to run on resume.
            state = {'epoch': epoch+1, 'net': net.state_dict(), 'optimizer': optimizer.state_dict(), 'losses': losses}
            torch.save(state, checkpoint_path + 'checkpoint-%d.pkl'%(epoch+1))
    return losses

def accuracy(net, dataloader):
    """Return the fraction of samples in ``dataloader`` that ``net`` classifies correctly.

    Args:
        net: module producing one score per class for each input; it is
            moved to the module-level ``device`` and put into eval mode.
        dataloader: iterable yielding ``(inputs, labels)`` batches.

    Returns:
        ``correct / total`` as a float, or ``0.0`` when the dataloader
        yields no samples (instead of dividing by zero).
    """
    net.to(device)
    net.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for batch in dataloader:
            images, labels = batch[0].to(device), batch[1].to(device)
            outputs = net(images)
            # Index of the largest score along dim 1 is the predicted class.
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    if total == 0:
        return 0.0
    return correct/total

def smooth(x, size):
    """Moving average of ``x`` with a uniform window of ``size`` samples.

    Uses ``np.convolve`` in 'valid' mode; for ``size <= len(x)`` the result
    has ``len(x) - size + 1`` entries.
    """
    window = np.ones(size)
    window /= size
    return np.convolve(x, window, mode='valid')

In [None]:
# Three pretrained torchvision backbones to compare
resnet50 = models.resnet50(pretrained=True)
resnext50 = models.resnext50_32x4d(pretrained=True)
wideresnet50 = models.wide_resnet50_2(pretrained=True)

model_dict = {
    'resnet': resnet50,
    'resnext': resnext50,
    'wideresnet': wideresnet50,
}

for model_name, model in model_dict.items():
    # Freeze all pretrained weights so only the new head below is updated
    for param in model.parameters():
        param.requires_grad_(False)

    # Replace the fully connected layer with a fresh N_CLASS-way classifier
    fc_in = model.fc.in_features
    model.fc = nn.Linear(fc_in, N_CLASS)

## Model training

In [None]:
# Make sure the checkpoint directory exists (no error when it already does)
os.makedirs('./checkpoints/', exist_ok=True)
checkpoints = './checkpoints/'

In [None]:
# Train every model for 10 epochs: lr 0.01 from epoch 0, lr 0.001 from epoch 5.
# Per-batch losses are collected under the model's name.
lr_schedule = {0:.01, 5:.001}
model_losses = {}
for model_name in model_dict:
    model_loss = train(
        model_dict[model_name],
        data['train'],
        epochs=10,
        schedule=lr_schedule,
        checkpoint_path=checkpoints + model_name,
    )
    model_losses[model_name] = model_loss

In [None]:
# Training loss of the 'resnet' model, averaged over a 50-batch window.
# Title and axis labels make the figure readable on its own.
plt.plot(smooth(model_losses['resnet'], 50))
plt.title('resnet: smoothed training loss')
plt.xlabel('mini-batch (50-batch moving average)')
plt.ylabel('loss')
plt.show()

In [None]:
# Training loss of the 'resnext' model, averaged over a 50-batch window.
# Title and axis labels make the figure readable on its own.
plt.plot(smooth(model_losses['resnext'], 50))
plt.title('resnext: smoothed training loss')
plt.xlabel('mini-batch (50-batch moving average)')
plt.ylabel('loss')
plt.show()

In [None]:
# Training loss of the 'wideresnet' model, averaged over a 50-batch window.
# Title and axis labels make the figure readable on its own.
plt.plot(smooth(model_losses['wideresnet'], 50))
plt.title('wideresnet: smoothed training loss')
plt.xlabel('mini-batch (50-batch moving average)')
plt.ylabel('loss')
plt.show()

In [None]:
# Accuracy of each model on the held-out validation loader, reported one by one
resnet_val_acc = accuracy(model_dict['resnet'], data['val'])
print("Validation accuracy, resnet: %f" % resnet_val_acc)

resnext_val_acc = accuracy(model_dict['resnext'], data['val'])
print("Validation accuracy, resnext: %f" % resnext_val_acc)

wideresnet_val_acc = accuracy(model_dict['wideresnet'], data['val'])
print("Validation accuracy, wideresnet: %f" % wideresnet_val_acc)

## Format for submission

In [None]:
def getPredFile(model):
    """Write ``model``'s predictions for the test images to ``submission.csv``.

    One line is written per test image, in the form
    ``test/<file>, <predicted class index>``.

    Args:
        model: module producing one score per class for an image batch.  It
            is moved to the module-level ``device`` and switched to eval mode
            before inference.

    Raises:
        ValueError: if a test image path does not contain ``'test/0/'``.
    """
    transform_test = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(0, 1)
    ])
    # NOTE(review): unlike the training transform, no Resize is applied here,
    # so images are fed at their stored resolution -- confirm this is intended.
    testset = torchvision.datasets.ImageFolder(root='../input/birds-22wi/birds/test/', transform=transform_test)
    # Default batch_size=1 and shuffle=False: batch i is exactly testset.samples[i].
    testloader = torch.utils.data.DataLoader(testset, num_workers=2)

    # Inference must not depend on the mode / device the caller left the model in.
    model.to(device)
    model.eval()

    # The context manager closes the file even if inference raises midway.
    with open("submission.csv", "w") as f, torch.no_grad():
        for i, (images, _labels) in enumerate(testloader, 0):
            outputs = model(images.to(device))
            _, predicted = torch.max(outputs, 1)
            fname, _ = testset.samples[i]
            # Keep only the part of the path after 'test/0/' (7 characters long).
            pathname_idx = fname.index('test/0/')
            fname = 'test/' + fname[pathname_idx + 7:]
            f.write("{}, {}\n".format(fname, predicted.item()))

# Generate submission.csv from the model stored under 'resnet' in model_dict.
getPredFile(model_dict['resnet'])