# Nature Conservancy Fish Classification With PyTorch

### Imports & Environment

In [1]:
import sys
import os
import os.path
import random
import collections
import shutil
import time
import glob
import csv
import numpy as np
import pandas as pd

import torch
import torch.backends.cudnn as cudnn
import torch.nn as nn
import torch.nn.parallel
import torch.optim as optim
import torch.utils.data as data
import torchvision.datasets as datasets
import torchvision.models as models
import torchvision.transforms as transforms

from PIL import Image

# Everything is resolved relative to the directory the notebook was started in;
# the dataset is expected in a `data` sub-directory of it.
ROOT_DIR = os.getcwd()
DATA_HOME_DIR = ROOT_DIR + '/data'
%matplotlib inline

### Config & Hyperparameters

In [16]:
# paths (every directory path ends with a single '/', so filenames and glob
# patterns can be appended directly)
data_path = DATA_HOME_DIR + '/'
split_train_path = data_path + 'train/'
full_train_path = data_path + 'train_full/'
valid_path = data_path + 'valid/'
test_path = data_path + 'test/test_stg1/'
saved_model_path = ROOT_DIR + '/models/'
submission_path = ROOT_DIR + '/submissions/'

# data
batch_size = 16
nb_split_train_samples = 3327
nb_full_train_samples = 3777
nb_valid_samples = 450
nb_test_samples = 1000
classes = ["ALB", "BET", "DOL", "LAG", "NoF", "OTHER", "SHARK", "YFT"]
nb_classes = len(classes)

# model
nb_runs = 1     # only used to tag the submission filename in test()
nb_aug = 10     # number of augmented passes averaged at test time
epochs = 100
lr = 1e-3       # initial learning rate
clip = 0.01     # only used to tag the submission filename in test()
archs = ["resnet152"]

# every lowercase, non-dunder name exported by torchvision.models
model_names = sorted(name for name in models.__dict__ if name.islower() and not name.startswith("__"))

# Despite the name, main() stores the best (lowest) validation *loss* here,
# hence the large initial value.
best_prec1 = 1000000

In [3]:
# Display the architecture names collected from torchvision.models in the config cell.
model_names

['alexnet',
 'resnet',
 'resnet101',
 'resnet152',
 'resnet18',
 'resnet34',
 'resnet50',
 'vgg',
 'vgg11',
 'vgg11_bn',
 'vgg13',
 'vgg13_bn',
 'vgg16',
 'vgg16_bn',
 'vgg19',
 'vgg19_bn']

### Helper Functions for Training

In [4]:
def train(train_loader, model, criterion, optimizer, epoch):
    """Run one optimisation pass over ``train_loader``.

    Args:
        train_loader: iterable yielding ``(images, target)`` batches.
        model: network to optimise; it is switched to train mode here.
        criterion: loss, called as ``criterion(y_pred, target)``.
        optimizer: optimizer stepped once per batch.
        epoch: index of the current epoch (not used inside the function; kept
            so the call signature mirrors ``validate``).

    Returns:
        ``(mean_loss, mean_top1_accuracy)`` for the pass, both weighted by
        batch size.
    """
    losses = AverageMeter()
    acc = AverageMeter()

    # switch to train mode
    model.train()

    for images, target in train_loader:
        # NOTE(review): labels are shifted down by one here but NOT in
        # validate(). Presumably the training folder yields labels offset by
        # one relative to the validation folder -- confirm, because with
        # 0-based labels this maps class 0 to -1.
        # `non_blocking` replaces the old `async` keyword argument, which is a
        # reserved word (SyntaxError) from Python 3.7 on.
        target = target.cuda(non_blocking=True) - 1

        # compute y_pred; the images are passed as loaded, exactly as before
        # (presumably the DataParallel wrapper built in main() moves them to
        # the GPU -- verify if the model is ever used unwrapped)
        y_pred = model(images)
        loss = criterion(y_pred, target)

        # measure accuracy and record loss; .item() works for both 0-dim and
        # one-element tensors, unlike the old `[0]` indexing
        prec1, = accuracy(y_pred.detach(), target, topk=(1,))
        nb_images = images.size(0)
        losses.update(loss.item(), nb_images)
        acc.update(prec1.item(), nb_images)

        # compute gradient and do the optimizer step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    return losses.avg, acc.avg

In [5]:
def validate(val_loader, model, criterion, epoch):
    """Evaluate ``model`` on ``val_loader`` and print a one-line summary.

    Args:
        val_loader: iterable yielding ``(images, labels)`` batches.
        model: network to evaluate; it is switched to eval mode here.
        criterion: loss, called as ``criterion(y_pred, labels)``.
        epoch: zero-based epoch index; printed as ``epoch + 1``.

    Returns:
        The mean validation loss, weighted by batch size.
    """
    losses = AverageMeter()
    acc = AverageMeter()

    # switch to evaluate mode
    model.eval()

    # torch.no_grad() replaces the `volatile=True` Variable flag, which has
    # been a no-op since PyTorch 0.4
    with torch.no_grad():
        for images, labels in val_loader:
            # `non_blocking` replaces the old `async` keyword argument, which
            # is a reserved word (SyntaxError) from Python 3.7 on
            labels = labels.cuda(non_blocking=True)

            # compute y_pred
            y_pred = model(images)
            loss = criterion(y_pred, labels)

            # measure accuracy and record loss; .item() works for both 0-dim
            # and one-element tensors, unlike the old `[0]` indexing
            prec1, = accuracy(y_pred, labels, topk=(1,))
            nb_images = images.size(0)
            losses.update(loss.item(), nb_images)
            acc.update(prec1.item(), nb_images)

    print('   * EPOCH {epoch} | Accuracy: {acc.avg:.3f} | Loss: {losses.avg:.3f}'.format(epoch=epoch+1,
                                                                                         acc=acc,
                                                                                         losses=losses))

    return losses.avg

In [17]:
def test(test_loader, model):
    """Predict class probabilities for the test set and write a submission CSV.

    The loader is iterated ``nb_aug`` times and the softmax outputs of all
    passes are averaged per image. The loader must not shuffle, so that row
    ``i`` refers to the same image in every pass.

    Args:
        test_loader: DataLoader yielding ``(images, filepaths)`` batches of any
            batch size; its ``dataset`` length sizes the prediction buffer.
        model: trained network; it is switched to eval mode here.

    Side effects:
        Writes ``<submission_path>/<run tag>.csv`` with one probability column
        per entry of ``classes`` followed by an ``image`` column holding the
        bare file names. The directory is created if it does not exist.
    """
    # placeholder array for predictions (sized from the dataset rather than a
    # hard-coded sample count) and id column
    nb_samples = len(test_loader.dataset)
    preds = np.zeros(shape=(nb_samples, nb_classes))
    id_col = []

    # switch to evaluate mode
    model.eval()

    # softmax over the class dimension; built once instead of once per image
    smax = nn.Softmax(dim=1)

    # torch.no_grad() replaces the `volatile=True` Variable flag, which has
    # been a no-op since PyTorch 0.4
    with torch.no_grad():
        # average predictions across several passes over the test data
        for aug in range(nb_aug):
            print("   * Predicting on test augmentation {}".format(aug + 1))

            row = 0
            for images, filepaths in test_loader:
                probs = smax(model(images)).cpu().numpy()
                nb_images = probs.shape[0]

                # accumulate the class probabilities for this batch's rows
                preds[row:row + nb_images] += probs

                if aug == 0:
                    # keep just the image filename (to match kaggle's
                    # submission format)
                    id_col.extend(os.path.basename(path) for path in filepaths)

                row += nb_images

    # convert averaged prediction array to pandas dataframe so we can easily add id column
    preds /= nb_aug
    pred = pd.DataFrame(preds, columns=classes)
    pred["image"] = id_col

    # filename for our submission file w/ extra info about this test run
    sub_fn = submission_path + '{0}epoch_{1}clip_{2}runs_{3}'.format(epochs, clip, nb_runs, nb_aug)

    # add the architecture context to filename
    for arch in archs:
        sub_fn += "_{}".format(arch)

    # write predictions to csv
    if not os.path.isdir(submission_path):
        os.makedirs(submission_path)
    pred.to_csv(sub_fn + '.csv', index=False)

In [7]:
def save_checkpoint(state, is_best, filename='checkpoint.pth.tar',
                    best_filename='model_best.pth.tar'):
    """Serialize ``state`` to disk, keeping a separate copy of the best one.

    Args:
        state: picklable object passed to ``torch.save``.
        is_best: when true, the file just written is also copied to
            ``best_filename``.
        filename: destination of the checkpoint (overwritten on every call).
        best_filename: destination of the best-so-far copy. The default
            matches the name the run cells pass to ``main(resume=...)``.
    """
    torch.save(state, filename)
    if is_best:
        shutil.copyfile(filename, best_filename)

In [8]:
class AverageMeter(object):
    """Keeps the most recent value and a running weighted mean of all values."""

    def __init__(self):
        self.reset()

    def reset(self):
        """Clear the last value, the weighted sum, the weight total and the mean."""
        self.val = self.avg = self.sum = self.count = 0

    def update(self, val, n=1):
        """Record ``val`` with weight ``n`` and refresh the running mean."""
        weighted = val * n
        self.val = val
        self.count += n
        self.sum += weighted
        self.avg = self.sum / self.count

In [9]:
def adjust_learning_rate(optimizer, epoch, base_lr=None):
    """Sets the learning rate to the initial LR decayed by 10 every 30 epochs

    Args:
        optimizer: object whose ``param_groups`` (list of dicts) receive the
            new ``'lr'`` value.
        epoch: zero-based epoch index.
        base_lr: initial learning rate to decay from. Defaults to the
            module-level ``lr``.

    Returns:
        The learning rate that was written into every parameter group.
    """
    if base_lr is None:
        # Read the configured initial LR without rebinding it. The previous
        # version overwrote the global on every call, which compounded the
        # decay once per epoch from epoch 30 on.
        base_lr = lr
    new_lr = base_lr * (0.1 ** (epoch // 30))
    # Write to the live param_groups: optimizer.state_dict() hands back a
    # copy, so edits made there never reach the optimizer.
    for param_group in optimizer.param_groups:
        param_group['lr'] = new_lr
    return new_lr


def accuracy(y_pred, y_actual, topk=(1, )):
    """Computes the precision@k for the specified values of k

    Args:
        y_pred: score tensor; the top-k is taken along dimension 1.
        y_actual: tensor of true class indices, one per row of ``y_pred``.
        topk: iterable of k values to evaluate.

    Returns:
        A list with one single-element tensor per entry of ``topk``, holding
        the percentage (0-100) of rows whose true class is among the k
        highest-scoring predictions.
    """
    maxk = max(topk)
    batch_size = y_actual.size(0)

    _, pred = y_pred.topk(maxk, 1, True, True)
    pred = pred.t()
    correct = pred.eq(y_actual.view(1, -1).expand_as(pred))

    res = []
    for k in topk:
        # reshape (not view): the slice of the transposed tensor is not
        # contiguous when k > 1. keepdim=True keeps a one-element tensor so
        # callers can still index the result with [0].
        correct_k = correct[:k].reshape(-1).float().sum(0, keepdim=True)
        res.append(correct_k.mul_(100.0 / batch_size))

    return res

In [10]:
class TestImageFolder(data.Dataset):
    """Dataset over the unlabeled ``*.jpg`` files located directly in ``root``.

    Each item is ``(image, path)``, where ``path`` is the file's path as
    produced by globbing ``root``; no label is available for test images.
    """

    def __init__(self, root, transform=None):
        """
        Args:
            root: directory that is searched (non-recursively) for ``*.jpg``.
            transform: optional callable applied to each loaded PIL image.
        """
        self.root = root
        # Glob the directory that was actually passed in (not a global), in
        # sorted order so indices are stable between passes.
        self.imgs = sorted(glob.glob(os.path.join(root, "*.jpg")))
        self.transform = transform

    def __getitem__(self, index):
        # entries already include `root`, so they are opened as-is
        filename = self.imgs[index]
        with open(filename, 'rb') as f:
            # force 3 channels so grayscale/CMYK files fit the RGB pipeline;
            # convert() loads the pixel data before the file is closed
            img = Image.open(f).convert('RGB')
        if self.transform is not None:
            img = self.transform(img)
        return img, filename

    def __len__(self):
        return len(self.imgs)

In [11]:
def shear(img):
    """Return ``img`` sheared horizontally by a small random factor.

    The factor is drawn uniformly from [-0.02, 0.02]; the output keeps the
    input height and is widened by the rounded horizontal shift.
    """
    src_w, src_h = img.size
    slope = random.uniform(-0.02, 0.02)
    shift = abs(slope) * src_w

    # only a positive slope needs the x offset in the affine coefficients
    if slope > 0:
        x_offset = -shift
    else:
        x_offset = 0

    out_size = (src_w + int(round(shift)), src_h)
    coefficients = (1, slope, x_offset, 0, 1, 0)
    return img.transform(out_size, Image.AFFINE, coefficients, Image.BICUBIC)

### Main Training Loop

In [12]:
def _replace_classifier_head(model, nb_outputs):
    """Swap the model's final linear layer for a fresh ``nb_outputs``-way one."""
    if hasattr(model, 'fc'):
        # e.g. the ResNets: read the input width from the layer being
        # replaced instead of a per-architecture lookup table
        model.fc = nn.Linear(model.fc.in_features, nb_outputs)
        return

    head = getattr(model, 'classifier', None)
    if isinstance(head, nn.Linear):
        model.classifier = nn.Linear(head.in_features, nb_outputs)
        return
    if isinstance(head, nn.Sequential):
        # e.g. AlexNet / VGG: rebuild the classifier with a new last layer
        layers = list(head.children())
        if layers and isinstance(layers[-1], nn.Linear):
            layers[-1] = nn.Linear(layers[-1].in_features, nb_outputs)
            model.classifier = nn.Sequential(*layers)
            return

    raise ValueError("Cannot locate the final linear layer of this architecture")


def main(mode="train", resume=False):
    """Build the model and run one of the three workflows.

    Args:
        mode: ``"test"`` writes a submission via ``test``; ``"validate"`` runs
            a single ``validate`` pass; any other value trains.
        resume: path of a checkpoint written by ``save_checkpoint`` to restore
            before running, or a false value to start from the pretrained
            weights. In training mode the run continues at the epoch stored in
            the checkpoint.

    Side effects:
        Updates the module-level ``best_prec1`` and, when training, writes a
        checkpoint after every epoch.
    """
    global best_prec1
    arch = archs[0]
    start_epoch = 0

    # create model
    print("=> Starting {0} on '{1}' model".format(mode, arch))
    model = models.__dict__[arch](pretrained=True)

    # Don't update non-classifier learned features in the pretrained networks
    for param in model.parameters():
        param.requires_grad = False

    # Replace the last fully-connected layer
    # Parameters of newly constructed modules have requires_grad=True by default
    _replace_classifier_head(model, nb_classes)

    if arch.startswith(('alexnet', 'vgg')):
        model.features = torch.nn.DataParallel(model.features)
        model.cuda()
    else:
        model = torch.nn.DataParallel(model).cuda()

    # optionally resume from a checkpoint
    if resume:
        if os.path.isfile(resume):
            print("=> Loading checkpoint '{}'".format(resume))
            # NOTE: torch.load unpickles the file -- only load checkpoints
            # from a trusted source.
            checkpoint = torch.load(resume)
            start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            print("=> Loaded checkpoint (epoch {})".format(checkpoint['epoch']))
        else:
            print("=> No checkpoint found at '{}'".format(resume))

    cudnn.benchmark = True

    # Data loading code
    # (Resize / RandomResizedCrop are the current names of the former
    # Scale / RandomSizedCrop transforms.)
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    crop_steps = [transforms.Resize(400), transforms.RandomResizedCrop(224)]
    tensor_steps = [transforms.ToTensor(), normalize]
    flip_transform = transforms.Compose(
        crop_steps + [transforms.RandomHorizontalFlip()] + tensor_steps)
    plain_transform = transforms.Compose(crop_steps + tensor_steps)

    # Only the loaders the selected mode needs are built.
    if mode == "test":
        test_loader = data.DataLoader(
            TestImageFolder(test_path, flip_transform),
            batch_size=1,
            shuffle=False,
            num_workers=1,
            pin_memory=False)
        test(test_loader, model)
        return

    # define loss function (criterion) and optimizer
    criterion = nn.CrossEntropyLoss().cuda()

    # NOTE(review): validation uses a *random* crop and a shuffled loader, so
    # the reported numbers vary from run to run; confirm this is intended
    # before switching to a deterministic crop.
    val_loader = data.DataLoader(
        datasets.ImageFolder(valid_path, plain_transform),
        batch_size=batch_size,
        shuffle=True,
        num_workers=4,
        pin_memory=True)

    if mode == "validate":
        validate(val_loader, model, criterion, 0)
        return

    train_loader = data.DataLoader(
        datasets.ImageFolder(split_train_path, flip_transform),
        batch_size=batch_size,
        shuffle=True,
        num_workers=4,
        pin_memory=True)

    # Optimise only what is still trainable, i.e. the freshly created head.
    # This works whether or not the whole model is wrapped in DataParallel.
    trainable = [param for param in model.parameters() if param.requires_grad]
    optimizer = optim.Adam(trainable, lr, weight_decay=1e-4)

    for epoch in range(start_epoch, epochs):
        adjust_learning_rate(optimizer, epoch)

        # train for one epoch
        train(train_loader, model, criterion, optimizer, epoch)

        # evaluate on validation set
        val_loss = validate(val_loader, model, criterion, epoch)

        # remember the best (lowest) validation loss and save checkpoint;
        # `best_prec1` keeps its historical name for checkpoint compatibility
        is_best = val_loss < best_prec1
        best_prec1 = min(val_loss, best_prec1)
        save_checkpoint({
            'epoch': epoch + 1,
            'arch': arch,
            'state_dict': model.state_dict(),
            'best_prec1': best_prec1,
        }, is_best)

### Run Train

In [13]:
# Train from the pretrained weights (no checkpoint is resumed).
main(mode="train")

=> Starting train on 'resnet152' model
   * EPOCH 1 | Accuracy: 58.667 | Loss: 1.177
   * EPOCH 2 | Accuracy: 59.111 | Loss: 1.109
   * EPOCH 3 | Accuracy: 58.000 | Loss: 1.053
   * EPOCH 4 | Accuracy: 62.000 | Loss: 0.998
   * EPOCH 5 | Accuracy: 63.333 | Loss: 1.035
   * EPOCH 6 | Accuracy: 64.667 | Loss: 1.011
   * EPOCH 7 | Accuracy: 61.778 | Loss: 1.062
   * EPOCH 8 | Accuracy: 66.222 | Loss: 1.028
   * EPOCH 9 | Accuracy: 68.444 | Loss: 0.861
   * EPOCH 10 | Accuracy: 70.667 | Loss: 0.841
   * EPOCH 11 | Accuracy: 59.556 | Loss: 1.095
   * EPOCH 12 | Accuracy: 72.222 | Loss: 0.823
   * EPOCH 13 | Accuracy: 69.333 | Loss: 0.855
   * EPOCH 14 | Accuracy: 70.444 | Loss: 0.905
   * EPOCH 15 | Accuracy: 71.778 | Loss: 0.850
   * EPOCH 16 | Accuracy: 70.222 | Loss: 0.830
   * EPOCH 17 | Accuracy: 68.444 | Loss: 0.851
   * EPOCH 18 | Accuracy: 73.333 | Loss: 0.798
   * EPOCH 19 | Accuracy: 69.111 | Loss: 0.875
   * EPOCH 20 | Accuracy: 68.444 | Loss: 0.849
   * EPOCH 21 | Accuracy: 73.3

In [18]:
# Restore the best checkpoint written during training and run one validation pass.
main(mode="validate", resume='model_best.pth.tar')

=> Starting validate on 'resnet152' model
=> Loading checkpoint 'model_best.pth.tar'
=> Loaded checkpoint (epoch 100)
   * EPOCH 1 | Accuracy: 81.333 | Loss: 0.647


### Run Test

In [19]:
# Restore the best checkpoint and write the averaged test predictions to a submission CSV.
main(mode="test", resume='model_best.pth.tar')

=> Starting test on 'resnet152' model
=> Loading checkpoint 'model_best.pth.tar'
=> Loaded checkpoint (epoch 100)
   * Predicting on test augmentation 1
   * Predicting on test augmentation 2
   * Predicting on test augmentation 3
   * Predicting on test augmentation 4
   * Predicting on test augmentation 5
   * Predicting on test augmentation 6
   * Predicting on test augmentation 7
   * Predicting on test augmentation 8
   * Predicting on test augmentation 9
   * Predicting on test augmentation 10
