# Fish classification using ResNet50

## First model

### Setting up

In [1]:
# import default libraries
import glob
import os
import shutil
import time
from IPython.core.debugger import Tracer # call Tracer()() for debugging

In [2]:
# import PyTorch modules
import torch
import torch.nn as nn
import torch.backends.cudnn as cudnn
import torch.optim
import torch.utils.data
import torchvision.transforms as transforms
import torchvision.datasets as datasets
import torchvision.models as models

In [3]:
# Working directory for derived data: the train/val image split lives here,
# and save_checkpoint() writes its checkpoint files here too.
intermediate_path = os.path.join(os.pardir, "intermediate")

# Training and validation image folders inside the working directory.
train_path, val_path = (
    os.path.join(intermediate_path, split) for split in ("train", "val")
)

# Number of images moved out of the training folder to form the validation set.
valnum = 500

### Process data

In [4]:
# Create the working copy of the training data once, then carve a validation
# split out of it. Each step is skipped when its target folder already exists,
# so re-running the cell does not copy or split a second time.
if not os.path.isdir(train_path):
    shutil.copytree("../data/train/", train_path)
    # Remove hidden entries (names starting with ".") that were copied along
    # with the data. This is done in Python instead of the IPython "!rm -r"
    # shell escape so the cell also runs as plain Python and handles paths
    # containing spaces.
    for hidden_entry in glob.glob(os.path.join(train_path, ".*")):
        if os.path.isdir(hidden_entry) and not os.path.islink(hidden_entry):
            shutil.rmtree(hidden_entry)
        else:
            os.remove(hidden_entry)

if not os.path.isdir(val_path):
    import numpy as np
    np.random.seed(7)
    # glob returns files in file-system order; sort first so the seeded
    # shuffle selects the same validation images on every machine.
    g = sorted(glob.glob(os.path.join(train_path, "*", "*.jpg")))
    shuf = np.random.permutation(g)
    # Slicing (rather than indexing range(valnum)) avoids an IndexError when
    # fewer than valnum images are available.
    for src in shuf[:valnum]:
        src = str(src)
        # Rebuild the destination from the path relative to train_path. A
        # blanket str.replace("train", "val") would also rewrite "train"
        # wherever it appears in a class-folder or file name.
        dst = os.path.join(val_path, os.path.relpath(src, train_path))
        # os.renames creates the missing class folder under val_path.
        os.renames(src, dst)

### Global parameters and utility functions

In [5]:
# Global parameters shared by the cells below.

# -- model --
arch = "resnet50"      # architecture name recorded in every checkpoint
pretrained = True      # forwarded to models.resnet50(pretrained=...)

# -- data loading --
batch_size = 16        # 32 and 64 ran out of memory
num_workers = 4        # number of DataLoader worker processes

# -- optimisation and logging --
learning_rate = 1e-4   # base learning rate, decayed by adjust_learning_rate()
print_freq = 10        # print progress every print_freq batches

In [6]:
# utility functions
# TODO: try to use torchnet utilities
# link: https://github.com/pytorch/tnt/
# replace: AverageMeter, accuracy
def save_checkpoint(state, is_best, filename="checkpoint.pth.tar"):
    """Write ``state`` to the intermediate folder and optionally mark it best.

    The object is serialized with ``torch.save`` to
    ``intermediate_path/filename``. When ``is_best`` is truthy, the file that
    was just written is additionally copied to ``model_best.pth.tar`` in the
    same folder.
    """
    target = os.path.join(intermediate_path, filename)
    torch.save(state, target)
    if not is_best:
        return
    best_copy = os.path.join(intermediate_path, "model_best.pth.tar")
    shutil.copyfile(target, best_copy)

class AverageMeter(object):
    """Computes and stores the average and current value.

    Attributes (all reset to 0 by ``reset``):
        val: the value passed to the most recent ``update`` call.
        sum: running total of ``val * n`` over all updates.
        count: running total of ``n`` over all updates.
        avg: ``sum / count``, or 0 while ``count`` is 0.
    """
    def __init__(self):
        self.reset()

    def reset(self):
        """Clear all statistics back to zero."""
        self.val = 0
        self.avg = 0
        self.sum = 0
        self.count = 0

    def update(self, val, n=1):
        """Record ``val`` as the latest value, weighted by ``n`` samples."""
        self.val = val
        self.sum += val * n
        self.count += n
        # A zero-weight update on an empty meter would otherwise divide by
        # zero; keep the average at its reset value in that case.
        self.avg = self.sum / self.count if self.count else 0


def adjust_learning_rate(optimizer, epoch):
    """Apply the step schedule: the base LR divided by 10 every 30 epochs.

    The base value is the module-level ``learning_rate``. The decayed rate is
    written into the ``"lr"`` entry of every parameter group of ``optimizer``.
    """
    completed_steps = epoch // 30
    decayed_lr = learning_rate * (0.1 ** completed_steps)
    for group in optimizer.param_groups:
        group["lr"] = decayed_lr

def accuracy(output, target, topk=(1,)):
    """Computes the precision@k for the specified values of k.

    Args:
        output: 2-D tensor of scores, one row per sample; the top-k search
            runs along dimension 1.
        target: 1-D tensor holding the correct index for each row of
            ``output``.
        topk: iterable of k values to evaluate.

    Returns:
        A list with one 1-element float tensor per entry of ``topk``: the
        percentage of samples whose target is among the k highest scores.
    """
    maxk = max(topk)
    # Named num_samples so the module-level batch_size is not shadowed.
    num_samples = target.size(0)

    # Indices of the maxk highest scores per sample, transposed to
    # (maxk, num_samples) so row j holds every sample's (j+1)-th best guess.
    _, pred = output.topk(maxk, 1, True, True)
    pred = pred.t()
    correct = pred.eq(target.view(1, -1).expand_as(pred))

    res = []
    for k in topk:
        # reshape (not view): the slice of the transposed result is not
        # guaranteed to be contiguous, and view() raises for k > 1 then.
        # keepdim=True keeps a 1-element tensor so callers can index [0].
        correct_k = correct[:k].reshape(-1).float().sum(0, keepdim=True)
        res.append(correct_k.mul_(100.0 / num_samples))
    return res

### Train/validate functions

In [7]:
# train function
def train(train_loader, model, criterion, optimizer, epoch):
    """Train ``model`` for one pass over ``train_loader``.

    For every batch: move the data to the GPU, run the forward pass, compute
    ``criterion``, back-propagate and take one ``optimizer`` step. Running
    averages of batch time, data-loading time, loss and top-1 precision are
    printed every ``print_freq`` batches.

    Args:
        train_loader: iterable yielding ``(images, target)`` batches.
        model: network to train; switched to train mode here.
        criterion: loss function called as ``criterion(output, target)``.
        optimizer: optimizer whose ``zero_grad``/``step`` are called per batch.
        epoch: epoch index, used only in the progress message.
    """
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()

    # switch to train mode
    model.train()

    end = time.time()
    for i, (images, target) in enumerate(train_loader):
        # measure data loading time
        data_time.update(time.time() - end)

        # The model lives on a single GPU (no DataParallel wrapper), so the
        # batch has to be moved there explicitly. non_blocking=True is the
        # current spelling of the old async=True flag ("async" is a reserved
        # word since Python 3.7); it works together with pin_memory=True on
        # the DataLoader.
        images = images.cuda(non_blocking=True)
        target = target.cuda(non_blocking=True)

        # compute output (tensors track gradients themselves; no Variable
        # wrapper is needed)
        output = model(images)
        loss = criterion(output, target)

        # measure accuracy and record loss; float()/item() accept both
        # 0-dim and 1-element tensors, so plain Python numbers are stored
        prec1 = accuracy(output.detach(), target, topk=(1,))[0]
        losses.update(loss.item(), images.size(0))
        top1.update(float(prec1), images.size(0))

        # compute gradient and do one optimizer step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if i % print_freq == 0:
            print("Epoch: [{0}][{1}/{2}]\t"
                  "Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t"
                  "Data {data_time.val:.3f} ({data_time.avg:.3f})\t"
                  "Loss {loss.val:.4f} ({loss.avg:.4f})\t"
                  "Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t".format(
                      epoch, i, len(train_loader), batch_time=batch_time,
                      data_time=data_time, loss=losses, top1=top1))

In [8]:
# validate function
def validate(val_loader, model, criterion):
    """Evaluate ``model`` on ``val_loader`` and return the mean top-1 precision.

    Runs the forward pass with gradient tracking disabled, accumulates loss
    and top-1 precision weighted by batch size, prints progress every
    ``print_freq`` batches and a final summary line.

    Args:
        val_loader: iterable yielding ``(images, target)`` batches.
        model: network to evaluate; switched to eval mode here.
        criterion: loss function called as ``criterion(output, target)``.

    Returns:
        The sample-weighted average top-1 precision (in percent) over the
        whole loader.
    """
    batch_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()

    # switch to evaluate mode
    model.eval()

    end = time.time()
    # torch.no_grad() replaces the removed volatile=True Variable flag:
    # no graph is recorded, so nothing is kept around for backward().
    with torch.no_grad():
        for i, (images, target) in enumerate(val_loader):
            # non_blocking=True is the current spelling of async=True
            images = images.cuda(non_blocking=True)
            target = target.cuda(non_blocking=True)

            # compute output
            output = model(images)
            loss = criterion(output, target)

            # measure accuracy and record loss as plain Python numbers
            prec1 = accuracy(output, target, topk=(1,))[0]
            losses.update(loss.item(), images.size(0))
            top1.update(float(prec1), images.size(0))

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            if i % print_freq == 0:
                print("Test: [{0}/{1}]\t"
                      "Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t"
                      "Loss {loss.val:.4f} ({loss.avg:.4f})\t"
                      "Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t".format(
                          i, len(val_loader), batch_time=batch_time,
                          loss=losses, top1=top1))

    print(" * Prec@1 {top1.avg:.3f}"
          .format(top1=top1))

    return top1.avg

### Create model and data loader

In [9]:
# create model
# Build a torchvision ResNet-50; the global `pretrained` flag is forwarded
# unchanged to the constructor.
model = models.resnet50(pretrained=pretrained)
# Freeze every existing parameter: with requires_grad=False no gradients are
# computed for them.
for param in model.parameters():
    param.requires_grad = False
# Parameters of newly constructed modules have requires_grad=True by default
# replace the last fully-connected layer
# The new layer takes 512 * bn_expansion = 2048 input features and produces
# 8 outputs (presumably one per fish class -- TODO confirm against the data
# folders).
bn_expansion = 4
model.fc = nn.Linear(512 * bn_expansion, 8)
# for 1 GPU, it is unnecessary to use DataParallel
#model = torch.nn.DataParallel(model).cuda()
# Move the model to the GPU. As the last expression of the cell, this call
# is also what makes the notebook display the model structure.
model.cuda()

ResNet (
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True)
  (relu): ReLU (inplace)
  (maxpool): MaxPool2d (size=(3, 3), stride=(2, 2), padding=(1, 1), dilation=(1, 1))
  (layer1): Sequential (
    (0): Bottleneck (
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True)
      (relu): ReLU (inplace)
      (downsample): Sequential (
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True)
      )
    )
    (1): Bott

In [10]:
# set this flag to get 2x speed improvement
# according to PyTorch Slack #beginner channel
# NOTE(review): the 2x figure is anecdotal and is not measured anywhere in
# this notebook -- confirm before relying on it.
cudnn.benchmark = True

In [11]:
# Loss function: cross-entropy, moved to the GPU.
criterion = nn.CrossEntropyLoss()
criterion = criterion.cuda()

# Optimizer: Adam over the parameters of the replaced fc layer only,
# starting from the global learning_rate.
optimizer = torch.optim.Adam(model.fc.parameters(), lr=learning_rate)

In [12]:
# Data loading code
# Per-channel mean/std applied to the tensors produced by ToTensor().
normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225])

# Training data: random 224x224 crop plus random horizontal flip as
# augmentation, reshuffled every epoch.
# RandomResizedCrop is the current name of the removed RandomSizedCrop.
train_loader = torch.utils.data.DataLoader(
    datasets.ImageFolder(train_path, transforms.Compose([
        transforms.RandomResizedCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        normalize,
    ])),
    batch_size=batch_size,
    shuffle=True,
    num_workers=num_workers,
    pin_memory=True,
)

# Validation data: deterministic resize to 256 followed by a central
# 224x224 crop, no shuffling.
# Resize is the current name of the removed Scale.
val_loader = torch.utils.data.DataLoader(
    datasets.ImageFolder(val_path, transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        normalize,
    ])),
    batch_size=batch_size,
    shuffle=False,
    num_workers=num_workers,
    pin_memory=True,
)

### Train model

In [13]:
# Train for a fixed number of epochs, validating and checkpointing after
# each one; the best validation precision seen so far is tracked.
epochs = 3
best_prec1 = 0
for epoch in range(epochs):
    # set this epoch's learning rate from the step schedule
    adjust_learning_rate(optimizer, epoch)

    # one pass over the training data, then score on the validation set
    train(train_loader, model, criterion, optimizer, epoch)
    prec1 = validate(val_loader, model, criterion)

    # remember best prec@1
    is_best = prec1 > best_prec1
    best_prec1 = max(prec1, best_prec1)

    # save a checkpoint; save_checkpoint also copies it to the "best" file
    # when is_best is set
    checkpoint = {
        "epoch": epoch + 1,
        "arch": arch,
        "state_dict": model.state_dict(),
        "best_prec1": best_prec1,
    }
    save_checkpoint(checkpoint, is_best)

Epoch: [0][0/205]	Time 1.770 (1.770)	Data 0.616 (0.616)	Loss 2.2480 (2.2480)	Prec@1 0.000 (0.000)	
Epoch: [0][10/205]	Time 0.073 (0.227)	Data 0.000 (0.056)	Loss 1.7457 (1.9839)	Prec@1 43.750 (25.568)	
Epoch: [0][20/205]	Time 0.078 (0.175)	Data 0.006 (0.051)	Loss 1.9110 (1.8483)	Prec@1 37.500 (32.738)	
Epoch: [0][30/205]	Time 0.077 (0.159)	Data 0.005 (0.052)	Loss 1.3527 (1.7629)	Prec@1 68.750 (37.702)	
Epoch: [0][40/205]	Time 0.196 (0.150)	Data 0.123 (0.052)	Loss 1.8813 (1.6992)	Prec@1 43.750 (41.006)	
Epoch: [0][50/205]	Time 0.072 (0.142)	Data 0.000 (0.049)	Loss 1.5655 (1.6581)	Prec@1 37.500 (42.892)	
Epoch: [0][60/205]	Time 0.196 (0.140)	Data 0.123 (0.050)	Loss 1.8922 (1.6666)	Prec@1 43.750 (42.520)	
Epoch: [0][70/205]	Time 0.076 (0.135)	Data 0.004 (0.048)	Loss 1.6285 (1.6835)	Prec@1 50.000 (42.077)	
Epoch: [0][80/205]	Time 0.113 (0.134)	Data 0.041 (0.048)	Loss 1.8330 (1.6778)	Prec@1 37.500 (41.975)	
Epoch: [0][90/205]	Time 0.077 (0.132)	Data 0.005 (0.048)	Loss 1.5061 (1.6651)	Prec@1 