# Multiclass object identification of dog breeds, PyTorch Edition

See the [Keras version](https://www.kaggle.com/nothxplz/keras-inception-resnet-inception-resnet50) of this notebook.

Assumes the following directory structure.

```bash
.
├── data
│   ├── classify_by_dir.sh
│   ├── labels.csv
│   ├── sample_submission.csv
│   ├── sample_submission.csv.zip
│   ├── test
│   ├── test.zip
│   ├── train
│   ├── train.zip
│   ├── unsorted
│   └── val
├── keras.best.h5
├── keras.ipynb
└── submit.csv
```
Save the following script in your data directory as `classify_by_dir.sh`, make it executable (`chmod +x classify_by_dir.sh`), then run:

`unzip train.zip && mv train unsorted && ./classify_by_dir.sh`

```bash
#!/bin/bash
# Sort the images in ./unsorted into train/<class>/ and val/<class>/ using the
# "<image>,<class>" rows of labels.csv. Run from the data directory.

# Shuffle the rows in place so the train/val split below is random.
shuf -o labels.csv <labels.csv
unsorted_dir=unsorted
counter=0
while IFS=, read -r image class; do
	# shuf moves the CSV header row to an arbitrary position. Skip it so that
	# mkdir does not create a bogus class directory for it.
	# NOTE(review): assumes the header's first column is literally "id" (as in
	# the Kaggle labels.csv) -- confirm against your copy.
	if [[ $image == id ]]; then
		continue
	fi
	((counter++))
	# Every 4th row (25%) goes to val, the rest to train.
	if ((counter % 4 == 0)); then
		sorted_dir=val
	else
		sorted_dir=train
	fi
	# Quote expansions so unusual characters in names cannot split words.
	mkdir -p "$sorted_dir/$class"
	mv "$unsorted_dir/$image.jpg" "$sorted_dir/$class/$image.jpg"
done <labels.csv
```

In [None]:
from __future__ import print_function

import os
import os.path
import shutil
import time

import numpy as np
print("np v.{}".format(np.__version__))
import torch
print("pytorch v.{}".format(torch.__version__))
import torch.backends.cudnn as cudnn
import torch.nn as nn
import torch.nn.parallel
import torch.optim as optim
import torch.utils.data as data
import torchvision.datasets as datasets
import torchvision.models as models
import torchvision.transforms as transforms
from tqdm import tqdm_notebook

# available models
# ['alexnet', 'densenet', 'densenet121', 'densenet161', 'densenet169', 'densenet201', 'inception', 'inception_v3', 'resnet', 'resnet101', 'resnet152', 'resnet18', 'resnet34', 'resnet50', 'squeezenet', 'squeezenet1_0', 'squeezenet1_1', 'vgg', 'vgg11', 'vgg11_bn', 'vgg13', 'vgg13_bn', 'vgg16', 'vgg16_bn', 'vgg19', 'vgg19_bn']
# Globals

# --- architecture / run control ---
MODEL = 'resnet50'      # key looked up in torchvision.models
CLASSES = 120           # expected number of class folders in train/ and val/
RESUME = False          # not read by any cell shown here

# --- data loading ---
BATCH_SIZE = 32
DATA_WORKERS = 4        # passed as num_workers to the DataLoaders

# --- optimisation ---
EPOCHS = 30
LEARNING_RATE = 0.001   # initial LR; stepped down by adjust_learning_rate()
WEIGHT_DECAY = 5e-4

# --- bookkeeping ---
BEST_ACC = 0            # baseline value referenced by the training loop

In [None]:
# Build the pretrained backbone selected by MODEL from torchvision.models.
model = models.__dict__[MODEL](pretrained=True)

# Freeze every pretrained weight; the replacement head created below is the
# only part handed to the optimizer.
for param in model.parameters():
    param.requires_grad = False

# Strings must be compared by value: `is` tests object identity and only
# appears to work when the interpreter happens to intern both literals.
if MODEL == 'resnet50':
    # Size the new classifier from the layer it replaces rather than
    # hard-coding the feature width.
    model.fc = nn.Linear(model.fc.in_features, CLASSES)

model = torch.nn.DataParallel(model).cuda()
cudnn.benchmark = True
criterion = nn.CrossEntropyLoss().cuda()
# DataParallel wraps the network, so the head is reached through `.module`.
optimizer = optim.Adam(model.module.fc.parameters(), lr=LEARNING_RATE, weight_decay=WEIGHT_DECAY)

In [None]:
# Dataset locations, relative to the notebook's working directory.
data_dir = os.path.join(os.getcwd(), 'data')
traindir = os.path.join(data_dir, 'train')
valdir = os.path.join(data_dir, 'val')
testdir = os.path.join(data_dir, 'test')

# Per-channel mean/std that torchvision documents for its pretrained models.
normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])

# Training pipeline: random crop + horizontal flip for augmentation.
train_loader = data.DataLoader(
    datasets.ImageFolder(traindir,
                         transforms.Compose([
                             transforms.RandomResizedCrop(224),
                             transforms.RandomHorizontalFlip(),
                             transforms.ToTensor(),
                             normalize,
                         ])),
    batch_size=BATCH_SIZE,
    shuffle=True,
    num_workers=DATA_WORKERS,
    pin_memory=True)

# Validation pipeline: deterministic resize + centre crop, no augmentation.
val_loader = data.DataLoader(
    datasets.ImageFolder(valdir,
                         transforms.Compose([
                             transforms.Resize(256),
                             transforms.CenterCrop(224),
                             transforms.ToTensor(),
                             normalize,
                         ])),
    batch_size=BATCH_SIZE,
    shuffle=True,
    num_workers=DATA_WORKERS,
    pin_memory=True)

# Sanity checks on the directory layout. Integers are compared with `==`;
# `is` tests identity and only works by accident for small cached ints.
assert len(train_loader.dataset.classes) == len(val_loader.dataset.classes), \
    "train/ and val/ contain a different number of class folders"
assert len(train_loader.dataset.classes) == CLASSES, \
    "expected {} class folders, found {}".format(CLASSES, len(train_loader.dataset.classes))

In [None]:
def adjust_learning_rate(optimizer, epoch, base_lr=None, decay=0.1, step=10):
    """Apply a step-decay learning rate to every parameter group.

    The rate is ``base_lr * decay ** (epoch // step)``; with the defaults this
    is the initial LR divided by 10 every 10 epochs.

    Args:
        optimizer: object exposing ``param_groups``, a list of dicts whose
            ``'lr'`` entry is overwritten.
        epoch: zero-based epoch index.
        base_lr: starting learning rate; defaults to the module-level
            ``LEARNING_RATE`` when omitted.
        decay: multiplicative factor applied once per ``step`` epochs.
        step: number of epochs between successive decays.

    Returns:
        The learning rate that was written to the parameter groups.
    """
    if base_lr is None:
        base_lr = LEARNING_RATE
    lr = base_lr * (decay ** (epoch // step))
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr
    return lr

In [None]:
class AverageMeter(object):
    """Running statistics for a scalar: latest value, weighted sum, count, mean."""

    def __init__(self):
        self.reset()

    def reset(self):
        """Set every tracked statistic back to zero."""
        self.val = self.avg = self.sum = self.count = 0

    def update(self, val, n=1):
        """Record ``val`` as observed ``n`` times and refresh the running mean."""
        self.val = val
        self.count += n
        self.sum += val * n
        self.avg = self.sum / self.count


In [None]:
def train(train_loader, model, criterion, optimizer, epoch):
    """Run one optimisation pass over ``train_loader``.

    Args:
        train_loader: iterable yielding ``(images, target)`` batches.
        model: network to optimise; switched to training mode here.
        criterion: loss callable invoked as ``criterion(prediction, target)``.
        optimizer: optimizer stepped once per batch.
        epoch: epoch index, used only for the progress-bar label.

    Returns:
        The sample-weighted average loss over the epoch.
    """
    losses = AverageMeter()
    model.train()

    pbar = tqdm_notebook(train_loader)
    for i, (images, target) in enumerate(pbar):
        # `async` is a reserved word from Python 3.7 on; PyTorch 0.4 renamed
        # this keyword argument to `non_blocking`.
        target = target.cuda(non_blocking=True)
        y_pred = model(images)
        loss = criterion(y_pred, target)
        # .item() extracts the Python scalar (replaces the pre-0.4 loss.data[0]).
        losses.update(loss.item(), images.size(0))
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        pbar.set_description("EPOCH[{0}][{1}/{2}]".format(epoch, i, len(train_loader)))
        pbar.set_postfix(loss="{loss.val:.4f} ({loss.avg:.4f})".format(loss=losses))

    return losses.avg


In [None]:
def validate(val_loader, model, criterion):
    """Evaluate ``model`` on ``val_loader`` without computing gradients.

    Args:
        val_loader: iterable yielding ``(images, labels)`` batches.
        model: network to evaluate; switched to eval mode here.
        criterion: loss callable invoked as ``criterion(prediction, labels)``.

    Returns:
        The sample-weighted average loss over the whole loader.
    """
    losses = AverageMeter()
    model.eval()
    pbar = tqdm_notebook(val_loader)
    # torch.no_grad() replaces the removed `volatile=True` Variable flag.
    with torch.no_grad():
        for i, (images, labels) in enumerate(pbar):
            # `async` is a reserved word from Python 3.7 on; PyTorch 0.4
            # renamed this keyword argument to `non_blocking`.
            labels = labels.cuda(non_blocking=True)
            y_pred = model(images)
            loss = criterion(y_pred, labels)
            # .item() extracts the Python scalar (replaces loss.data[0]).
            losses.update(loss.item(), images.size(0))
            pbar.set_description("VALIDATION[{}/{}]".format(i, len(val_loader)))
            pbar.set_postfix(loss="{loss.val:.4f} ({loss.avg:.4f})".format(loss=losses))

    return losses.avg


In [None]:
# Lowest validation loss seen so far. validate() returns a loss, so smaller is
# better; start at +inf so the first epoch always counts as an improvement.
best_loss = float('inf')

for epoch in range(EPOCHS):

    adjust_learning_rate(optimizer, epoch)
    # train for one epoch
    train(train_loader, model, criterion, optimizer, epoch)
    # validate, and track loss to see how we did
    loss = validate(val_loader, model, criterion)

    # Only a strictly lower loss is an improvement, and the running best must
    # be carried across epochs for the comparison to mean anything.
    is_best = loss < best_loss
    best_loss = min(loss, best_loss)
    state = {
        'epoch': epoch + 1,
        'arch': MODEL,
        'state_dict': model.state_dict(),
        'best_loss': best_loss,
    }
    torch.save(state, 'checkpoint.pth.tar')
    if is_best:
        # BEST_ACC is never reassigned, so this name is the same every epoch
        # and the file always holds the best checkpoint so far.
        shutil.copyfile('checkpoint.pth.tar', 'model_best_{}.pth.tar'.format(BEST_ACC))
