## Start model training:

- Simple classifier that uses cropped images from detectron2
- Even the images that are not cropped 

In [1]:
import torch
import torch.nn as nn
import torchvision.models as models
import torch.nn.functional as F
import torchvision.transforms as transforms
import torch.optim 
from torchvision import datasets
from torch.autograd import Variable
import numpy as np
import pandas as pd
import cv2
import argparse
import os
from PIL import Image, ImageEnhance, ImageOps
from tqdm import tqdm
import random

In [2]:
load_dir = '../embeddings'
data = "../cropped_bird_dataset"
#img_size = 299
batch_size = 32
epochs = 50
lr = 0.01
momentum = 0.9
weight_decay = 3e-4
grad_clip = 5.
seed = 1
use_cuda = False
experiment='../experiment'

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

torch.manual_seed(seed)

<torch._C.Generator at 0x7f2644589f00>

In [3]:
# 2-layers network of the features

# Features and labels
features_train = torch.load(os.path.join(load_dir, "birds_features_train.pt"), map_location=torch.device(device))
labels_train = torch.load(os.path.join(load_dir, "birds_labels_train.pt"), map_location=torch.device(device))

features_val = torch.load(os.path.join(load_dir, "birds_features_val.pt"), map_location=torch.device(device))
labels_val = torch.load(os.path.join(load_dir, "birds_labels_val.pt"), map_location=torch.device(device))

# X = features.to("cpu").numpy()

# Dataloaders
features_tensor = torch.stack([i for i in features_train])
labels_tensor = torch.stack([i for i in labels_train])
train_data = torch.utils.data.TensorDataset(features_tensor, labels_tensor) 

features_tensor = torch.stack([torch.Tensor(i) for i in features_val])
labels_tensor = torch.stack([torch.Tensor([i]) for i in labels_val])
val_data = torch.utils.data.TensorDataset(features_tensor,labels_tensor)

train_loader = torch.utils.data.DataLoader(train_data, batch_size=batch_size, shuffle=True, num_workers=0)
val_loader = torch.utils.data.DataLoader(val_data, batch_size=batch_size, shuffle=False, num_workers=0)

In [4]:
# Model
class Classifier(nn.Module):
    def __init__(self,embedding_dim):
        super(Classifier, self).__init__()
        self.fc1 = nn.Linear(embedding_dim, 512)
        self.fc2 = nn.Linear(512, 20)

    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return x

model = Classifier(features_train[0].shape[0])

if use_cuda:
    print('Using GPU')
    model.cuda()
else:
    print('Using CPU')

# Optimizer, LR, and criterion
optimizer = torch.optim.SGD(model.parameters(), lr=lr, momentum=momentum, weight_decay=weight_decay)
lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, epochs)
criterion = torch.nn.CrossEntropyLoss()

# Training functions
def train_classifier(model, train_loader, optimizer, lr_scheduler, criterion, epoch):
    lr_scheduler.step()
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        if use_cuda:
              data, target = Variable(data.cuda()), Variable(target.cuda().long())
        else:
              data, target = Variable(data), Variable(target.long())
        optimizer.zero_grad()
        output = model(data)
        #target = target.squeeze(1)
        loss = criterion(output, target)
        loss.backward()
        nn.utils.clip_grad_norm_(model.parameters(), grad_clip)
        optimizer.step()
        if batch_idx % 10 == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), len(train_loader.dataset),
                100. * batch_idx / len(train_loader), loss.data.item()))

def validation_classifier(model, criterion, val_loader):
    model.eval()
    validation_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in val_loader:
            if use_cuda:
                data, target = Variable(data.cuda()), Variable(target.cuda().long())
            else:
                data, target = Variable(data), Variable(target.long())
            output = model(data)
            target = target.squeeze(1)
            # sum up batch loss
            validation_loss += criterion(output, target).data.item()
            # get the index of the max log-probability
            pred = output.data.max(1, keepdim=True)[1]
            correct += pred.eq(target.data.view_as(pred)).cpu().sum()

    validation_loss /= len(val_loader.dataset)
    print('\nValidation set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)'.format(
        validation_loss, correct, len(val_loader.dataset),
        100. * correct / len(val_loader.dataset)))
    return(100. * correct / len(val_loader.dataset))

Using CPU


In [5]:
# Training the classifier 
for epoch in range(1, epochs + 1):
    train_classifier(model, train_loader, optimizer, lr_scheduler, criterion, epoch)
    val_acc=validation_classifier(model, criterion, val_loader)
    if val_acc>=68:
      # Save only when it is good enough
        model_file = experiment + '/model_' + str(epoch) + '.pth'
        torch.save(model.state_dict(), model_file)
        print('Saved model to ' + model_file + '. You can run `python evaluate.py --model ' + model_file + '` to generate the Kaggle formatted csv file\n')


Validation set: Average loss: 0.1184, Accuracy: 11/100 (11%)





Validation set: Average loss: 0.1169, Accuracy: 11/100 (11%)

Validation set: Average loss: 0.1150, Accuracy: 25/100 (25%)

Validation set: Average loss: 0.1062, Accuracy: 26/100 (26%)

Validation set: Average loss: 0.1013, Accuracy: 40/100 (40%)

Validation set: Average loss: 0.0970, Accuracy: 42/100 (42%)

Validation set: Average loss: 0.0903, Accuracy: 50/100 (50%)

Validation set: Average loss: 0.0867, Accuracy: 60/100 (60%)

Validation set: Average loss: 0.0807, Accuracy: 49/100 (49%)

Validation set: Average loss: 0.0753, Accuracy: 53/100 (53%)

Validation set: Average loss: 0.0640, Accuracy: 60/100 (60%)

Validation set: Average loss: 0.0698, Accuracy: 58/100 (58%)

Validation set: Average loss: 0.0617, Accuracy: 60/100 (60%)

Validation set: Average loss: 0.0596, Accuracy: 62/100 (62%)

Validation set: Average loss: 0.0602, Accuracy: 65/100 (65%)

Validation set: Average loss: 0.0594, Accuracy: 66/100 (66%)

Validation set: Average loss: 0.0608, Accuracy: 65/100 (65%)

Validat

In [6]:
# Test with Test Time Augmentation

# Test features
features_test = torch.load(os.path.join(load_dir, 'birds_features_train.pt'), map_location=torch.device(device))

best_model_path = "../experiment/model_18.pth"

# Loading trained model
state_dict = torch.load(best_model_path)
model = Classifier(features_test[0].shape[0])
model.load_state_dict(state_dict)
model.eval()

Classifier(
  (fc1): Linear(in_features=2048, out_features=512, bias=True)
  (fc2): Linear(in_features=512, out_features=20, bias=True)
)

In [None]:
if use_cuda:
    print('Using GPU')
    model.cuda()
else:
    print('Using CPU')

def pil_loader(path):
    # open path as file to avoid ResourceWarning (https://github.com/python-pillow/Pillow/issues/835)
    with open(path, "rb") as f:
        with Image.open(f) as img:
            return img.convert("RGB")

output_file = "../experiment/kaggle.csv"

output_file = open(outfile, "w")

output_file.write("Id,Category\n")

test_list=[]
for line in open(os.path.join(args.data+ '/flip_bird_dataset/test.txt'), 'r'):
    test_list.append(line[:-1])
    

In [None]:
for f in tqdm(os.listdir(test_dir)):
    if "jpg" in f:
        data = data_transforms["test"](pil_loader(test_dir + "/" + f))
        data = data.view(1, data.size(0), data.size(1), data.size(2))
        if use_cuda:
            data = data.cuda()
        output = model(data)
        pred = output.data.max(1, keepdim=True)[1]
        output_file.write("%s,%d\n" % (f[:-4], pred))

output_file.close()

print(
    "Succesfully wrote "
    + args.outfile
    + ", you can upload this file to the kaggle competition website"
)