# RecVis Assignment 3

**Author:** Arthur Pignet
arthur.pignet@mines-paristech.fr

This notebook is the second of the two notebooks I wrote for this assignment.

It covers an approach with feature extraction via an autoencoder trained on unlabelled data. The unlabelled data are extracted from the 2019 iNaturalist dataset.

The first notebook covered native CNN training, fine-tuning of a ResNet pretrained on ImageNet, and feature extraction with a scattering wavelet network followed by a CNN.

The notebook has been run with Google Colab, on P100 GPUs. 

## Set up

In [None]:
gpu_info = !nvidia-smi
gpu_info = '\n'.join(gpu_info)
if gpu_info.find('failed') >= 0:
  print('Select the Runtime > "Change runtime type" menu to enable a GPU accelerator, ')
  print('and then re-execute this cell.')
else:
  print(gpu_info)

In [None]:
# Set the git identity used by the commits made later in this notebook.
!git config --global user.email "arthur.pignet@mines-paristech.fr"
!git config --global user.name "arthurPignet"
# Clone the assignment repository and move the notebook's working directory into it.
!git clone https://github.com/arthurPignet/recvis21_a3.git
%cd recvis21_a3
!ls
# Install the Python dependencies listed by the repository.
!pip install -r requirements.txt
# NOTE(review): `!cd` runs in a temporary subshell, so this line does not change the
# notebook's working directory (only `%cd` does) — confirm which directory is intended.
!cd .. 

## Download datasets

In [None]:
# Download the iNaturalist 2019 images and the train/val annotation archives.
# With `-r`, wget stores the files under a directory named after the host, which is
# why the extraction cell reads from `ml-inat-competition-datasets.s3.amazonaws.com/2019/`.
!wget -r https://ml-inat-competition-datasets.s3.amazonaws.com/2019/train_val2019.tar.gz 
!wget -r  https://ml-inat-competition-datasets.s3.amazonaws.com/2019/train2019.json.tar.gz
!wget -r https://ml-inat-competition-datasets.s3.amazonaws.com/2019/val2019.json.tar.gz

In [None]:
# Extract only the `train_val2019/Birds` member from the image archive, then unpack
# the two annotation archives in full.
! tar -xzf ml-inat-competition-datasets.s3.amazonaws.com/2019/train_val2019.tar.gz train_val2019/Birds # we only want the birds
! tar -xzf ml-inat-competition-datasets.s3.amazonaws.com/2019/train2019.json.tar.gz 
! tar -xzf ml-inat-competition-datasets.s3.amazonaws.com/2019/val2019.json.tar.gz 

In [None]:
!rm -r ml-inat-competition-datasets.s3.amazonaws.com/ # free disk space

In [None]:
# Download the labelled bird dataset of the assignment, unpack it, and delete the
# archive once extracted.
!wget -q https://www.di.ens.fr/willow/teaching/recvis18orig/assignment3/bird_dataset.zip
!unzip bird_dataset.zip 
!rm bird_dataset.zip

## Imports

In [None]:
import torchvision.datasets as dset
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
import numpy as np
import torch
import torch.utils.data as data
from PIL import Image
import os
import os.path

from __future__ import print_function
from __future__ import division
import torch
import torch.nn as nn
import torch.optim as optim
from torchsummary import summary
import torchvision
from torchvision import datasets, models, transforms
import matplotlib.pyplot as plt
import time
import copy
print("PyTorch Version: ",torch.__version__)
print("Torchvision Version: ",torchvision.__version__)

from src.data import iBirdDataset, get_data_transform
from models import VGG13AE


In [None]:
# List the contents of the current working directory.
ls

## Load datasets

### Data related parameters 

In [None]:
input_size = 224  # image size handed to get_data_transform
batch_size = 48  # batch size of both training loaders and of the labelled validation loader
labelled_data_dir = "./bird_dataset/"  # root containing train_images/ and val_images/
unlabelled_data_dir = "./"  # NOTE(review): not referenced anywhere else in this notebook
data_transforms = get_data_transform(input_size)  # indexed below with 'train' and 'val'

### Load the data

In [None]:
# Unlabelled datasets used to train and validate the autoencoder. They are built
# with `iBirdDataset`, the dataset class this notebook imports from `src.data`,
# from the 2019 annotation files extracted into the working directory.
train_data = iBirdDataset(root='.',
                          annFile='train2019.json',
                          transform=data_transforms['train'])

val_data = iBirdDataset(root='.',
                        annFile='val2019.json',
                        transform=data_transforms['val'])

# Training batches are shuffled; validation keeps a fixed order and a smaller batch.
train_loader = torch.utils.data.DataLoader(
    train_data, batch_size=batch_size, shuffle=True, num_workers=1)
val_loader = torch.utils.data.DataLoader(
    val_data, batch_size=12, shuffle=False, num_workers=1)

In [None]:
# Report the size of the unlabelled training set, then fetch one sample at a
# random index and display it together with its target.
print('Number of samples: ', len(train_data))
img, target = train_data[np.random.randint(len(train_data))]

print("Image Size: ", img.size())
# Move the first axis to the end before plotting
# (assumes img is a channels-first tensor — TODO confirm).
plt.imshow(np.transpose(img.numpy(), (1, 2, 0)))
print(target)

In [None]:
# Loaders over the labelled bird images, read as class-per-folder datasets.
# Only the training loader shuffles its samples.
labelled_train_loader = torch.utils.data.DataLoader(
    dataset=datasets.ImageFolder(
        root=labelled_data_dir + '/train_images',
        transform=data_transforms['train'],
    ),
    num_workers=1,
    shuffle=True,
    batch_size=batch_size,
)
labelled_val_loader = torch.utils.data.DataLoader(
    dataset=datasets.ImageFolder(
        root=labelled_data_dir + '/val_images',
        transform=data_transforms['val'],
    ),
    num_workers=1,
    shuffle=False,
    batch_size=batch_size,
)

## Model 

In [None]:
# Instantiate the VGG13AE model (imported from `models`) with a 512-dimensional
# latent space and start in autoencoder mode.
model = VGG13AE(latent_space_dim=512)
model.mode_autoencoder = True
# Move the model to the GPU unconditionally: this cell requires a CUDA device.
model.cuda()
# Print a torchsummary report for a 3x224x224 input.
summary(model, (3,224,224))

## Training

I started with 8 epochs of autoencoder training, which is really long even on a P100 GPU (~2 hours): the model is big, and there is a lot of data.

In [None]:
# parameters
num_epochs_clas = 15  # maximum number of classifier epochs (early stopping may end sooner)
num_epochs_ae = 8  # number of autoencoder epochs

lr = 0.01  # SGD learning rate
momentum = 0.8  # SGD momentum

log_interval = 10  # print the training loss every `log_interval` batches

In [None]:
# Global SGD optimizer over all parameters of the current `model`; both training
# functions below step it.
# NOTE(review): it is bound to the parameters of the `model` object that exists when
# this cell runs, so it must be rebuilt if `model` is later replaced by a new instance.
optimizer = optim.SGD(model.parameters(), lr=lr, momentum=momentum)
def train_classifier(epoch):
    """Run one epoch of supervised training on the labelled images.

    Switches the global ``model`` to training mode with ``mode_autoencoder``
    disabled, then takes one step of the global ``optimizer`` per batch of
    ``labelled_train_loader`` using a mean-reduced cross-entropy loss. The
    current loss is printed every ``log_interval`` batches.

    Args:
        epoch: Epoch number, used only in the progress message.
    """
    model.train()
    model.mode_autoencoder = False
    loss_fn = torch.nn.CrossEntropyLoss(reduction='mean')
    n_batches = len(labelled_train_loader)
    for step, (inputs, labels) in enumerate(labelled_train_loader):
        if use_cuda:
            inputs = inputs.cuda()
            labels = labels.cuda()
        optimizer.zero_grad()
        loss = loss_fn(model(inputs), labels)
        loss.backward()
        optimizer.step()
        if step % log_interval != 0:
            continue
        print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
            epoch, step * len(inputs), len(labelled_train_loader.dataset),
            100. * step / n_batches, loss.item()))
            
def train_autoencoder(epoch):
    """Run one epoch of reconstruction training on the unlabelled images.

    Switches the global ``model`` to training mode with ``mode_autoencoder``
    enabled, then takes one step of the global ``optimizer`` per batch of
    ``train_loader``, minimising the mean-squared error between the model
    output and its input. Targets yielded by the loader are ignored. The
    current loss is printed every ``log_interval`` batches.

    Args:
        epoch: Epoch number, used only in the progress message.
    """
    model.train()
    model.mode_autoencoder = True
    reconstruction_loss = nn.MSELoss()
    n_batches = len(train_loader)
    for step, (images, _) in enumerate(train_loader):
        if use_cuda:
            images = images.cuda()
        optimizer.zero_grad()
        loss = reconstruction_loss(model(images), images)
        loss.backward()
        optimizer.step()
        if step % log_interval != 0:
            continue
        print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
            epoch, step * len(images), len(train_loader.dataset),
            100. * step / n_batches, loss.item()))

def validation_autoencoder():
    """Measure the reconstruction error of the global ``model`` on ``val_loader``.

    The model is put in evaluation mode with ``mode_autoencoder`` enabled and
    is run without gradient tracking, so no autograd graph is kept in memory.
    Each batch's mean-squared error is weighted by its number of samples, so
    the result is a true per-sample average even when the last batch is
    smaller. Targets yielded by the loader are ignored.

    Returns:
        float: Reconstruction MSE averaged over all validation samples
        (``0.0`` when the loader yields no batch).
    """
    model.eval()
    model.mode_autoencoder = True
    criterion = nn.MSELoss()
    total_loss = 0.0
    num_samples = 0
    with torch.no_grad():
        for data, _ in val_loader:
            if use_cuda:
                data = data.cuda()
            output = model(data)
            n = data.size(0)
            # MSELoss averages over the batch; undo that to accumulate a sum.
            total_loss += criterion(output, data).item() * n
            num_samples += n

    validation_loss = total_loss / max(num_samples, 1)
    print('\nValidation set: Average loss: {:.4f}'.format(
        validation_loss))

    return validation_loss

def validation_classifier():
    """Evaluate the global ``model`` as a classifier on ``labelled_val_loader``.

    The model is put in evaluation mode with ``mode_autoencoder`` disabled and
    is run without gradient tracking. The cross-entropy is summed over every
    sample and divided by the size of the labelled validation set, so the
    reported loss is a per-sample average over the set that was evaluated.

    Returns:
        tuple: ``(validation_loss, accuracy)`` where ``validation_loss`` is the
        average cross-entropy per sample (float) and ``accuracy`` is the
        percentage of correctly classified samples.
    """
    model.eval()
    model.mode_autoencoder = False
    # Sum reduction, so dividing by the dataset size yields a per-sample mean.
    criterion = torch.nn.CrossEntropyLoss(reduction='sum')
    num_samples = len(labelled_val_loader.dataset)
    validation_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in labelled_val_loader:
            if use_cuda:
                data, target = data.cuda(), target.cuda()
            output = model(data)
            validation_loss += criterion(output, target).item()
            # The predicted class is the index of the highest score.
            pred = output.argmax(dim=1)
            correct += pred.eq(target).cpu().sum()

    validation_loss /= num_samples
    accuracy = 100. * correct / num_samples
    print('\nValidation set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)'.format(
        validation_loss, correct, num_samples, accuracy))

    return validation_loss, accuracy


In [None]:
# Detect CUDA once; the training and validation functions read this flag to
# decide whether to move batches to the GPU.
use_cuda = torch.cuda.is_available()
print('Using GPU' if use_cuda else 'Using CPU')
if use_cuda:
    model.cuda()

In [None]:
# Train the autoencoder for `num_epochs_ae` epochs on the unlabelled data, validating
# after each one; `val_loss_ae` keeps only the loss of the last epoch.
for epoch in range(1, num_epochs_ae + 1):
  train_autoencoder(epoch)
  val_loss_ae = validation_autoencoder()

In [None]:
# Supervised training of the classifier with early stopping on the labelled
# validation loss.
best_epoch = 0
best_loss = float('inf')
best_state = None
patience = 0
val_loss_per_epoch_ae = []  # validation loss recorded after each classifier epoch
for epoch in range(1, num_epochs_clas + 1):
    train_classifier(epoch)
    val_loss, val_acc = validation_classifier()
    val_loss_per_epoch_ae.append(val_loss)
    if val_loss < best_loss:
        patience = 0
        best_epoch = epoch
        best_loss = val_loss
        # state_dict() returns references to the live tensors, which later epochs
        # overwrite in place; keep an independent copy (on the CPU, to spare GPU memory).
        best_state = {name: tensor.detach().cpu().clone()
                      for name, tensor in model.state_dict().items()}
    else:
        patience += 1

    # Stop once the validation loss has failed to improve for 5 consecutive epochs.
    if patience > 4:
        break

# Put the weights of the best epoch back so that later cells use them.
if best_state is not None:
    model.load_state_dict(best_state)

## Kaggle submission

In [None]:
from tqdm import tqdm
import os
import PIL.Image as Image
from src.utils import pil_loader

# Write the Kaggle submission: one "Id,Category" row per test image.
data_dir = 'bird_dataset'
test_dir = data_dir + '/test_images/mistery_category'
experiment = 'recvis21_a3/experiment_ae_features'

# Create the output directory if needed so that opening the CSV cannot fail on it.
os.makedirs(experiment, exist_ok=True)

# Predict with the classifier output, in evaluation mode and without gradients.
model.eval()
model.mode_autoencoder = False
with open(f'{experiment}/result_ae_features.csv', "w") as output_file, torch.no_grad():
    output_file.write("Id,Category\n")
    for f in tqdm(os.listdir(test_dir)):
        if 'jpg' in f:
            data = data_transforms['val'](pil_loader(test_dir + '/' + f))
            # Add a leading batch dimension of size 1.
            data = data.unsqueeze(0)
            if use_cuda:
                data = data.cuda()
            output = model(data)
            pred = output.argmax(dim=1).item()
            # The image id is the file name without its extension.
            output_file.write("%s,%d\n" % (os.path.splitext(f)[0], pred))

In [None]:
%cd recvis21_a3/
!git add ae_features/result_without_ae_features.csv
!git commit -m 'add result ae features'

In [None]:
# Publish the commit created in the previous cell to the remote repository.
!git push

### Comparison with the same classifier not pretrained on unlabelled data.

Note that I did not have enough GPU memory to hold two models at the same time, so I often restarted the VM before launching this part.

In [None]:
# Fresh model that has not been through autoencoder training, used in classifier mode.
model = VGG13AE(latent_space_dim=512)
model.mode_autoencoder = False
# The training functions move each batch to the GPU when `use_cuda` is set, so the
# new model has to live on the same device.
if use_cuda:
    model.cuda()
# Rebuild the global optimizer: the previous one still holds the parameters of the
# former model and would leave this one untrained.
optimizer = optim.SGD(model.parameters(), lr=lr, momentum=momentum)

In [None]:
# Supervised training of the model that was not pretrained as an autoencoder, with
# early stopping on the labelled validation loss.
best_epoch = 0
best_loss = float('inf')
best_state = None
patience = 0
val_loss_per_epoch_no_ae = []  # validation loss recorded after each epoch of this run
for epoch in range(1, num_epochs_clas + 1):
    train_classifier(epoch)
    val_loss, val_acc = validation_classifier()
    val_loss_per_epoch_no_ae.append(val_loss)
    if val_loss < best_loss:
        patience = 0
        best_epoch = epoch
        best_loss = val_loss
        # state_dict() returns references to the live tensors, which later epochs
        # overwrite in place; keep an independent copy (on the CPU, to spare GPU memory).
        best_state = {name: tensor.detach().cpu().clone()
                      for name, tensor in model.state_dict().items()}
    else:
        patience += 1

    # Stop once the validation loss has failed to improve for 5 consecutive epochs.
    if patience > 4:
        break

# Put the weights of the best epoch back so that later cells use them.
if best_state is not None:
    model.load_state_dict(best_state)

### Kaggle submission

In [None]:
from tqdm import tqdm
import os
import PIL.Image as Image
from src.utils import pil_loader

# Write the Kaggle submission of the comparison model: one "Id,Category" row per
# test image.
data_dir = 'bird_dataset'
test_dir = data_dir + '/test_images/mistery_category'
# NOTE(review): this path assumes the working directory is the parent of the clone
# (e.g. after a VM restart) — confirm before running.
experiment = 'recvis21_a3/experiment_ae_features'

# Create the output directory if needed so that opening the CSV cannot fail on it.
os.makedirs(experiment, exist_ok=True)

# Predict with the classifier output, in evaluation mode and without gradients.
model.eval()
model.mode_autoencoder = False
with open(f'{experiment}/result_ae_without_features.csv', "w") as output_file, torch.no_grad():
    output_file.write("Id,Category\n")
    for f in tqdm(os.listdir(test_dir)):
        if 'jpg' in f:
            data = data_transforms['val'](pil_loader(test_dir + '/' + f))
            # Add a leading batch dimension of size 1.
            data = data.unsqueeze(0)
            if use_cuda:
                data = data.cuda()
            output = model(data)
            pred = output.argmax(dim=1).item()
            # The image id is the file name without its extension.
            output_file.write("%s,%d\n" % (os.path.splitext(f)[0], pred))

In [None]:
%cd recvis21_a3/
!git add ae_features/result_without_ae_features.csv
!git commit -m 'add result without ae features'

In [None]:
# Publish the commit created in the previous cell to the remote repository.
!git push