# RecVis Assignment 3

**Author:** Arthur Pignet
arthur.pignet@mines-paristech.fr

This notebook is the first of the two notebooks I wrote for this assignment.

It covers the training of the provided baseline CNN, the fine-tuning of a ResNet pretrained on ImageNet, and feature extraction with a wavelet scattering network followed by a CNN.

The second notebook covers another approach: feature extraction via an autoencoder trained on unlabelled data.

The notebook has been run with Google Colab, on P100 GPUs. 

In [None]:
gpu_info = !nvidia-smi
gpu_info = '\n'.join(gpu_info)
if gpu_info.find('failed') >= 0:
  print('Select the Runtime > "Change runtime type" menu to enable a GPU accelerator, ')
  print('and then re-execute this cell.')
else:
  print(gpu_info)

In [None]:
# Set the git identity used by the `git commit` cells further down.
!git config --global user.email "arthur.pignet@mines-paristech.fr"
!git config --global user.name "arthurPignet"
# Clone the assignment repository and make it the working directory, so that
# relative paths (main.py, src/, requirements.txt) resolve from here on.
!git clone https://github.com/arthurPignet/recvis21_a3.git
%cd recvis21_a3
!ls

In [None]:
!pip install -r requirements.txt

In [None]:
# get the data
!wget -q https://www.di.ens.fr/willow/teaching/recvis18orig/assignment3/bird_dataset.zip
!unzip bird_dataset.zip 
!rm bird_dataset.zip

### Standard model

This is the baseline model provided with the assignment.
In practice I did not submit its test predictions to Kaggle, as that would have wasted a submission token.

In [None]:
# Run the repository's training script under the experiment name
# 'experiment_0' (presumably also the output folder, given the checkpoint
# path used by the evaluation cell -- confirm in main.py).
!python main.py --experiment 'experiment_0'

In [None]:
# Run the repository's evaluation script on the checkpoint
# experiment_0/model_10.pth (presumably the epoch-10 weights -- confirm).
!python evaluate.py --model experiment_0/model_10.pth

In [None]:
# Version the artefacts of the baseline run and push them to the remote.
!git add experiment_0/*
!git commit -m 'add experiment_0, with the given model and parameters'
!git push

## ResNet Finetuning

For this section I relied heavily on the associated PyTorch tutorial.

### Imports

In [None]:
from __future__ import print_function
from __future__ import division
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import torchvision
from torchsummary import summary
from torchvision import datasets, models, transforms
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix 
import seaborn as sns
import time
import os
import copy
print("PyTorch Version: ",torch.__version__)
print("Torchvision Version: ",torchvision.__version__)

from src.models import initialize_pretrained_model
from src.data import get_data_transform

In [None]:
# Detect whether a CUDA GPU is available. Only the flag is set here: the
# model does not exist yet at this point of the notebook (it is created in the
# "Model" section), so it cannot be moved to the GPU from this cell.
use_cuda = torch.cuda.is_available()
print('Using GPU' if use_cuda else 'Using CPU')

### Parameters

In [None]:
# Top level data directory. Here we assume the format of the directory conforms
#   to the ImageFolder structure
data_dir = "./bird_dataset/"
# Folder in which the Kaggle submission csv of this section is written
experiment = './experiment_resnet'

# Models to choose from [resnet, alexnet, vgg, squeezenet, densenet, inception]
model_name = "resnet"


# Number of classes in the dataset
num_classes = 20

# Batch size for training (change depending on how much memory you have)
batch_size = 32

# Number of epochs to train for
num_epochs = 100

# Initial SGD learning rate and momentum used to build the optimizer
lr = 0.01
momentum = 0.8

# Training progress is printed every `log_interval` batches
log_interval = 2
# Flag for feature extracting. When False, we finetune the whole model,
#   when True we only update the reshaped layer params
feature_extract = False


### Model

In [None]:
# Build the pretrained network selected by `model_name`; the helper also
# returns the image side length (`input_size`) used by the summary and the
# data transforms below.
model, input_size =  initialize_pretrained_model(model_name, num_classes, feature_extract, use_pretrained=True)


In [None]:
# Move the network to the GPU only when one is available, so the notebook
# also runs on a CPU-only runtime.
if use_cuda:
    model.cuda()
# torchsummary builds its dummy input on `device` (default "cuda"); pass the
# device the model actually lives on.
summary(model, (3, input_size, input_size), device="cuda" if use_cuda else "cpu")

### Data

In [None]:
# Preprocessing pipelines for images of side `input_size`. The result is
# indexed with 'train' and 'val' below (and in the submission cell), so it must
# be bound to the name `data_transforms`; the original cell bound it to
# `data_transform` only, which raised a NameError on a fresh kernel.
data_transforms = get_data_transform(input_size)
data_transform = data_transforms  # previous name, kept as an alias

# Training images are shuffled every epoch; validation images are not.
train_loader = torch.utils.data.DataLoader(
    datasets.ImageFolder(os.path.join(data_dir, 'train_images'),
                         transform=data_transforms['train']),
    batch_size=batch_size, shuffle=True, num_workers=1)
val_loader = torch.utils.data.DataLoader(
    datasets.ImageFolder(os.path.join(data_dir, 'val_images'),
                         transform=data_transforms['val']),
    batch_size=batch_size, shuffle=False, num_workers=1)

### Training

In [None]:
# Optimizer for the network created above: plain SGD with the learning rate
# and momentum set in the "Parameters" cell. `train` reads this module-level
# `optimizer` each time it is called.

optimizer = optim.SGD(model.parameters(), lr=lr, momentum=momentum)

def train(epoch):
    """Run one training pass over ``train_loader``.

    Uses the notebook-level ``model``, ``optimizer``, ``train_loader``,
    ``use_cuda`` and ``log_interval``. The cross-entropy loss of the current
    batch is printed every ``log_interval`` batches.

    Args:
        epoch: epoch number, only used in the progress message.
    """
    model.train()
    loss_fn = torch.nn.CrossEntropyLoss(reduction='mean')
    for step, (inputs, labels) in enumerate(train_loader):
        if use_cuda:
            inputs = inputs.cuda()
            labels = labels.cuda()

        # Standard SGD step: reset gradients, forward, backward, update.
        optimizer.zero_grad()
        loss = loss_fn(model(inputs), labels)
        loss.backward()
        optimizer.step()

        if step % log_interval != 0:
            continue
        message = 'Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'
        print(message.format(
            epoch, step * len(inputs), len(train_loader.dataset),
            100. * step / len(train_loader), loss.item()))

def validation():
    """Evaluate the notebook-level ``model`` on ``val_loader``.

    The cross-entropy is accumulated with ``reduction='sum'`` and divided by
    the number of validation samples, so the reported value is a true
    per-sample average. (Summing per-batch means and dividing by the dataset
    size, as before, under-reports the loss by roughly the batch size.)

    Returns:
        (validation_loss, accuracy): average loss per sample and accuracy in
        percent, both as Python floats.
    """
    model.eval()
    criterion = torch.nn.CrossEntropyLoss(reduction='sum')
    total_loss = 0.0
    correct = 0
    # No gradients are needed for evaluation; skipping them saves memory.
    with torch.no_grad():
        for data, target in val_loader:
            if use_cuda:
                data, target = data.cuda(), target.cuda()
            output = model(data)
            # sum up batch loss
            total_loss += criterion(output, target).item()
            # index of the highest score = predicted class
            pred = output.argmax(dim=1)
            # .item() keeps `correct` a plain int rather than a tensor
            correct += (pred == target).sum().item()

    n_samples = len(val_loader.dataset)
    validation_loss = total_loss / n_samples
    accuracy = 100. * correct / n_samples
    print('\nValidation set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)'.format(
        validation_loss, correct, n_samples, accuracy))

    return validation_loss, accuracy



In [None]:
# Training with early stopping on the validation loss.
best_epoch = 0
best_loss = float('inf')
# `state_dict()` returns references to the live parameters, which keep changing
# as training goes on; a deep copy is required to really freeze the best weights.
best_state = copy.deepcopy(model.state_dict())
# Number of consecutive epochs without improvement. Initialised up front so
# the `else` branch cannot hit an undefined name (e.g. NaN loss on epoch 1).
patience = 0
max_patience = 4
# Decayed copy of the base learning rate; the configured `lr` is left
# untouched so this cell can be re-run.
current_lr = lr

for epoch in range(1, num_epochs + 1):
    train(epoch)
    val_loss, accuracy = validation()

    if val_loss < best_loss:
        patience = 0
        best_epoch = epoch
        best_loss = val_loss
        best_state = copy.deepcopy(model.state_dict())
    else:
        patience += 1

    # Stop once the validation loss has not improved for more than
    # `max_patience` consecutive epochs.
    if patience > max_patience:
        break

    # Every 10 epochs: decay the learning rate by 0.8 and restart SGD with it.
    # The decay is applied before the optimizer is rebuilt; previously the
    # optimizer was rebuilt first, so each decay took effect 10 epochs late.
    if epoch % 10 == 0:
        current_lr *= 0.8
        optimizer = torch.optim.SGD(model.parameters(), lr=current_lr, momentum=0.9,
                                    weight_decay=0.0005)

In [None]:
# Put the weights recorded as `best_state` by the training loop back into the
# model before evaluating it.
#state_dict = torch.load(model_file)
model.load_state_dict(best_state)

### Evaluation

The following cell evaluates the model's per-class performance on the validation set, in order to detect an unbalanced validation dataset.


In [None]:
# Evaluate the model on the validation set and keep, for every image, the
# true label (`n_target`) and the predicted label (`n_output`) so that a
# confusion matrix can be built afterwards.
model.eval()
# reduction='sum' + division by the number of samples gives a true per-sample
# average (summing per-batch means and dividing by the dataset size does not).
criterion = torch.nn.CrossEntropyLoss(reduction='sum')
validation_loss = 0.0
correct = 0
n_target = []
n_output = []
# Evaluation only: no gradient bookkeeping needed.
with torch.no_grad():
    for data, target in val_loader:
        if use_cuda:
            data, target = data.cuda(), target.cuda()
        output = model(data)
        # index of the highest score = predicted class
        pred = output.argmax(dim=1)
        n_target.extend(target.cpu().tolist())
        n_output.extend(pred.cpu().tolist())
        # sum up batch loss
        validation_loss += criterion(output, target).item()
        # .item() keeps `correct` a plain int rather than a tensor
        correct += (pred == target).sum().item()

validation_loss /= len(val_loader.dataset)
print('\nValidation set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)'.format(
    validation_loss, correct, len(val_loader.dataset),
    100. * correct / len(val_loader.dataset)))

In [None]:
# Confusion matrix between the true labels and the predictions collected on
# the validation set, drawn as a heatmap.
conf = confusion_matrix(n_target, n_output)
sns.heatmap(conf)

### Kaggle submission

In [None]:
from tqdm import tqdm
import os
import PIL.Image as Image
from src.utils import pil_loader

# Write the Kaggle submission file for the fine-tuned ResNet: one
# "<image id>,<predicted class>" row per test image.
model.eval()
test_dir = data_dir + '/test_images/mistery_category'

# The output folder is not created anywhere else in this section.
os.makedirs(experiment, exist_ok=True)

# `with` closes the csv even if a prediction fails; no_grad because this is
# inference only.
with open(f'{experiment}/result_resnet.csv', "w") as output_file, torch.no_grad():
    output_file.write("Id,Category\n")
    for f in tqdm(os.listdir(test_dir)):
        if 'jpg' in f:
            data = data_transforms['val'](pil_loader(test_dir + '/' + f))
            # add the batch dimension expected by the network
            data = data.unsqueeze(0)
            if use_cuda:
                data = data.cuda()
            # The ResNet takes the image tensor directly. The previous version
            # passed it through `scattering`, which belongs to the next
            # section and is not defined at this point.
            output = model(data)
            pred = output.argmax(dim=1).item()
            # f[:-4] drops the 4-character file extension to get the image id
            output_file.write("%s,%d\n" % (f[:-4], pred))

In [None]:
# Version the ResNet submission file and push it to the remote.
!git add experiment_resnet/result_resnet.csv
!git commit -m 'add experiment with resnet finetune'
!git push

## Scattering


### Imports

In [None]:
from __future__ import print_function
from __future__ import division
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import torchvision
from torchvision import datasets, models, transforms
import matplotlib.pyplot as plt
import time
import os
import copy
print("PyTorch Version: ",torch.__version__)
print("Torchvision Version: ",torchvision.__version__)

from kymatio.torch import Scattering2D
from src.models import CNNScattering 
from src.data import get_data_transform


### Parameters

In [None]:
# Top level data directory. Here we assume the format of the directory conforms
#   to the ImageFolder structure
data_dir = "./bird_dataset/"
# Folder receiving the outputs of this section. Created from Python so the
# name is written once and re-running the cell does not fail when the folder
# already exists (unlike a bare `!mkdir`).
experiment = 'experiment_scattering'
os.makedirs(experiment, exist_ok=True)

# Number of classes in the dataset
num_classes = 20

# Batch size for training (change depending on how much memory you have)
batch_size = 128

# Number of epochs to train for
# NOTE(review): the training cell of this section loops over a hard-coded
# 90 epochs and does not read this value.
num_epochs = 30

# NOTE(review): the training cell sets lr = 0.1 and momentum=0.9 itself, so
# these two values are not the ones actually used for the scattering model.
lr = 0.001
momentum = 0.8

log_interval = 2

# Side length (in pixels) of the images fed to the scattering transform
input_size = 64


### Data

In [None]:
# Preprocessing pipelines for images of side `input_size`. The result is
# indexed with 'train' and 'val' below (and in the submission cell), so it must
# be bound to the name `data_transforms`; the original cell bound it to
# `data_transform` only, which raised a NameError on a fresh kernel.
data_transforms = get_data_transform(input_size)
data_transform = data_transforms  # previous name, kept as an alias

# Training images are shuffled every epoch; validation images are not.
train_loader = torch.utils.data.DataLoader(
    datasets.ImageFolder(os.path.join(data_dir, 'train_images'),
                         transform=data_transforms['train']),
    batch_size=batch_size, shuffle=True, num_workers=1)
val_loader = torch.utils.data.DataLoader(
    datasets.ImageFolder(os.path.join(data_dir, 'val_images'),
                         transform=data_transforms['val']),
    batch_size=batch_size, shuffle=False, num_workers=1)

In [None]:
def train(model, device, train_loader, optimizer, epoch, scattering):
    """Train ``model`` for one epoch on scattering features.

    Args:
        model: network applied to the output of ``scattering``.
        device: torch device the batches are moved to.
        train_loader: loader yielding ``(data, target)`` batches; must expose
            ``.dataset`` for the progress message.
        optimizer: optimizer updating ``model``'s parameters.
        epoch: epoch number, only used in the progress message.
        scattering: callable applied to each batch before the model.
    """
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        features = scattering(data)
        output = model(features)
        # Fully qualified name: the notebook never imports
        # `torch.nn.functional as F`, so `F.cross_entropy` raised a NameError.
        loss = torch.nn.functional.cross_entropy(output, target)
        loss.backward()
        optimizer.step()
        # progress message every 50 batches
        if batch_idx % 50 == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), len(train_loader.dataset),
                100. * batch_idx / len(train_loader), loss.item()))

def test(model, device, test_loader, scattering):
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(scattering(data))
            test_loss += F.cross_entropy(output, target, reduction='sum').item() # sum up batch loss
            pred = output.max(1, keepdim=True)[1] # get the index of the max log-probability
            correct += pred.eq(target.view_as(pred)).sum().item()

    test_loss /= len(test_loader.dataset)
    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.2f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))

In [None]:
# Choose the compute device: GPU when CUDA is available, CPU otherwise.
use_cuda = torch.cuda.is_available()
device = torch.device("cuda") if use_cuda else torch.device("cpu")

# Number of input channels handed to the CNN: 81 scattering channels for each
# of the 3 colour channels.
K = 81 * 3

# Scattering transform with J=2 on square images of side `input_size`,
# moved to the GPU when there is one.
scattering = Scattering2D(J=2, shape=(input_size, input_size))
if use_cuda:
    scattering = scattering.cuda()

# CNN applied on top of the scattering output.
model = CNNScattering(K).to(device)

In [None]:
from torchsummary import summary
# the input is 81 channel because of the wavelet decomposition (and non-linearity)
# torchsummary creates its dummy input on `device` (default "cuda"); pass the
# device the model was moved to so the cell also runs on a CPU-only runtime.
summary(model, (3, 81, 16, 16), device=device.type)

### Training

In [None]:
# Train the scattering CNN for 90 epochs, evaluating on the validation set
# after each one.
lr = 0.1
for epoch in range(90):
    # A fresh SGD optimizer is created at epochs 0, 20, 40, ... with the same
    # learning rate (the 0.2 learning-rate decay is currently disabled).
    if epoch % 20 == 0:
        optimizer = torch.optim.SGD(model.parameters(), lr=lr, momentum=0.9,
                                    weight_decay=0.0005)

    # epochs are reported 1-based in the progress messages
    train(model, device, train_loader, optimizer, epoch + 1, scattering)
    test(model, device, val_loader, scattering)

### Kaggle submission

In [None]:
from tqdm import tqdm
import os
import PIL.Image as Image
from src.utils import pil_loader

# Write the Kaggle submission file for the scattering + CNN model: one
# "<image id>,<predicted class>" row per test image.
model.eval()
test_dir = data_dir + '/test_images/mistery_category'

# `with` closes the csv even if a prediction fails; no_grad because this is
# inference only.
with open(f'{experiment}/result_scattering.csv', "w") as output_file, torch.no_grad():
    output_file.write("Id,Category\n")
    for f in tqdm(os.listdir(test_dir)):
        if 'jpg' in f:
            data = data_transforms['val'](pil_loader(test_dir + '/' + f))
            # add the batch dimension expected by the scattering transform
            data = data.unsqueeze(0)
            if use_cuda:
                data = data.cuda()
            # scattering features first, then the CNN
            sc = scattering(data)
            output = model(sc)
            pred = output.argmax(dim=1).item()
            # f[:-4] drops the 4-character file extension to get the image id
            output_file.write("%s,%d\n" % (f[:-4], pred))

In [None]:
# Version the scattering submission file and push it to the remote.
!git add experiment_scattering/result_scattering.csv
!git commit -m 'add experiment with small cnn and scattering'
!git push