# MobileNet fine-tuning: training, evaluation and Kaggle prediction

In [5]:
import numpy as np
import os
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torchvision import datasets, transforms
import utils
from model import MobileNet

## 1. Setting

### 1.1 Model parameters

In [6]:
# Positional constructor arguments for MobileNet, kept in one tuple so that
# every cell that rebuilds the model unpacks the same settings.
model_parameters = (
    'mobilenet',        # first argument; presumably the model name -- confirm in model.MobileNet
    utils.num_classes,  # class count provided by utils
    True,               # third argument; meaning not visible here -- TODO confirm (pretrained flag?)
)

### 1.2 Data augmentation

In [7]:
# Per-split preprocessing pipelines.
# Only the training split is augmented (random horizontal flip); validation and
# testing get the same conversion and normalization without augmentation.
# The mean/std values match the ImageNet channel statistics listed in the
# torchvision pretrained-model documentation.
NORMALIZE_MEAN = [0.485, 0.456, 0.406]
NORMALIZE_STD = [0.229, 0.224, 0.225]


def _make_pipeline(*augmentations):
    """Return a Compose of ToPILImage -> augmentations -> ToTensor -> Normalize."""
    return transforms.Compose([
        transforms.ToPILImage(),
        *augmentations,
        transforms.ToTensor(),
        transforms.Normalize(NORMALIZE_MEAN, NORMALIZE_STD),
    ])


data_transforms = {
    'training': _make_pipeline(transforms.RandomHorizontalFlip()),
    'validation': _make_pipeline(),
    'testing': _make_pipeline(),
}

## 2. CNN (backbone) tuning

### 2.0 Reload model

In [4]:
# Build a fresh MobileNet from the shared constructor arguments, then move it
# to the device configured in utils.
model = MobileNet(*model_parameters)
model = model.to(utils.device)

### 2.1 Set training hyperparameters

In [5]:
# training parameters, grouped by the component that consumes them

# SGD optimizer
LEARNING_RATE = 5e-3
MOMENTUM = 0.9

# StepLR schedule: the learning rate is multiplied by GAMMA every STEP_SIZE epochs
GAMMA = 0.2
STEP_SIZE = 1

# training loop and data loading
EPOCHS = 3
BATCH_SIZE = 128

In [6]:
# create pytorch datasets
# NOTE(review): the name `datasets` rebinds the `torchvision.datasets` module
# imported at the top of the notebook; the data loader cell reads this dict
# under that name, so renaming it requires touching both cells.
training_pkl_path = utils.dfs_path + '/master_training_no_temp.pkl'
datasets = {
    'training': utils.HernitiaDataset(
        training_pkl_path,
        return_ratio_frame_idx=False,
        transform=data_transforms['training'],
    ),
}

In [7]:
# instantiate data loaders
# Only the training split exists at this stage; shuffling is requested for it.
dataloaders = {
    'training': utils.DataLoader(
        dataset=datasets['training'],
        batch_size=BATCH_SIZE,
        shuffle=True,
    ),
}

In [8]:
# Loss function: cross entropy.
criterion = nn.CrossEntropyLoss()

# SGD with momentum; all model parameters are handed to the optimizer.
optimizer = optim.SGD(
    model.parameters(),
    lr=LEARNING_RATE,
    momentum=MOMENTUM,
)

# Multiply the learning rate by GAMMA once every STEP_SIZE epochs.
exp_lr_scheduler = optim.lr_scheduler.StepLR(
    optimizer,
    step_size=STEP_SIZE,
    gamma=GAMMA,
)

### 2.2 Train

In [9]:
# Fine-tune on the training split only (validation is switched off).
utils.train_model(
    model=model,
    model_name=model.model_name,  # also the name of the weights file saved within /weights
    dataloaders=dataloaders,
    criterion=criterion,
    optimizer=optimizer,
    scheduler=exp_lr_scheduler,
    num_epochs=EPOCHS,
    validation=False,
)

Epoch 1/3
----------
training Loss: 0.3587 Acc: 0.8778
Epoch 2/3
----------
training Loss: 0.1455 Acc: 0.9511
Epoch 3/3
----------
training Loss: 0.1107 Acc: 0.9628
Training complete in 63m 57s


## 3. Evaluate model

In [8]:
# Rebuild the network for evaluation from the shared constructor arguments and
# place it on the device configured in utils.
model = MobileNet(*model_parameters)
model = model.to(utils.device)

In [4]:
# reload weights from finetuning
# map_location remaps the stored tensors onto utils.device, so a checkpoint
# written on a GPU machine still loads when this notebook runs on CPU.
# NOTE(review): torch.load unpickles the file -- only load weights you trust.
weights_file = utils.weights_path + '/' + model.model_name + '.pkl'
state_dict = torch.load(weights_file, map_location=utils.device)
model.load_state_dict(state_dict)

<All keys matched successfully>

In [5]:
# Cross-entropy loss used for the evaluation run.
criterion = nn.CrossEntropyLoss()

# Evaluation batch size.
# NOTE(review): this rebinds the BATCH_SIZE defined in section 2.1 (128), so
# rerunning the training cells after this one would pick up 64.
BATCH_SIZE = 64

In [11]:
# create pytorch datasets
# Validation split, preprocessed with the augmentation-free 'validation' pipeline.
validation_pkl_path = utils.dfs_path + '/validation_no_temp.pkl'
dataset = utils.HernitiaDataset(
    validation_pkl_path,
    return_ratio_frame_idx=True,
    transform=data_transforms['validation'],
)

In [12]:
# instantiate data loaders
# No shuffling is requested for the evaluation loader.
dataloader = utils.DataLoader(
    dataset=dataset,
    batch_size=BATCH_SIZE,
    shuffle=False,
)

In [13]:
# Score the reloaded model on the validation loader. The recorded output below
# (Acc 0.8086) is noticeably lower than the final training accuracy (0.9628).
utils.evaluate_model(model, dataloader, criterion)

Loss: 0.7496 Acc: 0.8086


## 5. Make Kaggle prediction

In [9]:
# Rebuild the network for prediction from the shared constructor arguments and
# place it on the device configured in utils.
model = MobileNet(*model_parameters)
model = model.to(utils.device)

In [10]:
# reload weights from finetuning
# map_location remaps the stored tensors onto utils.device, so a checkpoint
# written on a GPU machine still loads when this notebook runs on CPU.
# NOTE(review): torch.load unpickles the file -- only load weights you trust.
weights_file = utils.weights_path + '/' + model.model_name + '.pkl'
state_dict = torch.load(weights_file, map_location=utils.device)
model.load_state_dict(state_dict)

<All keys matched successfully>

In [None]:
# Run the Kaggle prediction helper with the reloaded model and the 'testing'
# pipeline. The model's name is passed both as model_name and as
# predictions_name; the smoothing step passes the same predictions_name.
utils.predict_kaggle(
    model=model,
    model_name=model.model_name,
    return_ratio_frame_idx=False,
    transform=data_transforms['testing'],
    predictions_name=model.model_name,
)
    



## 6. Smooth prediction

Smoothing the predictions has given roughly +2% test accuracy in the runs I have seen.

In [6]:
# Smooth the predictions identified by the model's name (the same
# predictions_name that was passed to utils.predict_kaggle).
predictions_name = model.model_name
utils.smooth_predictions(predictions_name=predictions_name)