# Everything starts here

In [1]:
import numpy as np
import os
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torchvision import datasets, transforms
import utils
from model import MobileNetRatio

## 1. Setting

### 1.1 Model parameters

In [2]:
# Positional constructor arguments, unpacked later as MobileNetRatio(*model_parameters)
# every time the model is (re)built in this notebook.
model_parameters = (
    'mobilenetratio',    # string identifier passed as the first argument
    utils.num_classes,   # class count taken from the project's utils module
    True,                # boolean flag; its meaning is defined by MobileNetRatio - TODO confirm
)

### 1.2 Data augmentation

In [3]:
# Per-channel normalisation constants shared by every split
# (the values match the standard ImageNet statistics documented by torchvision).
_CHANNEL_MEAN = [0.485, 0.456, 0.406]
_CHANNEL_STD = [0.229, 0.224, 0.225]


def _make_pipeline(*augmentations):
    """Compose ToPILImage -> optional augmentations -> ToTensor -> Normalize."""
    steps = [transforms.ToPILImage()]
    steps.extend(augmentations)
    steps.append(transforms.ToTensor())
    steps.append(transforms.Normalize(_CHANNEL_MEAN, _CHANNEL_STD))
    return transforms.Compose(steps)


# Training adds a random horizontal flip; validation and testing only convert and normalise.
data_transforms = {
    'training': _make_pipeline(transforms.RandomHorizontalFlip()),
    'validation': _make_pipeline(),
    'testing': _make_pipeline(),
}

## 2. CNN (backbone) finetuning

### 2.0 Reload model

In [4]:
# Build a fresh MobileNetRatio from the shared constructor arguments,
# then move it to the device configured in utils.
model = MobileNetRatio(*model_parameters)
model = model.to(utils.device)

### 2.1 Set training hyperparameters

In [5]:
# --- hyperparameters for the backbone fine-tuning stage ---

# training loop
EPOCHS = 6
BATCH_SIZE = 64

# SGD optimizer
LEARNING_RATE = 0.0005
MOMENTUM = 0.9

# StepLR schedule: the learning rate is multiplied by GAMMA every STEP_SIZE epochs
STEP_SIZE = 1
GAMMA = 0.5

In [8]:
# create pytorch datasets
# NOTE: the generic per-split construction below is disabled and kept for reference only;
# the next cell builds `datasets` from explicitly named pickle files instead.
#datasets = {x: utils.HernitiaDataset(utils.dfs_path + '/' + x + '.pkl', return_ratio_frame_idx = True, transform = data_transforms[x])  
#        for x in ['training', 'validation']}

IndentationError: unexpected indent (401771937.py, line 3)

In [10]:
# Pickle file (appended to utils.dfs_path) backing each split in the fine-tuning stage.
_split_files = {
    'training': '/training_no_temp_os.pkl',
    'validation': '/validation_no_temp.pkl',
}

# One HernitiaDataset per split, each with the transform pipeline of the same name.
# NOTE: this rebinds `datasets`, shadowing the torchvision `datasets` module imported
# at the top of the notebook.
datasets = {
    split: utils.HernitiaDataset(
        utils.dfs_path + rel_path,
        return_ratio_frame_idx=True,
        transform=data_transforms[split],
    )
    for split, rel_path in _split_files.items()
}

In [11]:
# Wrap each split's dataset in a shuffling loader that batches BATCH_SIZE samples.
# NOTE(review): the validation loader is shuffled as well; aggregate metrics should not
# depend on order, so shuffling could probably be limited to training - confirm.
dataloaders = {
    split: utils.DataLoader(
        dataset=datasets[split],
        batch_size=BATCH_SIZE,
        shuffle=True,
    )
    for split in ('training', 'validation')
}

In [12]:
# Loss function: cross entropy.
criterion = nn.CrossEntropyLoss()

# SGD with momentum over every parameter of the model (the whole network is fine-tuned here).
optimizer = optim.SGD(
    model.parameters(),
    lr=LEARNING_RATE,
    momentum=MOMENTUM,
)

# Step schedule: multiply the learning rate by GAMMA once every STEP_SIZE epochs.
exp_lr_scheduler = optim.lr_scheduler.StepLR(
    optimizer,
    step_size=STEP_SIZE,
    gamma=GAMMA,
)

### 2.2 Train

In [None]:
# Run the fine-tuning loop for EPOCHS epochs; per-epoch training/validation loss and
# accuracy are reported in the cell output.
utils.train_model(
    model=model,
    model_name=model.model_name,  # also used as the file name of the saved weights within /weights
    dataloaders=dataloaders,
    criterion=criterion,
    optimizer=optimizer,
    scheduler=exp_lr_scheduler,
    num_epochs=EPOCHS,
)

Epoch 1/6
----------
training Loss: 0.3543 Acc: 0.8861
validation Loss: 0.7494 Acc: 0.7689
Epoch 2/6
----------

## 3. Freeze backbone - Unfreeze fc12 and retrain

### 3.0 Reload model

In [13]:
# Start the second stage from a newly constructed model on the configured device;
# the fine-tuned weights are loaded into it in the following cell.
model = MobileNetRatio(*model_parameters)
model = model.to(utils.device)

In [14]:
# Restore the weights stored under this model's name (<weights_path>/<model_name>.pkl).
# map_location remaps the stored tensors onto utils.device, so a checkpoint written on a
# GPU machine still loads on a CPU-only one.
# NOTE: torch.load unpickles the file - only load checkpoints from a trusted source.
weights_file = utils.weights_path + '/' + model.model_name + '.pkl'
model.load_state_dict(torch.load(weights_file, map_location=utils.device))

<All keys matched successfully>

In [15]:
# Freeze the backbone through the model's own helper. Per this section's heading the goal is
# to retrain only fc12 (the linear layer processing the ratio).
# NOTE(review): confirm in model.py which layers freeze_backbone() leaves trainable.
model.freeze_backbone()

### 3.1 Set training hyperparameters

In [16]:
# --- hyperparameters for the frozen-backbone retraining stage ---

# training loop
EPOCHS = 10
BATCH_SIZE = 64

# SGD optimizer
LEARNING_RATE = 0.002
MOMENTUM = 0.9

# StepLR schedule: the learning rate is multiplied by GAMMA every STEP_SIZE epochs
STEP_SIZE = 1
GAMMA = 0.8

In [17]:
def _load_split(split):
    """Build the HernitiaDataset for `split` from '<dfs_path>/<split>_no_temp.pkl'."""
    return utils.HernitiaDataset(
        utils.dfs_path + '/' + split + '_no_temp.pkl',
        return_ratio_frame_idx=True,
        transform=data_transforms[split],
    )


# Datasets for this stage, keyed by split name.
# NOTE(review): section 2 trained on 'training_no_temp_os.pkl', whereas this stage reads
# 'training_no_temp.pkl' - confirm the difference is intentional.
datasets = {split: _load_split(split) for split in ('training', 'validation')}

In [18]:
# One shuffling loader per split, batching BATCH_SIZE samples at a time.
dataloaders = {
    split: utils.DataLoader(
        dataset=datasets[split],
        batch_size=BATCH_SIZE,
        shuffle=True,
    )
    for split in ('training', 'validation')
}

In [19]:
# criterion is cross entropy loss
criterion = nn.CrossEntropyLoss()

# Hand the optimizer only the parameters that are still trainable. model.freeze_backbone()
# was called earlier in this section, so "all parameters" are no longer being optimized.
# Assumes freeze_backbone() marks frozen weights with requires_grad=False - TODO confirm in
# model.py; if it does not, the filter keeps every parameter and this matches the old call.
trainable_params = [p for p in model.parameters() if p.requires_grad]
optimizer = optim.SGD(trainable_params, lr=LEARNING_RATE, momentum=MOMENTUM)

# decay LR by a factor GAMMA every STEP_SIZE epochs
exp_lr_scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=STEP_SIZE, gamma=GAMMA)

### 3.2 Train

In [None]:
# Retrain with the backbone frozen for EPOCHS epochs; per-epoch training/validation loss and
# accuracy are reported in the cell output.
utils.train_model(
    model=model,
    model_name=model.model_name,  # also used as the file name of the saved weights within /weights
    dataloaders=dataloaders,
    criterion=criterion,
    optimizer=optimizer,
    scheduler=exp_lr_scheduler,
    num_epochs=EPOCHS,
)

Epoch 1/10
----------
training Loss: 0.1550 Acc: 0.9481
validation Loss: 0.7855 Acc: 0.8065
Epoch 2/10
----------
training Loss: 0.1490 Acc: 0.9495
validation Loss: 0.8309 Acc: 0.8041
Epoch 3/10
----------
training Loss: 0.1463 Acc: 0.9506
validation Loss: 0.8382 Acc: 0.8011
Epoch 4/10
----------
training Loss: 0.1447 Acc: 0.9518
validation Loss: 0.8270 Acc: 0.8009
Epoch 5/10
----------
training Loss: 0.1424 Acc: 0.9514
validation Loss: 0.8288 Acc: 0.8013
Epoch 6/10
----------

## 4. Evaluate model

In [3]:
# Fresh model instance for evaluation, placed on the configured device;
# the trained weights are loaded in the following cell.
model = MobileNetRatio(*model_parameters)
model = model.to(utils.device)

In [4]:
# Reload the weights saved during training (<weights_path>/<model_name>.pkl).
# map_location remaps the stored tensors onto utils.device, so a checkpoint written on a
# GPU machine still loads on a CPU-only one.
# NOTE: torch.load unpickles the file - only load checkpoints from a trusted source.
weights_file = utils.weights_path + '/' + model.model_name + '.pkl'
model.load_state_dict(torch.load(weights_file, map_location=utils.device))

<All keys matched successfully>

In [5]:
# Evaluation settings: cross-entropy loss, scored in batches of BATCH_SIZE samples.
criterion = nn.CrossEntropyLoss()
BATCH_SIZE = 64

In [11]:
# Dataset used for evaluation: the validation split, read from '<dfs_path>/validation_no_temp.pkl'
# and passed through the validation transform pipeline.
eval_split = 'validation'
dataset = utils.HernitiaDataset(
    utils.dfs_path + '/' + eval_split + '_no_temp.pkl',
    return_ratio_frame_idx=True,
    transform=data_transforms[eval_split],
)

In [12]:
# Unshuffled loader over the evaluation dataset, BATCH_SIZE samples per batch.
dataloader = utils.DataLoader(
    dataset=dataset,
    batch_size=BATCH_SIZE,
    shuffle=False,
)

In [13]:
# Score the reloaded model on the validation loader with the cross-entropy criterion;
# the aggregate loss and accuracy are reported in the cell output.
utils.evaluate_model(model, dataloader, criterion)

Loss: 0.7496 Acc: 0.8086


## 5. Make Kaggle prediction

In [4]:
# Fresh model instance for the Kaggle prediction step, placed on the configured device.
model = MobileNetRatio(*model_parameters)
model = model.to(utils.device)

In [10]:
# Produce the Kaggle predictions with the testing transform pipeline; they are stored under
# the model's name (predictions_name).
# NOTE(review): unlike section 4, no load_state_dict call precedes this in the section;
# presumably utils.predict_kaggle loads the trained weights via model_name - confirm in utils,
# otherwise predictions would come from an untrained model.
utils.predict_kaggle(
    model=model,
    model_name=model.model_name,
    return_ratio_frame_idx=True,
    transform=data_transforms['testing'],
    predictions_name=model.model_name,
)
    



## 6. Smooth prediction

Smoothing the predictions adds roughly +2% test accuracy in the runs I have seen.

In [6]:
# Smooth the predictions stored under the model's name, i.e. the same predictions_name that was
# passed to utils.predict_kaggle above. Requires `model` to exist in the kernel already.
utils.smooth_predictions(predictions_name = model.model_name)