# Imports

In [6]:
import os
import copy
import numpy as np
import pandas as pd
from tqdm import tqdm
from time import time
from sklearn import preprocessing

import torchvision
from torchvision import models, transforms
from torchvision.io import read_image

import torch
import torch.nn.functional as F
from torch import nn
from torch.utils.tensorboard import SummaryWriter
from torch.utils.data import Dataset

from pytorch_lightning import LightningModule, Trainer
from pytorch_lightning.callbacks.early_stopping import EarlyStopping

# !pip install torchvision onnx-coreml

## TODO:
## - one-hot encode y in PyTorch
## - try sigmoid outputs, an explicit NONE class, or a threshold on the softmax output to detect the NONE class

# Global variables 

In [7]:
# Side length (in pixels) of the square crops produced by the transforms.
INPUT_SIZE = 224
# Number of samples per DataLoader batch.
BATCH_SIZE = 128
# Output size of the final classifier layer installed on the VGG model.
N_CLASS = 5
# Fraction of the custom dataset assigned to the training split.
TRAIN_SPLIT = 0.7

# Label CSV and image directory of the hand-gesture dataset.
PATH_LABELS = "../input/homemade-hand-gesture-dataset/index_label.csv"
PATH_IMG = "../input/homemade-hand-gesture-dataset/output/output"

# Placeholders for a separate validation set; in the visible part of this
# file they are only referenced from commented-out code.
PATH_LABELS_VALID = ""
PATH_IMG_VALID = ""

# Data functions

In [8]:
import cv2

class HandGestureDataset(Dataset):
    """Image dataset backed by a directory of images and a CSV of labels.

    The CSV must contain an ``index`` column holding ``"output/<file name>"``
    and a ``label`` column. Sample ``idx`` is the ``idx``-th file of
    ``img_dir`` in sorted order; its label is looked up in the CSV by file
    name.

    Args:
        annotations_file: path of the label CSV.
        img_dir: directory containing the image files.
        transform: optional callable applied to each decoded image.
    """

    def __init__(self, annotations_file, img_dir, transform=None):
        self.img_labels = pd.read_csv(annotations_file)
        self.img_dir = img_dir
        self.transform = transform
        # List the directory once. Calling os.listdir() for every sample is a
        # full directory scan per item, and its order is not guaranteed, so
        # the idx -> file mapping is pinned here in sorted order.
        self.img_files = sorted(os.listdir(img_dir))

    def __len__(self):
        # The dataset length follows the label file, as before.
        return len(self.img_labels)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for the ``idx``-th image file.

        Raises:
            IndexError: if ``idx`` is beyond the number of files in ``img_dir``.
            ValueError: if the CSV does not hold exactly one row for the file.
        """
        file_name = self.img_files[idx]
        image = read_image(os.path.join(self.img_dir, file_name))
        matches = self.img_labels.loc[
            self.img_labels["index"] == "output/" + file_name, "label"
        ]
        # .item() insists on exactly one matching row.
        label = matches.item()
        if self.transform:
            image = self.transform(image)
        return image, label

In [9]:
def prepare_data_vgg(data_type):
    """Build the training and validation DataLoaders for the VGG model.

    Args:
        data_type: ``"custom"`` for the hand-gesture dataset read from
            ``PATH_LABELS`` / ``PATH_IMG``, or ``"ciphar10"`` (the historical
            spelling; ``"cifar10"`` is accepted too) for torchvision's CIFAR10.

    Returns:
        A ``(train_loader, valid_loader)`` tuple.

    Raises:
        ValueError: if ``data_type`` is not one of the supported names.
    """
    # Validate first so an unknown name fails clearly instead of reaching the
    # return statement with unbound loaders.
    if data_type not in ("custom", "ciphar10", "cifar10"):
        raise ValueError(
            f"unknown data_type {data_type!r}; expected 'custom' or 'ciphar10'"
        )

    ## Parameters fitting vgg/imagenet
    mean = [0.485, 0.456, 0.406]
    std = [0.229, 0.224, 0.225]

    ## Random augmentation for training, deterministic resize/crop for validation
    train_steps = [
        transforms.RandomResizedCrop(INPUT_SIZE),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize(mean, std),
    ]
    valid_steps = [
        transforms.Resize(INPUT_SIZE),
        transforms.CenterCrop(INPUT_SIZE),
        transforms.ToTensor(),
        transforms.Normalize(mean, std),
    ]

    if data_type == "custom":
        ## Custom dataset: samples come from read_image(), so they are
        ## converted to PIL images before the PIL-based steps run.
        train_transform = transforms.Compose([transforms.ToPILImage()] + train_steps)
        valid_transform = transforms.Compose([transforms.ToPILImage()] + valid_steps)

        # Two views of the same files, differing only in their transform, so
        # that validation samples are not randomly augmented. Both views must
        # enumerate the image directory in the same order.
        train_view = HandGestureDataset(PATH_LABELS, PATH_IMG, train_transform)
        valid_view = HandGestureDataset(PATH_LABELS, PATH_IMG, valid_transform)
        n_samples = len(train_view)
        print("Dataset size:", n_samples)
        train_size = int(n_samples * TRAIN_SPLIT)

        # One shared random permutation keeps the two splits disjoint.
        shuffled = torch.randperm(n_samples).tolist()
        VGG_trainset = torch.utils.data.Subset(train_view, shuffled[:train_size])
        VGG_validset = torch.utils.data.Subset(valid_view, shuffled[train_size:])
    else:
        ## CIFAR10 already yields PIL images, so no ToPILImage step is needed.
        VGG_trainset = torchvision.datasets.CIFAR10(
            root='./data', train=True, download=True,
            transform=transforms.Compose(train_steps))
        VGG_validset = torchvision.datasets.CIFAR10(
            root='./data', train=False, download=True,
            transform=transforms.Compose(valid_steps))

    VGG_trainloader = torch.utils.data.DataLoader(
        VGG_trainset, batch_size=BATCH_SIZE, pin_memory=True, shuffle=True)
    # Validation metrics are averages over the split, so no shuffle is needed.
    VGG_validloader = torch.utils.data.DataLoader(
        VGG_validset, batch_size=BATCH_SIZE, pin_memory=True, shuffle=False)

    return VGG_trainloader, VGG_validloader

# Loading data into pytorch dataset and dataloader objects

In [10]:
# Build the loaders for the hand-gesture dataset ("custom" selects the
# PATH_LABELS / PATH_IMG branch of prepare_data_vgg).
VGG_trainloader, VGG_validloader = prepare_data_vgg("custom")

# Model functions

In [11]:
def accuracy(yhat, y):
    """Return the fraction of rows of ``yhat`` whose arg-max matches ``y``.

    ``y`` may hold class indices (shape ``(N,)`` or ``(N, 1)``) or per-class
    scores / one-hot rows (shape ``(N, C)``), in which case its arg-max along
    dim 1 is used as the target. The result is a 0-dim double tensor.
    """
    predicted = torch.argmax(yhat, 1)
    targets_are_indices = len(y.shape) == 1 or y.size(1) == 1
    if targets_are_indices:
        hits = predicted.view(y.size(0), -1) == y.view(-1, 1)
    else:
        hits = predicted.view(-1) == torch.argmax(y, 1).view(-1)
    return hits.double().mean()

def _collect_labels(loaders):
    """Return every raw label yielded by the given data loaders, as a list."""
    labels = []
    for loader in loaders:
        for _, y in loader:
            labels.extend(np.asarray(y).ravel().tolist())
    return labels


def _run_epoch(model, loader, loss_fn, label_encoder, optim=None):
    """Run one pass over ``loader`` and return ``(mean_loss, mean_accuracy)``.

    The pass is a training pass when ``optim`` is given and an evaluation
    pass (eval mode, no gradients) otherwise. Both results are Python floats.
    """
    training = optim is not None
    model.train(training)
    total_loss, total_acc, count = 0.0, 0.0, 0
    with torch.set_grad_enabled(training):
        for x, y in loader:
            x = x.to(device)
            # transform(), not fit_transform(): the label -> index mapping
            # must be the same for every batch.
            y = torch.as_tensor(label_encoder.transform(y), dtype=torch.long)
            # y = F.one_hot(y, num_classes=N_CLASS)
            y = y.to(device)
            if training:
                optim.zero_grad()
            yhat = model(x)
            batch_loss = loss_fn(yhat, y)
            if training:
                batch_loss.backward()
                optim.step()
            # .item() detaches the running sums from autograd and the device.
            total_loss += batch_loss.item() * len(x)
            total_acc += accuracy(yhat, y).item() * len(x)
            count += len(x)
    if count == 0:
        raise ValueError("data loader yielded no samples")
    return total_loss / count, total_acc / count


def train(model, epochs, train_loader, valid_loader, learning_rate, patience, label_encoder, feature_extract=False):
    """Train ``model`` with Adam and cross-entropy, with early stopping.

    After every epoch the model is evaluated on ``valid_loader``. The weights
    with the best validation accuracy are kept and loaded back into the model
    before it is returned. Training stops early once the validation loss has
    not improved for ``patience`` epochs. Per-epoch loss and accuracy are
    printed and written to TensorBoard under ``{TB_PATH}/{model.name}``.

    Args:
        model: network to train; must expose a ``name`` attribute.
        epochs: maximum number of epochs.
        train_loader: loader yielding ``(images, raw_labels)`` training batches.
        valid_loader: loader yielding ``(images, raw_labels)`` validation batches.
        learning_rate: learning rate of the Adam optimizer.
        patience: number of non-improving epochs tolerated by early stopping.
        label_encoder: encoder exposing ``fit`` / ``transform`` (for example a
            scikit-learn ``LabelEncoder``). If it is not fitted yet (no
            ``classes_`` attribute), it is fitted once on all labels of both
            loaders, which costs one extra pass over the data; pass a fitted
            encoder to avoid that pass.
        feature_extract: when True only parameters with ``requires_grad`` are
            handed to the optimizer; otherwise all parameters are.

    Returns:
        The model, on ``device``, holding the best validation-accuracy weights.

    Raises:
        ValueError: if a loader yields no samples.
    """
    ## Fit the encoder once so that every batch shares one label -> index mapping
    if not hasattr(label_encoder, "classes_"):
        label_encoder.fit(_collect_labels((train_loader, valid_loader)))

    ## Early stopping variables
    es = EarlyStopping(patience=patience)
    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0

    ## TensorBoard setup
    model = model.to(device)
    writer = SummaryWriter(f"{TB_PATH}/{model.name}")

    ## Training only the parameters where we require gradient since we are fine-tuning
    print("params to learn:")
    trainable = [(name, param) for name, param in model.named_parameters()
                 if param.requires_grad]
    for name, _ in trainable:
        print("\t", name)
    if feature_extract:
        params_to_update = [param for _, param in trainable]
    else:
        params_to_update = model.parameters()

    ## Setting up our optimizer
    optim = torch.optim.Adam(params_to_update, lr=learning_rate)

    ## Setting up our loss function
    loss = nn.CrossEntropyLoss()

    ## Running the train loop
    print(f"running {model.name}")
    try:
        for epoch in range(epochs):
            train_loss, train_acc = _run_epoch(
                model, train_loader, loss, label_encoder, optim)
            writer.add_scalar('loss/train', train_loss, epoch)
            writer.add_scalar('accuracy/train', train_acc, epoch)
            print("epoch :", epoch, end="")
            print(", train_loss: ", train_loss, end="")
            print(", train_acc: ", train_acc, end="")

            valid_loss, valid_acc = _run_epoch(
                model, valid_loader, loss, label_encoder)
            writer.add_scalar('loss/valid', valid_loss, epoch)
            writer.add_scalar('accuracy/valid', valid_acc, epoch)
            print(", valid_loss: ", valid_loss, end="")
            print(", valid_acc: ", valid_acc)

            ## Early stopping
            if valid_acc > best_acc:
                best_acc = valid_acc
                best_model_wts = copy.deepcopy(model.state_dict())
            if es.step(valid_loss):
                break
    finally:
        # Flush and release the event file even if training is interrupted.
        writer.close()

    print('Best val Acc: {:4f}'.format(best_acc))
    ## Returns the best model
    model.load_state_dict(best_model_wts)
    return model

def set_parameter_requires_grad(model, feature_extract):
    """Freeze the feature extractor of ``model`` when ``feature_extract`` is truthy.

    Parameters whose name contains ``"features"`` get ``requires_grad = False``
    and all others get ``requires_grad = True``. When ``feature_extract`` is
    falsy the model is left untouched.
    """
    if not feature_extract:
        return
    for param_name, param in model.named_parameters():
        param.requires_grad = "features" not in param_name

# Loading the model and modifying the classifier part
### Maybe we could try to modify only the last classifier layer ?

In [12]:
# Root directory for TensorBoard logs; train() writes under TB_PATH/<model.name>.
TB_PATH = "/tmp/logs/sceance2"
# Use the first CUDA device when one is available, otherwise the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

## Loading vgg16 model pretrained on imagenet
vgg = models.vgg16(pretrained=True)

## Replaces the three linear layers of the vgg classifier so that the network
## ends in N_CLASS outputs
## NOTE(review): 25088 is presumably the flattened size of the VGG16 feature
## output (512 * 7 * 7) -- confirm
vgg.classifier[0] = nn.Linear(25088, 8192)
vgg.classifier[3] = nn.Linear(8192, 1024)
vgg.classifier[6] = nn.Linear(1024, N_CLASS)
## Prints the architecture; note that .eval() also switches the model to
## evaluation mode as a side effect
print(vgg.eval())

## Freezes every parameter whose name contains "features" and sets
## requires_grad to True on all the others
set_parameter_requires_grad(vgg, True)

# Implementing early stopping

In [13]:
class EarlyStopping(object):
    """Signal when a monitored metric has stopped improving.

    Call :meth:`step` once per epoch with the current metric value; it returns
    True when training should stop.

    Args:
        mode: ``'min'`` if lower metric values are better, ``'max'`` if higher
            values are better.
        min_delta: minimum change that counts as an improvement. It is an
            absolute amount, or a percentage of the best value when
            ``percentage`` is True.
        patience: number of consecutive non-improving calls tolerated before
            :meth:`step` returns True. ``0`` disables early stopping.
        percentage: interpret ``min_delta`` as a percentage of the best value.

    Raises:
        ValueError: if ``mode`` is neither ``'min'`` nor ``'max'``.
    """

    def __init__(self, mode='min', min_delta=0, patience=10, percentage=False):
        self.mode = mode
        self.min_delta = min_delta
        self.patience = patience
        self.best = None
        self.num_bad_epochs = 0
        self.is_better = None
        self._init_is_better(mode, min_delta, percentage)
        if patience == 0:
            # With early stopping disabled every value counts as an improvement.
            self.is_better = lambda a, b: True

    def step(self, metrics):
        """Record ``metrics`` for this epoch and return True if training should stop."""
        if self.patience == 0:
            # Early stopping disabled: never ask to stop.
            return False
        # Check for NaN before anything else. Otherwise a NaN on the very
        # first call would be stored as the best value and every later
        # comparison against it would be False.
        if np.isnan(metrics):
            return True
        if self.best is None:
            self.best = metrics
            return False
        if self.is_better(metrics, self.best):
            self.num_bad_epochs = 0
            self.best = metrics
        else:
            self.num_bad_epochs += 1
        return self.num_bad_epochs >= self.patience

    def _init_is_better(self, mode, min_delta, percentage):
        """Install ``self.is_better(value, best)`` for the given configuration."""
        if mode not in {'min', 'max'}:
            raise ValueError('mode ' + mode + ' is unknown!')

        def margin(best):
            # Required improvement: absolute, or relative to the best value.
            if percentage:
                return best * min_delta / 100
            return min_delta

        if mode == 'min':
            def is_better(a, best):
                return a < best - margin(best)
        else:
            def is_better(a, best):
                return a > best + margin(best)

        self.is_better = is_better

# Training only the modified parts of the classifier

In [14]:
## Fine-tuning the model on our data
## train() reads model.name for the TensorBoard path and the log line
vgg.name = "VGG"

## Encoder used by train() to turn the raw labels into class indices
le = preprocessing.LabelEncoder()

## feature_extract is left at its default (False), so the optimizer is given
## all of the model's parameters
best_model = train(model=vgg, 
                   epochs=100, 
                   train_loader=VGG_trainloader, 
                   valid_loader=VGG_validloader, 
                   learning_rate=1e-3, ## learning rate for Adam optimizer
                   patience=5, ## early-stopping patience; the monitored metric is the validation loss
                   label_encoder=le) 

# Checking metrics

# Saving the model in .pth and .onnx formats

In [15]:
# Directory the weights file is written to.
PATH = "./"
# Save only the state dict (weights), not the whole module, as vgg.pth.
# NOTE(review): train() loads the best weights into the model it was given
# before returning, so vgg presumably holds the same weights as best_model -- confirm.
torch.save(vgg.state_dict(), os.path.join(PATH,"vgg.pth"))

In [16]:
# Drop the `vgg` name.
# NOTE(review): best_model presumably still refers to the same module, so
# this alone may not release its memory -- confirm.
del vgg

In [17]:
# model = models.vgg16(pretrained=True)
# model.classifier[0] = nn.Linear(25088, 8192)
# model.classifier[3] = nn.Linear(8192, 1024)
# model.classifier[6] = nn.Linear(1024, N_CLASS)
# model.load_state_dict(torch.load(os.path.join(PATH,"vgg.pth"), map_location='cpu'))
# model.eval() 

# dummy_input = torch.randn(BATCH_SIZE, 3, INPUT_SIZE, INPUT_SIZE)  
# torch.onnx.export(model,   
#                   dummy_input, 
#                   "vgg.onnx",
#                   export_params=True,
#                   do_constant_folding=True, 
#                   input_names = ['modelInput'],
#                   output_names = ['modelOutput'])