## Regression + Classification - ResNet50 with K-fold - L2 loss

In [None]:
# Put these at the top of every notebook, to get automatic reloading and inline plotting
%reload_ext autoreload
%autoreload 2
%matplotlib inline

In [None]:
# Expose only CUDA device "2" to this process (setting used on titan.sci.utah.edu).
import os
os.environ.update({"CUDA_VISIBLE_DEVICES": "2"})


In [None]:
# This file contains all the main external libs we'll use
from fastai.imports import *
from fastai.conv_learner import *
from fastai.plots import *

import pandas as pd
import numpy as np

from sklearn.model_selection import KFold

from sklearn.metrics import *
import math


In [None]:
# Notebook-wide settings.
PATH = "../data/"  # data root; the CSV paths below are built from it
arch = resnet50    # backbone architecture
sz, bs = 224, 8    # sz: image size passed to the transforms, bs: batch size

In [None]:
# CSV for the classification task (path is relative to PATH)
Classification_csv = f'{PATH}Dataset.csv'

In [None]:
# Load the classification labels and preview the first rows.
Classification_df = pd.read_csv(Classification_csv)
Classification_df.head()

In [None]:
# CSV for the regression task (path is relative to PATH)
Regression_csv = f'{PATH}Dataset_Regression.csv'

In [None]:
# Load the regression labels and preview the first rows.
Regression_df = pd.read_csv(Regression_csv)
Regression_df.head()

In [None]:
# Validation indices

# Number of data rows = number of lines in the regression CSV minus the header line.
# The `with` block closes the file handle (the previous bare open() never did), and
# the lines are counted lazily instead of being loaded into a list first.
with open(Regression_csv) as csv_file:
    n = sum(1 for _ in csv_file) - 1
# Return validation indexes using a 10% split
val_idxs = get_cv_idxs(n, val_pct=0.1)
print('n:',n)
print('Nb val_idxs',len(val_idxs))
print('val_idxs',val_idxs)

In [None]:
# Transformations
tfms = tfms_from_model(arch, sz, aug_tfms=transforms_side_on)

In [None]:
# Define regression dataset
md = ImageClassifierData.from_csv(PATH, 'data_all', Regression_csv, tfms=tfms, continuous=True, 
   bs=bs, val_idxs=val_idxs, num_workers = 0)

# Define classification dataset
md2 = ImageClassifierData.from_csv(PATH, 'data_all', Classification_csv, tfms=tfms, continuous=False, 
   bs=bs, val_idxs=val_idxs, num_workers = 0)



In [None]:
class ConcatLblDataset(Dataset):
    """Wrap a dataset so each item also carries a second label taken from `y2`.

    Item `i` is returned as `(x, (y, y2[i]))`, where `(x, y)` is item `i` of the
    wrapped dataset `ds`. The length is that of `ds`.
    """

    def __init__(self, ds, y2):
        self.ds = ds
        self.y2 = y2

    def __len__(self):
        return len(self.ds)

    def __getitem__(self, i):
        sample, label = self.ds[i]
        extra_label = self.y2[i]
        return sample, (label, extra_label)

In [None]:
# Combine datasets: attach the classification labels (from md2) to the regression
# datasets (from md), then make md's data loaders serve the combined datasets.
trn_ds2, val_ds2 = (
    ConcatLblDataset(reg_ds, cls_labels)
    for reg_ds, cls_labels in ((md.trn_ds, md2.trn_y), (md.val_ds, md2.val_y))
)

md.trn_dl.dataset, md.val_dl.dataset = trn_ds2, val_ds2

In [None]:
# Custom head with 1 + 5 outputs: column 0 is consumed as the regression output and
# the remaining 5 columns as class logits (see how detn_loss slices its input).
# NOTE(review): 4096 presumably matches the feature count produced by
# AdaptiveConcatPool2d on top of the resnet50 body - confirm.
head_reg = nn.Sequential(*[
    AdaptiveConcatPool2d(),
    Flatten(),
    nn.BatchNorm1d(num_features=4096),
    nn.Dropout(p=0.25),
    nn.Linear(in_features=4096, out_features=1 + 5),
])

# Attach the custom head to the backbone and wrap everything in a learner using Adam.
models = ConvnetBuilder(arch, 0, 0, 0, custom_head=head_reg)
learn = ConvLearner(md, models)
learn.opt_fn = optim.Adam

In [None]:
learn

In [None]:
# Cell to run for this notebook
# Adaptation from "pascal_Clem" notebook
def detn_loss(input, target):
    """Joint criterion: MSE on the regression output plus weighted cross-entropy.

    `input` is split column-wise: column 0 is the raw regression activation and
    the remaining columns are the class logits. `target` is a pair
    `(regression_target, class_target)`.
    """
    reg_target, cls_target = target
    reg_raw = input[:, :1]
    cls_logits = input[:, 1:]
    # Squash the raw activation into the interval (600, 800).
    reg_pred = F.sigmoid(reg_raw) * 200 + 600
    reg_term = F.mse_loss(reg_pred, reg_target)
    # The two terms were inspected separately and the multiplier 1000 was picked
    # by hand so that they end up approximately equal in magnitude.
    cls_term = F.cross_entropy(cls_logits, cls_target) * 1000
    return reg_term + cls_term


def detn_l2(input, target):
    """Metric: mean squared error of the rescaled regression output.

    Only column 0 of `input` and the first element of the `target` pair are
    used; the classification part is ignored. Returns the `.data` of the loss.
    """
    reg_target, _cls_target = target
    # Same rescaling as in detn_loss: sigmoid output mapped into (600, 800).
    reg_pred = F.sigmoid(input[:, :1]) * 200 + 600
    mse = F.mse_loss(V(reg_pred), V(reg_target))
    return mse.data

def detn_acc(input, target):
    """Metric: classification accuracy computed from the class logits.

    Uses columns 1 onward of `input` as logits and the second element of the
    `target` pair as the class labels; the regression part is ignored.
    """
    _reg_target, cls_target = target
    cls_logits = input[:, 1:]
    logits_np = to_np(cls_logits)
    labels_np = to_np(cls_target)
    return accuracy_np(logits_np, labels_np)

# Train with the joint loss; report regression MSE and classification accuracy.
learn.crit, learn.metrics = detn_loss, [detn_l2, detn_acc]

In [None]:
# Learning-rate search before training, with the upper bound set to end_lr=10.
lrf=learn.lr_find(end_lr=10)

In [None]:
# Plot the learning-rate schedule recorded by the search above.
learn.sched.plot_lr()

In [None]:
# Plot the search result; n_skip = 5 is passed to skip the first points.
learn.sched.plot(n_skip = 5)

In [None]:
# Stage 1: train before unfreezing (the K-fold cell labels this stage
# "Optimizing Last layer only...").
lr = 2e-3
learn.fit(lr, 5)

In [None]:
# Same stage again, this time with cycle_len = 1.
learn.fit(lr, 5, cycle_len = 1)

In [None]:
# Stage 2: unfreeze (the K-fold cell labels this stage "Optimizing full model...").
learn.unfreeze()
lr = 1e-3
# Three learning rates: lr/9, lr/3 and lr.
# NOTE(review): presumably one per layer group of the model - confirm.
lrs=np.array([lr/9,lr/3,lr])

In [None]:
# Repeat the learning-rate search on the unfrozen model (default bounds).
lrf=learn.lr_find()

In [None]:
learn.sched.plot(n_skip=5)

In [None]:
# lr and lrs are set again to the same values as above, so this cell can be
# run on its own.
lr = 1e-3
lrs=np.array([lr/9,lr/3,lr])
learn.fit(lrs, 5, cycle_len = 1, cycle_mult = 2)

In [None]:
lr = 1e-3

List_MAE = []
#List_RMSE = []
#List_R2 = []
List_Acc = []

KFold_Iteration = 0

# K-fold cross validation
kf = KFold(n_splits=5, shuffle=True, random_state = 1)

for train_index, val_index in kf.split(Classification_df.index):
    print("\n\nKFold_Iteration", KFold_Iteration)
    print("\nLength training",len(train_index))
    print("Length validation",len(val_index))
    
    tfms = tfms_from_model(arch, sz, aug_tfms=transforms_side_on)
    
    # Define regression dataset
    md = ImageClassifierData.from_csv(PATH, 'data_all', Regression_csv, tfms=tfms, continuous=True, 
        bs=bs, val_idxs=val_index, num_workers = 0)
    
    # Define classification dataset
    md2 = ImageClassifierData.from_csv(PATH, 'data_all', Classification_csv, tfms=tfms, continuous=False, 
        bs=bs, val_idxs=val_index, num_workers = 0)
    
    # Combine datasets
    trn_ds2 = ConcatLblDataset(md.trn_ds, md2.trn_y)
    val_ds2 = ConcatLblDataset(md.val_ds, md2.val_y)
    md.trn_dl.dataset = trn_ds2
    md.val_dl.dataset = val_ds2
    
    # Add  model
    head_reg = nn.Sequential(
        AdaptiveConcatPool2d(),
        Flatten(),
        nn.BatchNorm1d(4096),
        nn.Dropout(0.25),
        nn.Linear(4096,1+5),
    )
    
    models = ConvnetBuilder(arch, 0, 0, 0, custom_head=head_reg)
    
    learn = ConvLearner(md, models)
    # Update optimizer and loss function
    learn.opt_fn = optim.Adam
    learn.crit = detn_loss
    learn.metrics = [detn_l2, detn_acc]
    
    print("Optimizing Last layer only...")
    lr = 2e-3
    learn.fit(lr, 5)
    #learn.precompute=False
    learn.fit(lr, 5, cycle_len=1)
    print("\nOptimizing full model...")
    learn.unfreeze()
    # New learning rate
    lr = 1e-3
    lrs=np.array([lr/9,lr/3,lr])
    result = learn.fit(lrs, 5, cycle_len=1, cycle_mult=2)
    
    MAE = result[1]
    Acc = result[2]
    
    print('MAE', MAE)
    List_MAE.append(MAE)
    
    print('Acc', Acc)
    List_Acc.append(Acc)
    
    KFold_Iteration += 1


In [None]:
print("\nResults overview")
MAE_avg = np.average(List_MAE)
MAE_std = np.std(List_MAE)
print("MAE_avg",MAE_avg)
print("MAE_std",MAE_std)


Acc_avg = np.average(List_Acc)
Acc_std = np.std(List_Acc)
print("Acc_avg",Acc_avg)
print("Acc_std",Acc_std)

In [None]:
learn