## Regression - ResNet50 with K-fold

In [None]:
# Standard notebook preamble (keep it as the first code cell):
#   - (re)load IPython's autoreload extension,
#   - mode 2 re-imports every module before each cell runs, so edits to
#     library code are picked up without restarting the kernel,
#   - render matplotlib figures inline in the notebook output.
%reload_ext autoreload
%autoreload 2
%matplotlib inline

In [None]:
# Set CUDA_VISIBLE_DEVICES to pin the notebook to one GPU on titan.sci.utah.edu (uncomment to use)
#import os
#os.environ["CUDA_VISIBLE_DEVICES"] = "1"


In [None]:
# This file contains all the main external libs we'll use
from fastai.imports import *

from fastai.transforms import *
from fastai.conv_learner import *
from fastai.model import *
from fastai.dataset import *
from fastai.sgdr import *
from fastai.plots import *

import pandas as pd
import numpy as np

from sklearn.model_selection import KFold

from sklearn.metrics import *
import math


In [None]:
# Notebook-wide settings shared by the cells below.
arch = resnet50        # backbone handed to tfms_from_model / ConvLearner.pretrained
sz = 224               # image size handed to tfms_from_model
bs = 8                 # batch size handed to ImageClassifierData.from_csv
PATH = "../data/"      # data root; the label CSV path is built from it

In [None]:
label_csv = f'{PATH}Dataset.csv'

# Count the rows of the label file: every line minus one (the first line is
# assumed to be the CSV header). The file is read inside a `with` block so the
# handle is closed as soon as counting is done, and lines are streamed rather
# than materialised into a list.
with open(label_csv) as csv_file:
    n = sum(1 for _ in csv_file) - 1

# Return validation indexes using a 10% split
val_idxs = get_cv_idxs(n, val_pct=0.1)
print('n:',n)
print('Nb val_idxs',len(val_idxs))
print('val_idxs',val_idxs)

In [None]:
# Load the label table into a DataFrame and preview its first five rows.
label_df = pd.read_csv(filepath_or_buffer=label_csv)
label_df.head(n=5)

In [None]:
# Transform pipeline for the chosen architecture / image size, with the
# side-on augmentation set.
tfms = tfms_from_model(arch, sz, aug_tfms=transforms_side_on)

# Dataset built from the label CSV in regression mode (continuous=True);
# 'data_all' is passed as the image folder and `val_idxs` selects the
# held-out rows.
data = ImageClassifierData.from_csv(
    PATH,
    'data_all',
    label_csv,
    tfms=tfms,
    continuous=True,
    bs=bs,
    val_idxs=val_idxs,
    num_workers=8
)

# Pretrained learner with no extra fully-connected layers (xtra_fc=[]) and
# dropout 0.25, without precomputed activations.
# Alternatives that were tried and left disabled:
#   head_reg = nn.Sequential(Flatten(), nn.Linear(100352,1))
#   learn = ConvLearner.pretrained(arch, data, precompute=True, ps=0.5)
learn = ConvLearner.pretrained(
    arch,
    data,
    xtra_fc=[],
    ps=0.25,
    precompute=False
)

In [None]:
# Bare expression: the notebook shows the learner's repr as the cell output,
# which lets the reader inspect the object that was just built.
learn

In [None]:
# Run the learner's learning-rate finder; whatever it returns is kept in `lrf`.
lrf = learn.lr_find()

In [None]:
# Plot the learning rates held by the learner's scheduler (`learn.sched`) —
# presumably the schedule recorded by the preceding lr_find() run; confirm.
learn.sched.plot_lr()

In [None]:
# Default scheduler plot — presumably loss against learning rate from the
# LR finder, used to pick the rates below; TODO confirm for this fastai version.
learn.sched.plot()

In [None]:
# ---------------------------------------------------------------------------
# 5-fold cross-validation.
#
# For every fold a fresh pretrained learner is built with that fold's rows as
# the validation set, trained in two phases (before and after unfreeze), and
# evaluated with test-time augmentation. The per-fold MAE / RMSE / R2 values
# are appended to List_MAE / List_RMSE / List_R2 for the summary cell.
# ---------------------------------------------------------------------------

# Learning rates do not change between folds, so they are defined once here
# rather than being re-assigned on every iteration.
head_lr = 1e-1                      # rate used for the fits that run before unfreeze()
lr = 5e-4                           # top rate used after unfreeze()
lrs = np.array([lr/9, lr/3, lr])    # three rates handed to fit(); presumably one per layer group — confirm

List_MAE = []
List_RMSE = []
List_R2 = []

# K-fold cross validation (fixed random_state so the fold assignment is repeatable)
kf = KFold(n_splits=5, shuffle=True, random_state=1)

for KFold_Iteration, (train_index, val_index) in enumerate(kf.split(label_df.index)):
    print("\n\nKFold_Iteration", KFold_Iteration)
    print("\nLength training",len(train_index))
    print("Length validation",len(val_index))

    # Rebuild transforms, data and learner from scratch so no weights carry
    # over from the previous fold.
    tfms = tfms_from_model(arch, sz, aug_tfms=transforms_side_on)
    data = ImageClassifierData.from_csv(PATH, 'data_all', label_csv, tfms=tfms,
                                        continuous=True, bs=bs,
                                        val_idxs=val_index, num_workers=8)

    # Learner without extra fully-connected layers (instead of the default fastai head).
    # Disabled alternative: head_reg = nn.Sequential(Flatten(), nn.Linear(100352,1))
    learn = ConvLearner.pretrained(arch, data, xtra_fc=[], ps=0.25, precompute=False)

    # Update optimizer and loss function (Adam + mean absolute error)
    learn.opt_fn = optim.Adam
    learn.crit = nn.L1Loss()

    # Phase 1: two rounds of 5 fits before the model is unfrozen.
    print("Optimizing Last layer only...")
    learn.fit(head_lr, 5)
    learn.fit(head_lr, 5, cycle_len=1)

    # Phase 2: unfreeze and continue with the smaller rates in `lrs`.
    print("\nOptimizing full model...")
    learn.unfreeze()
    learn.fit(lrs, 5, cycle_len=1, cycle_mult=2)

    # Test-time augmentation: average the predictions over the first axis of
    # `preds` (the axis TTA stacks its passes on).
    print("TTA inference...")
    preds, y = learn.TTA()
    probs = np.mean(preds,0)

    # Analysis
    MAE = mean_absolute_error(y, probs)
    RMSE = math.sqrt(mean_squared_error(y, probs))
    print('MAE', MAE)
    print('RMSE', RMSE)
    List_MAE.append(MAE)
    List_RMSE.append(RMSE)

    # R2 calculation: 1 - SSres / SStot
    y_mean = np.mean(y, 0)
    print('y_mean', y_mean)

    SStot = np.sum((y-y_mean)**2)
    SSres = np.sum((y-probs)**2)
    # If every validation target in the fold is identical, SStot is 0 and the
    # ratio is undefined; record NaN explicitly instead of dividing by zero.
    R2 = 1 - (SSres/SStot) if SStot > 0 else float('nan')
    print('SStot', SStot)
    print('SSres', SSres)
    print('R2', R2)
    List_R2.append(R2)


In [None]:
print("\nResults overview")

# Mean and standard deviation of each metric over the folds collected by the
# cross-validation cell, printed metric by metric.
MAE_avg, MAE_std = np.average(List_MAE), np.std(List_MAE)
for label, value in (("MAE_avg", MAE_avg), ("MAE_std", MAE_std)):
    print(label, value)

RMSE_avg, RMSE_std = np.average(List_RMSE), np.std(List_RMSE)
for label, value in (("RMSE_avg", RMSE_avg), ("RMSE_std", RMSE_std)):
    print(label, value)

R2_avg, R2_std = np.average(List_R2), np.std(List_R2)
for label, value in (("R2_avg", R2_avg), ("R2_std", R2_std)):
    print(label, value)

In [None]:
# Display the current `learn` object; after a top-to-bottom run this is the
# learner assigned in the last iteration of the K-fold loop.
learn