# Model training

In [8]:
from tqdm.notebook import tqdm
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import sklearn.metrics

import torch
import torchvision.transforms as transforms
import torchvision.models
import torch.optim as optim
import torch.nn as nn
from torch.utils.data import DataLoader
from torch.utils.data import WeightedRandomSampler

import time

import data_reader
import dataset

In [9]:
# Choosing device for tensor processing

if torch.cuda.is_available():
    device = torch.device("cuda:0")
    print("Using GPU :", device)
else:
    device = torch.device("cpu")
    print("Using CPU ")

Using GPU : cuda:0


In [10]:
# Training and data variables

MODE = "w"
EXP_NAME = "10ep"

DATA_SET_NAME = f"data_set_x20_1%_split"
PATCH_SIZE = 512

BATCH_SIZE = 16
EPOCHS = 25

SIZE_X = PATCH_SIZE
SIZE_Y = PATCH_SIZE

threshold = 0.05 # Loss difference for early stopping

In [11]:
def init_weights(m): # XAVIER initialization for final layer weight initialization
    if type(m) == nn.Linear:
        torch.nn.init.xavier_uniform_(m.weight)
        m.bias.data.fill_(0.01)

def init():
    global net, loss_function, scheduler, optimizer, MODEL_NAME

    net = torchvision.models.resnet18(pretrained=True).to(device)
    
    for param in net.parameters():
        param.requires_grad = False # Freezing the convolutional layers

    #for param in net.layer4[1].parameters():
    #    param.requires_grad = True # Unfreezing the last residual block
    
    net.fc = nn.Sequential(
                nn.Linear(512, 128),
                nn.ReLU(inplace=True),
                nn.Linear(128, 3),
                nn.Softmax(dim = -1)
                ).to(device)

    net.fc.apply(init_weights) # Xavier init

    #print(f"Loading {MODEL_NAME}")
    #net = torch.load(f"C:\\Users\\Alejandro\\Desktop\\heterogeneous-data\\results\\WSI\\models\\{MODEL_NAME}.pth") # Model loading

    n_params = sum(p.numel() for p in net.fc.parameters())# + sum(p.numel() for p in net.layer4[1].parameters())
    print("Number of free parameters: ", n_params)

    #Hyperparameters:
    learning_rate = 1E-3 # LR
    loss_function = nn.BCELoss()  # Loss # [1,0] es positivo y [0,1] negativo
    optimizer = optim.Adam(net.parameters(), lr=learning_rate, betas=(0.9, 0.999), eps=1e-08)# Optimizer
    lambda1 = lambda epoch: 1 ** epoch # Scheduler
    scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer, lr_lambda=lambda1)


In [12]:
def fwd_pass(X, y, train=False):
# IMPORTANTE, TRAIN = FALSE PARA QUE NO ENTRENE CON EL TEST DATA ESTO ES PARA PODER HACER TEST MIENTRAS ENTRENO Y VALIDO, 
# SE ESPERA QUE LA EXACTITUD EN EL TEST DE VALIDACIÓN SEA MENOR
    if train: 
        net.zero_grad()
        
    # NORMALIZATION
    mean = [0.485, 0.456, 0.406]
    std = [0.229, 0.224, 0.225]
    normalize = torchvision.transforms.Normalize(mean=mean, std=std)

    for i, x in enumerate(X):
        X[i] = normalize(X[i]/255) # Np array

    outputs = net(X)
    #matches = [torch.argmax(i) == torch.argmax(j) for i,j in zip(outputs, y)]
    y_pred = [torch.argmax(i) for i in outputs.cpu()]
    y_true = [torch.argmax(i) for i in y.cpu()]

    #acc = matches.count(True)/len(matches)
    loss = loss_function(outputs, y)
    conf_m = sklearn.metrics.confusion_matrix(y_true, y_pred, labels=[0, 1, 2])
    acc = sklearn.metrics.accuracy_score(y_true, y_pred)
    f1 = sklearn.metrics.f1_score(y_true, y_pred, average= "micro")
    auc = 0 #sklearn.metrics.roc_auc_score(y_true, y_pred, average= "micro", multi_class="ovr") #!!!!
    
    if train:
        loss.backward() # Calculate gradients using backprop
        optimizer.step() # Updates W and b using previously calculated gradients

    return [acc, loss, conf_m, f1, auc]

In [17]:
def train():
  global net, loss_function, scheduler, threshold, optimizer, train_set, val_set, MODEL_NAME, EPOCHS
  
  print(MODEL_NAME)
  patience = 0

  with open(f"C:\\Users\\Alejandro\\Desktop\\heterogeneous-data\\results\\WSI\\log\\model_{MODEL_NAME}.log", MODE) as f:
    for epoch in range(EPOCHS):
      acc, loss, f1, auc = 0, 0, 0, 0
      val_acc, val_loss, val_f1, val_auc = 0, 0, 0, 0
      conf_m, val_conf_m = np.zeros((3,3)), np.zeros((3,3))

      print("\nEPOCH: ", epoch+1)

      for batch_X, batch_y in tqdm(iter(train_dataloader)):

        batch_X, batch_y = batch_X.type(torch.FloatTensor).to(device).permute(0, 3, 2, 1), batch_y.type(torch.FloatTensor).to(device) 
        
        net.train() # Making sure that the model is in training mode
        
        performance_metrics = fwd_pass(batch_X, batch_y, train=True)
        acc_aux, loss_aux, conf_m_aux, f1_aux, auc_aux = performance_metrics[0], performance_metrics[1], performance_metrics[2], performance_metrics[3], performance_metrics[4]
        acc += acc_aux*(len(batch_X)/len(train_set)) # Calculating the average loss and acc through batches sum ACCi*Wi/N (Wi = weight of the batch)
        loss += loss_aux*(len(batch_X)/len(train_set))
        conf_m += conf_m_aux
        f1 += f1_aux*(len(batch_X)/len(train_set))
        auc += auc_aux*(len(batch_X)/len(train_set))
        """
        i += 1
    
        if i%100 == 0:
          print("Memory allocated in GPU: ", torch.cuda.memory_allocated("cuda:0")/1024/1024/1024)
        """
        
      for batch_X, batch_y in tqdm(iter(val_dataloader)):

        batch_X, batch_y = batch_X.type(torch.FloatTensor).to(device).permute(0, 3, 2, 1), batch_y.type(torch.FloatTensor).to(device)

        net.eval() # Making sure that the model is not training and deactivate droptout
        
        with torch.no_grad(): # Disable all computations, works together with net.eval()
          performance_metrics = fwd_pass(batch_X, batch_y, train=False)
          
        acc_aux, loss_aux, conf_m_aux, f1_aux, auc_aux = performance_metrics[0], performance_metrics[1], performance_metrics[2], performance_metrics[3], performance_metrics[4]  
        val_acc += acc_aux*(len(batch_X)/len(val_set)) # Calculating the average loss and acc through batches sum ACCi*Wi/N (Wi = weight of the batch)
        val_loss += loss_aux*(len(batch_X)/len(val_set))
        val_conf_m += conf_m_aux
        val_f1 += f1_aux*(len(batch_X)/len(val_set))
        val_auc += auc_aux*(len(batch_X)/len(val_set))
      
      print("Val loss: ", val_loss.item()," Train loss: ", loss.item(), "\n")
      print("Val acc: ", val_acc," Train acc: ", acc, "\n")
      print("Val AUC: ", val_auc,"Train AUC: ", auc)
      print("Val f1: ", val_f1," Train f1: ", f1, "\n")
      print("Val CONF: \n", val_conf_m,"\nTrain CONF: \n", conf_m, "\n")

      conf_m = f"{conf_m[0][0]}+{conf_m[0][1]}+{conf_m[1][0]}+{conf_m[1][1]}"
      val_conf_m = f"{val_conf_m[0][0]}+{val_conf_m[0][1]}+{val_conf_m[1][0]}+{val_conf_m[1][1]}"
    
      f.write(f"{MODEL_NAME},{round(time.time(),3)},{round(float(acc),3)},{round(float(loss),4)},{conf_m},{round(float(auc),4)},")
      f.write(f"{round(float(val_acc),3)},{round(float(val_loss),4)},{val_conf_m}, {round(float(val_auc),4)}\n")
      f.write("\n\n")

      # Early stopping, if the difference between loss and validation loss 
      # is bigger than the threshold for 3 epochs in a row training is stopped
      if loss.item()-val_loss.item()> threshold:
        patience +=1
      else:
        patience = 0

      print("Learning Rate: ", optimizer.param_groups[0]["lr"])
      scheduler.step() # Changing the learning rate

      if patience >= 4:
        print("Stopping early: ")
        break

    torch.save(net, f"C:\\Users\\Alejandro\\Desktop\\heterogeneous-data\\results\\WSI\\models\\{MODEL_NAME}.pth")

In [18]:
# Choosing only the images from the according splits (10CV)

SPLITS = 1 # Number of iterations > n_splits
n_splits = 10 # Number of splits to use

for SPLIT in range(SPLITS):
    MODEL_NAME = EXP_NAME + f"{SPLIT}"

    TRAIN_SPLITS = list(range(n_splits))
    # We take out the SPLIT and SPLIT+1 sets for val and testing
    if SPLIT == n_splits: # For the final split for validation we take the firs one for test
        TRAIN_SPLITS.remove(0) 
    else:
        TRAIN_SPLITS.remove(SPLIT+1)
    TRAIN_SPLITS.remove(SPLIT)

    train_set = dataset.PatchDataset([], [], [])
    val_set = dataset.PatchDataset([], [], [])
    test_set = dataset.PatchDataset([], [], [])

    # Loading training splits:
    for i in TRAIN_SPLITS:
        print(f"Loading training split: {i}")
        SPLIT_NAME = DATA_SET_NAME + f"{i}"
        
        X_, y_, _ , _ = data_reader.read_lmdb(f"D:/data/WSI/COAD/patches/{SPLIT_NAME}")

        train_set.inputs.extend(X_)
        train_set.labels.extend(y_)

    # Random oversampler

    y =  [i[0] for i in train_set.labels]
    positive = sum(y)
    class_sample_count = np.array([len(y)-positive ,positive])
    weight = 1. / class_sample_count
    samples_weight = np.array([weight[t] for t in y])
    samples_weight = torch.from_numpy(samples_weight) # Probability for a sample to be sampled
    #samples_weight = torch.tensor([1/len(samples_weight)]*len(samples_weight))
    sampler = WeightedRandomSampler(samples_weight.type('torch.DoubleTensor'), len(samples_weight))#, replacement=False)
    # Will take len(samples_weight) number of samples, this can be changed
    
    # Creating DataLoader
    train_dataloader = DataLoader(train_set, batch_size=BATCH_SIZE, sampler=sampler)
    
    print(f"Patches for training: {len(train_set)}\n")
    
    # Loading validation splits:
    SPLIT_NAME = DATA_SET_NAME + f"{SPLIT}"

    print(f"Loading validation split: {SPLIT}")

    X_, y_, _ , _ = data_reader.read_lmdb(f"D:/data/WSI/COAD/patches/{SPLIT_NAME}")

    val_set.inputs.extend(X_)
    val_set.labels.extend(y_)

    val_dataloader = DataLoader(val_set, batch_size=BATCH_SIZE, shuffle=True)

    print(f"Patches for validation: {len(val_set)}\n")

    # Loading validation splits:
    if SPLIT == n_splits:
        SPLIT_NAME = DATA_SET_NAME + f"{0}"
        print(f"Loading test split: {0}")
    else:
        SPLIT_NAME = DATA_SET_NAME + f"{SPLIT+1}"
        print(f"Loading test split: {SPLIT+1}")

    X_, y_, _ , _ = data_reader.read_lmdb(f"D:/data/WSI/COAD/patches/{SPLIT_NAME}")

    test_set.inputs.extend(X_)
    test_set.labels.extend(y_)

    test_dataloader = DataLoader(test_set, batch_size=BATCH_SIZE, shuffle=True)

    print(f"Patches for test: {len(test_set)}\n")

    init()

    train()

    del train_set, val_set, test_set


Loading training split: 2
Read lmdb
Loading training split: 3
Read lmdb
Loading training split: 4
Read lmdb
Loading training split: 5
Read lmdb
Loading training split: 6
Read lmdb
Loading training split: 7
Read lmdb
Loading training split: 8
Read lmdb
Loading training split: 9
Read lmdb
Patches for training: 7691

Loading validation split: 0
Read lmdb
Patches for validation: 994

Loading test split: 1
Read lmdb
Patches for test: 1004

Number of free parameters:  66051
10ep0

EPOCH:  1


  0%|          | 0/481 [00:00<?, ?it/s]

  0%|          | 0/63 [00:00<?, ?it/s]

Val loss:  0.6769677996635437  Train loss:  0.4159485101699829 

Val acc:  0.44768611670020103  Train acc:  0.7174619685346523 

Val AUC:  0.0 Train AUC:  0.0
Val f1:  0.44768611670020103  Train f1:  0.7174619685346523 

Val CONF: 
 [[ 82.   0.   0.]
 [306. 333.  52.]
 [ 69. 122.  30.]] 
Train CONF: 
 [[3351.  433.   25.]
 [ 637. 2108.  113.]
 [ 232.  733.   59.]] 

Learning Rate:  0.001

EPOCH:  2


  0%|          | 0/481 [00:00<?, ?it/s]

  0%|          | 0/63 [00:00<?, ?it/s]

Val loss:  0.4576139748096466  Train loss:  0.36717677116394043 

Val acc:  0.694164989939638  Train acc:  0.7497074502665464 

Val AUC:  0.0 Train AUC:  0.0
Val f1:  0.694164989939638  Train f1:  0.7497074502665464 

Val CONF: 
 [[ 76.   6.   0.]
 [ 77. 614.   0.]
 [ 14. 207.   0.]] 
Train CONF: 
 [[3494.  371.   13.]
 [ 531. 2238.   35.]
 [ 158.  817.   34.]] 

Learning Rate:  0.001

EPOCH:  3


  0%|          | 0/481 [00:00<?, ?it/s]

  0%|          | 0/63 [00:00<?, ?it/s]

Val loss:  0.5068979263305664  Train loss:  0.34317702054977417 

Val acc:  0.6509054325955737  Train acc:  0.7725913405278909 

Val AUC:  0.0 Train AUC:  0.0
Val f1:  0.6509054325955737  Train f1:  0.7725913405278909 

Val CONF: 
 [[ 78.   4.   0.]
 [124. 563.   4.]
 [ 31. 184.   6.]] 
Train CONF: 
 [[3486.  289.    6.]
 [ 416. 2443.   18.]
 [ 152.  868.   13.]] 

Learning Rate:  0.001

EPOCH:  4


  0%|          | 0/481 [00:00<?, ?it/s]

  0%|          | 0/63 [00:00<?, ?it/s]

Val loss:  0.5053916573524475  Train loss:  0.3222598135471344 

Val acc:  0.682092555331992  Train acc:  0.7881939929788082 

Val AUC:  0.0 Train AUC:  0.0
Val f1:  0.682092555331992  Train f1:  0.7881939929788082 

Val CONF: 
 [[ 78.   4.   0.]
 [ 91. 600.   0.]
 [ 18. 203.   0.]] 
Train CONF: 
 [[3542.  262.    6.]
 [ 370. 2496.   28.]
 [ 129.  834.   24.]] 

Learning Rate:  0.001

EPOCH:  5


  0%|          | 0/481 [00:00<?, ?it/s]

  0%|          | 0/63 [00:00<?, ?it/s]

Val loss:  0.43061307072639465  Train loss:  0.31929072737693787 

Val acc:  0.7364185110663984  Train acc:  0.785983617214928 

Val AUC:  0.0 Train AUC:  0.0
Val f1:  0.7364185110663984  Train f1:  0.785983617214928 

Val CONF: 
 [[ 65.  17.   0.]
 [ 24. 667.   0.]
 [  7. 214.   0.]] 
Train CONF: 
 [[3457.  257.    6.]
 [ 342. 2555.   30.]
 [ 114.  897.   33.]] 

Learning Rate:  0.001

EPOCH:  6


  0%|          | 0/481 [00:00<?, ?it/s]

  0%|          | 0/63 [00:00<?, ?it/s]

Val loss:  0.47104737162590027  Train loss:  0.29598432779312134 

Val acc:  0.7012072434607642  Train acc:  0.8096476400988175 

Val AUC:  0.0 Train AUC:  0.0
Val f1:  0.7012072434607642  Train f1:  0.8096476400988175 

Val CONF: 
 [[ 74.   8.   0.]
 [ 68. 623.   0.]
 [ 11. 210.   0.]] 
Train CONF: 
 [[3.654e+03 1.710e+02 3.000e+00]
 [2.920e+02 2.542e+03 3.400e+01]
 [1.110e+02 8.530e+02 3.100e+01]] 

Learning Rate:  0.001

EPOCH:  7


  0%|          | 0/481 [00:00<?, ?it/s]

In [None]:
def learning_curve(NAME):
  fig, axs = plt.subplots(2, figsize = (7,6))

  acc_df = pd.read_csv(f"C:\\Users\\Alejandro\\Desktop\\heterogeneous-data\\results\\WSI\\log\\model_{NAME}.log")
  acc_df.columns = ["MODEL_NAME", "TIME", "ACC", "LOSS", "CONF_M", "AUC",
                     "VAL_ACC", "VAL_LOSS", "VAL_CONF_M", "VAL_AUC"]

  fig, axs = plt.subplots(2, figsize=(5,7))

  axs[0].legend("MODEL_NAME", loc=2)

  acc_df.plot(y="ACC", ax=axs[0])
  acc_df.plot(y="VAL_ACC", ax=axs[0])

  acc_df.plot(y="LOSS", ax=axs[1])
  acc_df.plot(y="VAL_LOSS",ax=axs[1])

  fig.show()
  fig.savefig(f"C:\\Users\\Alejandro\\Desktop\\heterogeneous-data\\results\\WSI\\lc\\l_curve_{NAME}.pdf")

In [4]:
for SPLIT in range(1):
    NAME = EXP_NAME + f"{SPLIT}"
    print(NAME)
    learning_curve(NAME)

NameError: name 'EXP_NAME' is not defined