In [1]:
import warnings
warnings.filterwarnings("ignore")

import time
import os
import random
from tqdm import tqdm
import matplotlib.pyplot as plt
import numpy as np 
import pandas as pd
import seaborn as sns

import torch
import torch.nn as nn
import pytorch_lightning
from torch.utils.data import DataLoader
import torchvision.transforms as tt
import torch.optim as optim
from torch.optim import lr_scheduler
from sklearn.metrics import *
from PIL import Image
from imblearn.over_sampling import RandomOverSampler
from sklearn.utils.class_weight import compute_class_weight

from src.utils.utils import training, testing, EarlyStopping, get_y_true_preds
from src.utils.LungDataset import LungSet
from src.model.LungNetwork import LungNet

import torch

if torch.cuda.is_available():
    num_gpus = torch.cuda.device_count()
    print(f"Number of available GPUs: {num_gpus}")
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    
seed=2024
np.random.seed(seed)
random.seed(seed)
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)
torch.cuda.manual_seed_all(seed)
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False
device

Number of available GPUs: 1


device(type='cuda', index=0)

In [2]:
# model config
baseline_path = "src/model/tenpercent_resnet18.ckpt"
BATCH_SIZE = 24
num_classes = 6
prefix = '6C'

# data config
augmented_test = False
oversampling = False
train_path = f"data/splits/{prefix}_support_augmented.csv"
test_path = f"data/splits/{prefix}_query.csv" if not augmented_test else f"data/split/{prefix}_query_augmented.csv"
train_data_path = "data/augmented_data"
test_data_path = "data/normalized_data" if not augmented_test else "data/augmented_data"


save_dir = f"checkpoints/supervised/{prefix}/" if not oversampling else f"checkpoints/supervised/{prefix}/oversampled/"
save_path_model_best = f"{save_dir}/best_model.pth"
save_path_optim_best = f"{save_dir}/best_optim.pth"
save_path_model_last = f"{save_dir}/last_model.pth"
save_path_optim_last = f"{save_dir}/last_optim.pth"

save_path_loss_history = f"{save_dir}/train_test_loss_history.npy"
save_path_acc_history = f"{save_dir}/train_test_acc_history.npy"


In [3]:
dtrain= pd.read_csv(train_path).sample(frac = 1)
dtest = pd.read_csv(test_path).sample(frac = 1)

X_train = dtrain
y_train = dtrain['label']

# Oversample using sampling with replacement s.t. each class has the same number of samples
X_ros, y_ros = RandomOverSampler(random_state=seed).fit_resample(X_train, y_train)

In [4]:
X_train

Unnamed: 0,Patient,lame,patch,classe,tetraClass,label,dataset
67621,169,Fo,Fo_169_A (342)_VF.jpg,Foetal,T,5,train
28137,230,N,N_230_A (3)_rot270.jpg,Nécrose,Né,2,train
51341,231,S,S_231_A (1715)_VF.jpg,Solide,T,5,train
32559,9,N,N_9_S (1093).tif,Nécrose,Né,2,train
66549,230,Gc,Gc_230_A (145).jpg,Glandulaire complexe,T,5,train
...,...,...,...,...,...,...,...
47643,231,F,F_231_A (152)_rot45.jpg,Fibrose,Fi,4,train
35456,231,TL,TL_231_A (22)_HF.jpg,Tissu lymphoïde,TL,3,train
51808,7,A,A_7_A (279).jpg,Acinaire,T,5,train
52730,247,A,A_247_A (2937).jpg,Acinaire,T,5,train


In [5]:
X_train['tetraClass'].value_counts(), X_ros['tetraClass'].value_counts()

(tetraClass
 P     22755
 T     21165
 Fi    12612
 Né     8994
 TL     3069
 H      3006
 Name: count, dtype: int64,
 tetraClass
 T     22755
 Né    22755
 Fi    22755
 H     22755
 TL    22755
 P     22755
 Name: count, dtype: int64)

In [6]:
if oversampling:
    X_train, y_train = X_ros, y_ros

t_train = tt.Compose([tt.RandomHorizontalFlip(), 
                    tt.RandomVerticalFlip(),
                    tt.ToTensor()])
t_test = tt.Compose([tt.ToTensor()])

In [7]:
# create a LungSet object(torch.Datatset) with:
# df(DataFrame): image_path, label
# data_path(str): directory containing the images
# transform(transforms.Compose): transformations to apply
# __iter__: return torch(Image), label
train_set = LungSet(X_train, data_path=train_data_path)
test_set = LungSet(dtest, data_path=test_data_path)

# Create the torch.Dataloader
train_loader = DataLoader(train_set, batch_size=BATCH_SIZE, shuffle=True)
test_loader = DataLoader(test_set, batch_size=BATCH_SIZE, shuffle=False)

In [8]:
len(train_set), len(test_set)

(71601, 2623)

In [9]:
# Set up the model
model = LungNet(baseline_path, num_classes)
model = model.to(device)

# data paralellism in traning
# model=nn.DataParallel(model, device_ids=[0, 1])
# model=model.cuda()

criterion = nn.CrossEntropyLoss(label_smoothing=0.125)
early_stopping = EarlyStopping(patience=10, delta=0.001)
optimizer = optim.AdamW(model.parameters(), lr=0.1, weight_decay=0.01)
# allow dynamic lr reducing based on some measurement (a metric has stopped improving)
scheduler = lr_scheduler.ReduceLROnPlateau(optimizer, mode='min',factor=0.1, patience=10, min_lr=1e-15, verbose=True)

In [10]:
print("trainable parameters",sum([p.numel() for p in model.parameters() if p.requires_grad]))

trainable parameters 70310


In [11]:
best_acc = 0
num_epoch=50
comment=" "
T=0

train_loss, test_loss, train_accuracy, test_accuracy = [], [], [], []

for epoch in range(num_epoch):
    t0 = time.time()               
    # train one epoch for each batch                                                                                                         
    epoch_train_loss, epoch_train_accuracy  = training(model, train_loader, criterion, optimizer, device)
    epoch_test_loss, epoch_test_accuracy    = testing(model, test_loader, criterion, device)

    # save the model if is currently the best
    if best_acc < epoch_test_accuracy:
        best_acc = epoch_test_accuracy
        torch.save(model.state_dict(), save_path_model_best)
        torch.save(optimizer.state_dict(), save_path_optim_best)
        comment=" (loss decreased!) new best model saved"
    
    # reduce the learning rate if test loss didn't decrease
    scheduler.step(epoch_test_loss)

    # keep track of the loss and accuracy
    train_loss.append(epoch_train_loss)
    train_accuracy.append(epoch_train_accuracy)
    test_loss.append(epoch_test_loss)
    test_accuracy.append(epoch_test_accuracy)

    # every 5 epoch save the loss and accuracy history. the model and the optimizer (lr changes)
    if epoch!=0 and epoch%5 == 0:
        np.save(save_path_loss_history, [train_loss, test_loss])
        np.save(save_path_acc_history, [train_accuracy, test_accuracy])
        torch.save(model.state_dict(), save_path_model_last)
        torch.save(optimizer.state_dict(), save_path_optim_last)
        
    print('Epoch {:g}/{:g}: TRAIN Loss={:0.5f} -- Acc={:0.3f}% '.format(epoch+1,num_epoch, epoch_train_loss, epoch_train_accuracy), end='')
    print('|| TEST Loss={:0.5f} -- Acc={:0.3f}%  --- Time={:g}min'.format(epoch_test_loss,epoch_test_accuracy, (time.time()-t0)//60), end='')
    print(comment)
    comment=" "
    T+=(time.time()-t0)

    # early_stopping(epoch_test_loss)
    # if early_stopping.early_stop:
    #     print("Early stopping.")
    #     break
print("total time : T={:.3f}h".format(T//3600))

# save the final model
np.save(save_path_loss_history, [train_loss, test_loss])
np.save(save_path_acc_history, [train_accuracy, test_accuracy])
torch.save(model.state_dict(), save_path_model_last)
torch.save(optimizer.state_dict(), save_path_optim_last)

100%|██████████| 2984/2984 [21:14<00:00,  2.34it/s]


Epoch 1/50: TRAIN Loss=0.73201 -- Acc=91.110% || TEST Loss=6.32105 -- Acc=42.699%  --- Time=22min (loss decreased!) new best model saved


100%|██████████| 2984/2984 [21:24<00:00,  2.32it/s]


Epoch 2/50: TRAIN Loss=0.70438 -- Acc=92.454% || TEST Loss=0.88435 -- Acc=82.920%  --- Time=22min (loss decreased!) new best model saved


100%|██████████| 2984/2984 [21:28<00:00,  2.32it/s]


Epoch 3/50: TRAIN Loss=0.70650 -- Acc=92.465% || TEST Loss=0.85206 -- Acc=84.178%  --- Time=22min (loss decreased!) new best model saved


100%|██████████| 2984/2984 [21:28<00:00,  2.32it/s]


Epoch 4/50: TRAIN Loss=0.70124 -- Acc=92.682% || TEST Loss=0.97080 -- Acc=76.363%  --- Time=22min 


100%|██████████| 2984/2984 [21:42<00:00,  2.29it/s]


Epoch 5/50: TRAIN Loss=0.70220 -- Acc=92.788% || TEST Loss=0.87838 -- Acc=84.979%  --- Time=22min (loss decreased!) new best model saved


100%|██████████| 2984/2984 [22:04<00:00,  2.25it/s]


Epoch 6/50: TRAIN Loss=0.69728 -- Acc=92.888% || TEST Loss=0.87108 -- Acc=82.768%  --- Time=22min 


100%|██████████| 2984/2984 [22:09<00:00,  2.24it/s]


Epoch 7/50: TRAIN Loss=0.70001 -- Acc=92.659% || TEST Loss=1.12169 -- Acc=70.187%  --- Time=22min 


100%|██████████| 2984/2984 [24:13<00:00,  2.05it/s]


Epoch 8/50: TRAIN Loss=0.69936 -- Acc=92.916% || TEST Loss=1.09008 -- Acc=73.046%  --- Time=24min 


100%|██████████| 2984/2984 [21:50<00:00,  2.28it/s]


Epoch 9/50: TRAIN Loss=0.70272 -- Acc=92.863% || TEST Loss=1.39751 -- Acc=61.914%  --- Time=22min 


100%|██████████| 2984/2984 [21:30<00:00,  2.31it/s]


Epoch 10/50: TRAIN Loss=0.70132 -- Acc=92.740% || TEST Loss=0.87281 -- Acc=79.260%  --- Time=22min 


100%|██████████| 2984/2984 [21:56<00:00,  2.27it/s]


Epoch 11/50: TRAIN Loss=0.69914 -- Acc=92.806% || TEST Loss=1.13961 -- Acc=66.069%  --- Time=22min 


100%|██████████| 2984/2984 [21:44<00:00,  2.29it/s]


Epoch 12/50: TRAIN Loss=0.69923 -- Acc=92.979% || TEST Loss=0.80479 -- Acc=85.856%  --- Time=22min (loss decreased!) new best model saved


100%|██████████| 2984/2984 [21:57<00:00,  2.27it/s]


Epoch 13/50: TRAIN Loss=0.70012 -- Acc=92.880% || TEST Loss=0.90938 -- Acc=80.938%  --- Time=22min 


100%|██████████| 2984/2984 [21:59<00:00,  2.26it/s]


Epoch 14/50: TRAIN Loss=0.70371 -- Acc=92.721% || TEST Loss=0.94754 -- Acc=78.193%  --- Time=22min 


100%|██████████| 2984/2984 [21:30<00:00,  2.31it/s]


Epoch 15/50: TRAIN Loss=0.69980 -- Acc=92.844% || TEST Loss=1.71245 -- Acc=75.372%  --- Time=22min 


100%|██████████| 2984/2984 [21:46<00:00,  2.28it/s]


Epoch 16/50: TRAIN Loss=0.70068 -- Acc=92.803% || TEST Loss=0.88484 -- Acc=82.387%  --- Time=22min 


100%|██████████| 2984/2984 [22:04<00:00,  2.25it/s]


Epoch 17/50: TRAIN Loss=0.70069 -- Acc=92.849% || TEST Loss=1.08640 -- Acc=70.301%  --- Time=22min 


100%|██████████| 2984/2984 [22:13<00:00,  2.24it/s]


Epoch 18/50: TRAIN Loss=0.70129 -- Acc=92.883% || TEST Loss=0.84804 -- Acc=82.882%  --- Time=22min 


100%|██████████| 2984/2984 [22:19<00:00,  2.23it/s]


Epoch 19/50: TRAIN Loss=0.69824 -- Acc=92.920% || TEST Loss=0.99300 -- Acc=73.580%  --- Time=23min 


100%|██████████| 2984/2984 [22:13<00:00,  2.24it/s]


Epoch 20/50: TRAIN Loss=0.70274 -- Acc=92.846% || TEST Loss=1.07996 -- Acc=73.732%  --- Time=22min 


100%|██████████| 2984/2984 [21:39<00:00,  2.30it/s]


Epoch 21/50: TRAIN Loss=0.70072 -- Acc=92.717% || TEST Loss=0.80723 -- Acc=86.085%  --- Time=22min (loss decreased!) new best model saved


100%|██████████| 2984/2984 [22:05<00:00,  2.25it/s]


Epoch 22/50: TRAIN Loss=0.70357 -- Acc=92.620% || TEST Loss=0.94067 -- Acc=79.337%  --- Time=22min 


100%|██████████| 2984/2984 [22:04<00:00,  2.25it/s]


Epoch 00023: reducing learning rate of group 0 to 1.0000e-02.
Epoch 23/50: TRAIN Loss=0.70204 -- Acc=92.806% || TEST Loss=0.81354 -- Acc=84.788%  --- Time=22min 


100%|██████████| 2984/2984 [22:04<00:00,  2.25it/s]


Epoch 24/50: TRAIN Loss=0.63885 -- Acc=94.870% || TEST Loss=0.74273 -- Acc=88.639%  --- Time=22min (loss decreased!) new best model saved


100%|██████████| 2984/2984 [22:07<00:00,  2.25it/s]


Epoch 25/50: TRAIN Loss=0.63131 -- Acc=95.244% || TEST Loss=0.87411 -- Acc=83.263%  --- Time=22min 


100%|██████████| 2984/2984 [22:06<00:00,  2.25it/s]


Epoch 26/50: TRAIN Loss=0.62845 -- Acc=95.377% || TEST Loss=0.81751 -- Acc=85.818%  --- Time=22min 


100%|██████████| 2984/2984 [22:02<00:00,  2.26it/s]


Epoch 27/50: TRAIN Loss=0.62782 -- Acc=95.381% || TEST Loss=0.79108 -- Acc=87.076%  --- Time=22min 


100%|██████████| 2984/2984 [21:58<00:00,  2.26it/s]


Epoch 28/50: TRAIN Loss=0.62867 -- Acc=95.358% || TEST Loss=0.86671 -- Acc=82.501%  --- Time=22min 


100%|██████████| 2984/2984 [21:59<00:00,  2.26it/s]


Epoch 29/50: TRAIN Loss=0.62908 -- Acc=95.356% || TEST Loss=0.80808 -- Acc=86.275%  --- Time=22min 


100%|██████████| 2984/2984 [22:01<00:00,  2.26it/s]


Epoch 30/50: TRAIN Loss=0.63091 -- Acc=95.325% || TEST Loss=0.88756 -- Acc=81.014%  --- Time=22min 


100%|██████████| 2984/2984 [21:57<00:00,  2.26it/s]


Epoch 31/50: TRAIN Loss=0.63010 -- Acc=95.212% || TEST Loss=0.79415 -- Acc=85.551%  --- Time=22min 


100%|██████████| 2984/2984 [22:02<00:00,  2.26it/s]


Epoch 32/50: TRAIN Loss=0.63126 -- Acc=95.271% || TEST Loss=0.85758 -- Acc=84.598%  --- Time=22min 


100%|██████████| 2984/2984 [22:03<00:00,  2.26it/s]


Epoch 33/50: TRAIN Loss=0.63046 -- Acc=95.286% || TEST Loss=0.73090 -- Acc=90.393%  --- Time=22min (loss decreased!) new best model saved


100%|██████████| 2984/2984 [22:03<00:00,  2.26it/s]


Epoch 34/50: TRAIN Loss=0.63163 -- Acc=95.239% || TEST Loss=0.79596 -- Acc=86.618%  --- Time=22min 


100%|██████████| 2984/2984 [22:02<00:00,  2.26it/s]


Epoch 35/50: TRAIN Loss=0.63087 -- Acc=95.291% || TEST Loss=0.83671 -- Acc=85.475%  --- Time=22min 


100%|██████████| 2984/2984 [22:02<00:00,  2.26it/s]


Epoch 36/50: TRAIN Loss=0.63157 -- Acc=95.328% || TEST Loss=0.81339 -- Acc=85.513%  --- Time=22min 


100%|██████████| 2984/2984 [22:01<00:00,  2.26it/s]


Epoch 37/50: TRAIN Loss=0.63376 -- Acc=95.150% || TEST Loss=0.99614 -- Acc=77.964%  --- Time=22min 


100%|██████████| 2984/2984 [22:04<00:00,  2.25it/s]


Epoch 38/50: TRAIN Loss=0.63105 -- Acc=95.194% || TEST Loss=0.77517 -- Acc=87.762%  --- Time=22min 


100%|██████████| 2984/2984 [22:04<00:00,  2.25it/s]


Epoch 39/50: TRAIN Loss=0.63178 -- Acc=95.194% || TEST Loss=0.76459 -- Acc=88.296%  --- Time=22min 


100%|██████████| 2984/2984 [22:06<00:00,  2.25it/s]


Epoch 40/50: TRAIN Loss=0.63138 -- Acc=95.281% || TEST Loss=0.88287 -- Acc=81.205%  --- Time=22min 


100%|██████████| 2984/2984 [22:04<00:00,  2.25it/s]


Epoch 41/50: TRAIN Loss=0.63136 -- Acc=95.249% || TEST Loss=0.86777 -- Acc=83.416%  --- Time=22min 


100%|██████████| 2984/2984 [22:03<00:00,  2.25it/s]


Epoch 42/50: TRAIN Loss=0.63057 -- Acc=95.285% || TEST Loss=0.75962 -- Acc=87.991%  --- Time=22min 


100%|██████████| 2984/2984 [22:03<00:00,  2.26it/s]


Epoch 43/50: TRAIN Loss=0.63252 -- Acc=95.159% || TEST Loss=0.77762 -- Acc=87.686%  --- Time=22min 


100%|██████████| 2984/2984 [22:04<00:00,  2.25it/s]


Epoch 00044: reducing learning rate of group 0 to 1.0000e-03.
Epoch 44/50: TRAIN Loss=0.63172 -- Acc=95.236% || TEST Loss=0.85648 -- Acc=84.293%  --- Time=22min 


100%|██████████| 2984/2984 [22:03<00:00,  2.25it/s]


Epoch 45/50: TRAIN Loss=0.61596 -- Acc=95.824% || TEST Loss=0.83677 -- Acc=85.818%  --- Time=22min 


100%|██████████| 2984/2984 [22:03<00:00,  2.25it/s]


Epoch 46/50: TRAIN Loss=0.61374 -- Acc=95.876% || TEST Loss=0.80684 -- Acc=86.580%  --- Time=22min 


100%|██████████| 2984/2984 [22:03<00:00,  2.25it/s]


Epoch 47/50: TRAIN Loss=0.61367 -- Acc=95.980% || TEST Loss=0.79230 -- Acc=86.618%  --- Time=22min 


100%|██████████| 2984/2984 [22:05<00:00,  2.25it/s]


Epoch 48/50: TRAIN Loss=0.61357 -- Acc=95.941% || TEST Loss=0.78895 -- Acc=87.038%  --- Time=22min 


100%|██████████| 2984/2984 [21:54<00:00,  2.27it/s]


Epoch 49/50: TRAIN Loss=0.61281 -- Acc=95.993% || TEST Loss=0.83060 -- Acc=85.551%  --- Time=22min 


100%|██████████| 2984/2984 [21:55<00:00,  2.27it/s]


Epoch 50/50: TRAIN Loss=0.61196 -- Acc=96.073% || TEST Loss=0.80750 -- Acc=86.237%  --- Time=22min 
total time : T=18.000h
