In [1]:
import warnings
warnings.filterwarnings("ignore")

import time
import os
import random
from tqdm import tqdm
import matplotlib.pyplot as plt
import numpy as np 
import pandas as pd
import seaborn as sns

import torch
import torch.nn as nn
import pytorch_lightning
from torch.utils.data import DataLoader
import torchvision.transforms as tt
import torch.optim as optim
from torch.optim import lr_scheduler
from sklearn.metrics import *
from PIL import Image
from imblearn.over_sampling import RandomOverSampler
from sklearn.utils.class_weight import compute_class_weight

from src.utils.utils import training, testing, EarlyStopping, get_y_true_preds
from src.utils.LungDataset import LungSet
from src.model.LungNetwork import LungNet

import torch

# Select the compute device. `device` is assigned on BOTH branches so that
# CPU-only machines do not hit a NameError when later cells reference it.
if torch.cuda.is_available():
    num_gpus = torch.cuda.device_count()
    print(f"Number of available GPUs: {num_gpus}")
    device = torch.device("cuda:0")
else:
    num_gpus = 0
    print("No GPU available, falling back to CPU")
    device = torch.device("cpu")

# Seed every random number generator used in this notebook (NumPy, the
# stdlib `random` module and PyTorch on CPU and on all CUDA devices).
seed=2024
np.random.seed(seed)
random.seed(seed)
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)
torch.cuda.manual_seed_all(seed)
# Ask cuDNN for deterministic kernels and disable its auto-tuner so that the
# chosen algorithms do not vary between runs.
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

In [2]:
# ---- Experiment configuration ----

# Number of output classes handed to the network.
num_classes = 7

# Mini-batch size used by both DataLoaders.
BATCH_SIZE = 128

# Checkpoint path handed to LungNet when the model is built.
baseline_path = "src/model/tenpercent_resnet18.ckpt"

# Image root directory and the CSV index files for each split.
data_path = "data/augmented_data"
train_path = "data/dataframes/df_train_7C_augmented.csv"
test_path = "data/dataframes/df_test_7C_augmented.csv"

# When True, the oversampled frame replaces the raw training frame.
oversampling = True

In [3]:
# Load the train/test index frames and shuffle their rows.
# `random_state=seed` makes the shuffle reproducible even when this cell is
# re-run on its own; without it the order depends on how much of the global
# NumPy random stream has already been consumed.
dtrain = pd.read_csv(train_path).sample(frac=1, random_state=seed)
dtest = pd.read_csv(test_path).sample(frac=1, random_state=seed)

# The whole frame is used as "features" so that resampling keeps every column
# (including the image file name) alongside the integer label.
X_train = dtrain
y_train = dtrain['label']

# Balance the classes by resampling rows according to `label`.
X_ros, y_ros = RandomOverSampler(random_state=seed).fit_resample(X_train, y_train)

In [4]:
# Display the shuffled training frame to inspect its columns and labels.
X_train

Unnamed: 0,Patient,lame,patch,classe,label,dataset,TetraClass
46764,6,L,L_6_Z (10)_HF.jpg,Lépidique,5,train,TuGr12
5394,3,No,No_3_C(442).jpg,Normal,0,train,P
61028,230,Gc,Gc_230_A (283).jpg,Glandulaire complexe,6,train,TuGr3
66207,231,S,S_231_A (380)_HF.jpg,Solide,6,train,TuGr3
27189,9,N,N_9_S (1116).tif,Nécrose,2,train,Né
...,...,...,...,...,...,...,...
47643,7,A,A_7_A (181)_HF.jpg,Acinaire,5,train,TuGr12
35456,7,F,F_7_A (353)_rot270.jpg,Fibrose,4,train,Fi
51808,7,A,A_7_A (143).jpg,Acinaire,5,train,TuGr12
52730,13,D,13_D_row_97_col_29.jpg,Acinaire,5,train,TuGr12


In [5]:
# Compare the per-class sample counts of the raw and the oversampled frames.
counts_raw = X_train['TetraClass'].value_counts()
counts_oversampled = X_ros['TetraClass'].value_counts()
counts_raw, counts_oversampled

(TetraClass
 P         17634
 Fi        12390
 TuGr3     10341
 TuGr12     9964
 Né         8994
 H          3960
 TL         3366
 Name: count, dtype: int64,
 TetraClass
 TuGr12    17634
 P         17634
 TuGr3     17634
 Né        17634
 Fi        17634
 H         17634
 TL        17634
 Name: count, dtype: int64)

In [6]:
# Train on the class-balanced frame when oversampling is enabled in the config.
if oversampling:
    X_train = X_ros
    y_train = y_ros

In [7]:
# Training pipeline: random horizontal and vertical flips, then tensor conversion.
train_steps = [tt.RandomHorizontalFlip(), tt.RandomVerticalFlip(), tt.ToTensor()]
t_train = tt.Compose(train_steps)

# Evaluation pipeline: tensor conversion only.
t_test = tt.Compose([tt.ToTensor()])

In [8]:
# Wrap each dataframe in a LungSet (project dataset class). Per the original notes it takes:
#   df (DataFrame): image_path, label
#   data_path (str): directory containing the images
#   transform (transforms.Compose): transformations to apply
# and yields (image, label) pairs.
# NOTE(review): the t_train / t_test pipelines defined earlier are not passed
# here - confirm whether LungSet applies its own default transform or whether
# `transform=` should be supplied.
train_set, test_set = (LungSet(frame, data_path=data_path) for frame in (X_train, dtest))

# Batch the datasets; only the training batches are reshuffled every epoch.
train_loader = DataLoader(train_set, batch_size=BATCH_SIZE, shuffle=True)
test_loader = DataLoader(test_set, batch_size=BATCH_SIZE, shuffle=False)

In [9]:
# Set up the model
model = LungNet(baseline_path, num_classes)

# Move the model to the compute device explicitly, and do it BEFORE the
# optimizer is created so the optimizer is built from the device-resident
# parameters. The device is resolved here so this cell does not depend on a
# later cell having been executed. Calling `.to(device)` again later is harmless.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = model.to(device)
# For multi-GPU data parallelism, wrap the model with
# nn.DataParallel(model, device_ids=[...]) at this point.

# Cross-entropy with label smoothing.
criterion = nn.CrossEntropyLoss(label_smoothing=0.125)
# NOTE(review): `early_stopping` is created here, but its use in the training
# loop is commented out.
early_stopping = EarlyStopping(patience=10, delta=0.001)
# NOTE(review): lr=0.1 is unusually high for AdamW - confirm this is intended.
optimizer = optim.AdamW(model.parameters(), lr=0.1, weight_decay=0.01)
# allow dynamic lr reducing based on some measurement (a metric has stopped improving):
# the lr is multiplied by 0.1 after 10 epochs without a decrease of the monitored value
scheduler = lr_scheduler.ReduceLROnPlateau(optimizer, mode='min',factor=0.1, patience=10, min_lr=1e-15, verbose=True)

In [12]:
# Use the GPU when CUDA is available, otherwise fall back to the CPU
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

In [13]:
best_acc = 0
num_epoch=100
comment=" "
T=0                      # accumulated wall-clock training time, in seconds
SAVE_EVERY = 5           # number of epochs between periodic snapshots

# Output folders. They are created up front so that the first save cannot fail
# with FileNotFoundError after a full epoch of training.
CHECKPOINT_DIR = "model_checkpoints"
EVAL_DIR = "evaluation"
os.makedirs(CHECKPOINT_DIR, exist_ok=True)
os.makedirs(EVAL_DIR, exist_ok=True)

train_loss, test_loss, train_accuracy, test_accuracy = [], [], [], []


def _save_snapshot(prefix=""):
    """Write the current model and optimizer state dicts to CHECKPOINT_DIR.

    `prefix` is prepended to the file names (e.g. "best_" for the best model).
    """
    torch.save(model.state_dict(),     f"{CHECKPOINT_DIR}/{prefix}model_{num_classes}C.pth")
    torch.save(optimizer.state_dict(), f"{CHECKPOINT_DIR}/{prefix}optimizer_{num_classes}C.pth")


def _save_history():
    """Write the loss and accuracy curves collected so far to EVAL_DIR."""
    np.save(f'{EVAL_DIR}/{num_classes}c_train_test_loss.npy', [train_loss, test_loss])
    np.save(f'{EVAL_DIR}/{num_classes}c_train_test_accuracies.npy', [train_accuracy, test_accuracy])


for epoch in range(num_epoch):
    t0 = time.time()
    # one pass over the training data, then one evaluation pass
    epoch_train_loss, epoch_train_accuracy  = training(model, train_loader, criterion, optimizer, device)
    epoch_test_loss, epoch_test_accuracy    = testing(model, test_loader, criterion, device)

    # save the model if it is currently the best one (highest test accuracy)
    # NOTE(review): model selection uses the test split - consider a separate
    # validation split to avoid an optimistic estimate.
    if best_acc < epoch_test_accuracy:
        best_acc = epoch_test_accuracy
        _save_snapshot(prefix="best_")
        # The criterion is accuracy, so the message says so.
        comment=" (accuracy improved!) new best model saved"

    # reduce the learning rate if test loss didn't decrease
    scheduler.step(epoch_test_loss)

    # keep track of the loss and accuracy
    train_loss.append(epoch_train_loss)
    train_accuracy.append(epoch_train_accuracy)
    test_loss.append(epoch_test_loss)
    test_accuracy.append(epoch_test_accuracy)

    # after every SAVE_EVERY-th epoch save the loss and accuracy history,
    # the model and the optimizer (lr changes)
    if (epoch + 1) % SAVE_EVERY == 0:
        _save_history()
        _save_snapshot()

    epoch_seconds = time.time() - t0
    print('Epoch {:g}/{:g}: TRAIN Loss={:0.5f} -- Acc={:0.3f}% '.format(epoch+1,num_epoch, epoch_train_loss, epoch_train_accuracy), end='')
    # True division: floor division reported 0min for every epoch shorter than a minute.
    print('|| TEST Loss={:0.5f} -- Acc={:0.3f}%  --- Time={:.1f}min'.format(epoch_test_loss,epoch_test_accuracy, epoch_seconds / 60), end='')
    print(comment)
    comment=" "
    T += epoch_seconds

    # Early stopping is currently disabled; uncomment to enable it.
    # early_stopping(epoch_test_loss)
    # if early_stopping.early_stop:
    #     print("Early stopping.")
    #     break

# True division: `T//3600` dropped the fractional hours before formatting.
print("total time : T={:.3f}h".format(T / 3600))

# save the final model
_save_history()
_save_snapshot()

  0%|          | 0/965 [00:00<?, ?it/s]


FileNotFoundError: [Errno 2] No such file or directory: 'data/augmented_data/Solide/S_231_A (302).jpg'