In [1]:
import os, sys, json
import shutil
import datetime
import torch
from torch.optim import AdamW, SGD
from torch.optim.lr_scheduler import ReduceLROnPlateau

from omegaconf import OmegaConf
from logger.logger import Logger
from trainer.trainer import Trainer
from utils.util import *

from model.modelloader import COVID_PVTv2, COVID_ViT, load_checkpoint
from dataloader.dataloader import present_dataset
from trainer.trainer import Trainer

from torch.utils.tensorboard import SummaryWriter


# Resolve the trainer configuration relative to the notebook's working directory.
cwd = os.getcwd()
config_file = 'config/trainer_config.yml'
config_path = os.path.join(cwd, config_file)

# Only the 'trainer' section of the YAML file is used by the rest of the notebook.
full_config = OmegaConf.load(config_path)
train_config = full_config['trainer']

# NOTE(review): presumably seeds the RNGs from the config so runs are
# repeatable -- `seeding` is not defined in this notebook; confirm in utils.
seeding(train_config)

In [2]:
# Print a summary of the dataset: the split files read and the number of
# samples per label in the train / val / test splits (see the output below).
present_dataset(train_config)

Collecting data from : C:/Users/ASUS/COVID_19/data/train_split.txt
Collecting data from : C:/Users/ASUS/COVID_19/data/val_split.txt
Collecting data from : C:/Users/ASUS/COVID_19/data/test_split.txt
Name of the dataset: COVIDXR_dataset
Collected from the description of these github: https://github.com/lindawangg/COVID-Net
--------------------------------------------------
Labels and quantities of samples in train dataset: 
  normal       9992
pneumonia    7205
covid-19     2401
Name: labels, dtype: int64
For each epoch we randomly get 2401 samples for each class in train dataset
--------------------------------------------------
Labels and quantities of samples in val dataset: 
  normal       100
pneumonia    100
covid-19      93
Name: labels, dtype: int64
--------------------------------------------------
Labels and quantities of samples in test dataset: 
  covid-19     198
pneumonia    105
normal       100
Name: labels, dtype: int64
--------------------------------------------------


# Utility functions

In [3]:
def get_model(name):
    """Instantiate the classifier identified by ``name``.

    Args:
        name: Model identifier, either ``'ViT'`` or ``'PVT_V2'``.

    Returns:
        A newly constructed ``COVID_ViT`` (for ``'ViT'``) or ``COVID_PVTv2``
        (for ``'PVT_V2'``) instance.

    Raises:
        ValueError: If ``name`` is not one of the supported identifiers.
    """
    if name == 'ViT':
        return COVID_ViT()
    if name == 'PVT_V2':
        return COVID_PVTv2()
    # Fail loudly here instead of returning None, which would only surface
    # later as an AttributeError when the caller asks for model.parameters().
    raise ValueError(
        f"Unknown model name {name!r}; expected one of 'ViT', 'PVT_V2'."
    )

In [4]:
def select_scheduler_optimizer(model, config):
    """Build the optimizer and the plateau LR scheduler for ``model``.

    Args:
        model: Module whose ``parameters()`` are handed to the optimizer.
        config: Mapping with an ``'optimizer'`` section (``type``, ``lr``,
            ``weight_decay``) and a ``'scheduler'`` section
            (``scheduler_factor``, ``scheduler_patience``,
            ``scheduler_min_lr``, ``scheduler_verbose``).

    Returns:
        Tuple ``(optimizer, scheduler)`` where ``scheduler`` is a
        ``ReduceLROnPlateau`` wrapping ``optimizer``.

    Raises:
        ValueError: If ``config['optimizer']['type']`` is neither ``'AdamW'``
            nor ``'SGD'``.
    """
    optimizer_cfg = config['optimizer']
    opt = optimizer_cfg['type']
    lr = optimizer_cfg['lr']
    dec = optimizer_cfg['weight_decay']

    if opt == 'AdamW':
        print("Use optimizer AdamW with lr: ", lr)
        optimizer = AdamW(model.parameters(), lr=lr, weight_decay=dec)
    elif opt == 'SGD':
        print("Use optimizer SGD with lr: ", lr)
        # NOTE(review): weight_decay is read from the config but is not
        # forwarded to SGD -- confirm this is intentional.
        optimizer = SGD(model.parameters(), lr=lr, momentum=0.9)
    else:
        # Reject unsupported types up front; otherwise a None optimizer would
        # reach ReduceLROnPlateau and fail with an unrelated-looking error.
        raise ValueError(
            f"Unsupported optimizer type {opt!r}; expected 'AdamW' or 'SGD'."
        )

    scheduler_cfg = config['scheduler']
    # NOTE(review): `verbose` is deprecated in recent PyTorch releases --
    # confirm against the installed version.
    scheduler = ReduceLROnPlateau(optimizer, factor=scheduler_cfg['scheduler_factor'],
                                  patience=scheduler_cfg['scheduler_patience'],
                                  min_lr=scheduler_cfg['scheduler_min_lr'],
                                  verbose=scheduler_cfg['scheduler_verbose'])
    return optimizer, scheduler

# Training process

In [5]:
def engine(config, model_name=None, checkpoint_name=None):
    """Build model, optimizer and scheduler from ``config`` and run training.

    Args:
        config: Trainer configuration. ``config[model_name]`` must provide
            ``name`` plus the ``optimizer`` / ``scheduler`` sections; the
            top-level ``cuda``, ``load`` and ``cwd`` entries are also read.
        model_name: Configuration key of the model to train, ``'model_ViT'``
            or ``'model_PVT_V2'``. When ``None`` (default) the user is
            prompted, which keeps the original interactive behaviour.
        checkpoint_name: Name of the checkpoint folder to resume from. Only
            used when ``config.load`` is truthy; when ``None`` (default) the
            available checkpoints are listed and the user is prompted.

    Raises:
        AssertionError: If ``model_name`` is not one of the two supported keys.
    """
    now = datetime.datetime.now()
    dt_string = now.strftime("%d_%m_%Y_%H.%M.%S")
    if model_name is None:
        model_name = input("Choose model in your configuration: {model_ViT}, {model_PVT_V2} ? : ")
    assert model_name in ['model_ViT', 'model_PVT_V2'], "You must decleare the model as in description!"
    print('-'*50)
    model_config = config[model_name]

    # Writer. It is not handed to the Trainer below, so it is closed in the
    # `finally` clause to release its event file even when training raises.
    writer = SummaryWriter('./runs/' + f'model_{model_config.name}/date_{dt_string}')
    try:
        # Device: use the first GPU only when one is available AND the config allows it.
        device = torch.device("cuda:0" if (torch.cuda.is_available() and config.cuda) else "cpu")

        # Model
        model = get_model(model_config.name)

        # Optimizer
        optimizer, scheduler = select_scheduler_optimizer(model, model_config)

        # Epoch to start from; replaced by the stored value when resuming.
        start_epoch = 0

        # Load model from checkpoint if config load = True
        if config.load:
            print('----- LOADING CHECKPOINTS -----')
            if checkpoint_name is None:
                # Show the available checkpoints before asking for one.
                get_checkpoints(model_config.name)
                checkpoint_name = input("Choose one of these checkpoints above: ")
            cpkt_fol_name = os.path.join(config.cwd, f'checkpoints/model_{model_config.name}/{checkpoint_name}')
            checkpoint_dirmodel = f'{cpkt_fol_name}/model_best_checkpoint.pth'
            model, optimizer, scheduler, start_epoch = load_checkpoint(checkpoint_dirmodel, model, optimizer, scheduler)
            # NOTE(review): presumably moves the restored optimizer state onto
            # `device` -- helper is defined outside this notebook; confirm.
            optimizer_to_cuda(optimizer, device)

            print(f'Optim: {optimizer}')
            print(f'Scheduler: {scheduler._last_lr}')
            print(f'Start epoch: {start_epoch}')

        # Add to GPU (if able)
        model = torch.nn.DataParallel(model).to(device)

        # Create new checkpoint folder name for this run (timestamped), even
        # when resuming from an older checkpoint.
        cpkt_fol_name = os.path.join(config.cwd, f'checkpoints/model_{model_config.name}/date_{dt_string}')

        # Logger
        logname = str('LOG_' + model_config.name)
        log = Logger(path=cpkt_fol_name, name=logname).get_logger()
        log.info(f"Checkpoint Folder {cpkt_fol_name} ")

        log.info(f"date and time = {dt_string}")
        log.info(f'pyTorch VERSION:{torch.__version__}')
        log.info(f'CUDA VERSION:{torch.version.cuda}')
        log.info(f'CUDNN VERSION:{torch.backends.cudnn.version()}')
        log.info(f'Number CUDA Devices: {torch.cuda.device_count()}')
        log.info(f'device: {device}')

        # Trainer
        trainer = Trainer(config=config, model=model, optimizer=optimizer,
                          logger=log,
                          start_epoch=start_epoch,
                          lr_scheduler=scheduler,
                          checkpoint_dir=cpkt_fol_name)
        trainer.train()
    finally:
        writer.close()

In [6]:
# Launch training. When called like this, engine() prompts for the model key
# and, if config.load is set, for the checkpoint to resume from.
engine(train_config)

Choose model in your configuration: {model_ViT}, {model_PVT_V2} ? : model_PVT_V2
--------------------------------------------------
Use optimizer Adam with lr:  0.0004
----- LOADING CHECKPOINTS -----
1 :     date_04_10_2021_18.31.26
2 :     date_05_10_2021_00.01.43
3 :     date_05_10_2021_12.56.09
4 :     date_05_10_2021_12.58.04
5 :     date_05_10_2021_13.03.14
6 :     date_05_10_2021_13.05.25
7 :     date_05_10_2021_13.14.19
8 :     date_05_10_2021_13.16.22
9 :     date_05_10_2021_13.17.29
Choose one of these checkpoints above: date_05_10_2021_00.01.43
Checkpoint dist contains:  dict_keys(['epoch', 'state_dict', 'optimizer', 'scheduler', 'loss'])
Optim: AdamW (
Parameter Group 0
    amsgrad: False
    betas: (0.9, 0.999)
    eps: 1e-08
    lr: 0.0004
    weight_decay: 0.05
)
Scheduler: [0.0004]
Start epoch: 21
2021-10-05-13.24.03 | INFO | LOG_PVT_V2 | Checkpoint Folder C:/Users/ASUS/COVID_19\checkpoints/model_PVT_V2/date_05_10_2021_13.23.09 
2021-10-05-13.24.03 | INFO | LOG_PVT_V2 | 

NameError: name 'train_loss' is not defined

In [None]:
# Release unused GPU memory held by PyTorch's caching allocator after the run.
torch.cuda.empty_cache()