In [1]:
import csv
import logging
import argparse
import os
from pathlib import Path
from datetime import datetime

import numpy as np
from tqdm import tqdm
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, average_precision_score
from sklearn.preprocessing import label_binarize

import torch
import torch.nn as nn
from torch import optim
import torch.nn.functional as F
from torch.utils.data import DataLoader, random_split
from torchvision import transforms

from train.clf_evaluate import evaluate_model
from utils.data_loading import ClassificationDatasetJson
from utils.utils import EarlyStopping, stratified_split, ToHSV, ApplyCLAHE
from models.clf.ResNet.resnet import ResNetClassification
from models.clf.EfficientNet.model import EfficientNetClassification

In [2]:
# --- Run configuration -------------------------------------------------------
# Architecture to train; compared case-insensitively against the supported list.
model_name = 'EFFICIENT-NET'
num_classes = 2

# Spatial size handed to the resize transform and to the dataset.
size = (256, 256)

# Optimisation schedule.
epochs, batch_size = 100, 8
learning_rate = 1e-4

# Mixed precision switch (used for both autocast and the gradient scaler).
amp = True

# Fraction of the dataset held out for validation.
val = 0.2

In [3]:
# Prefer the CUDA device when one is usable; otherwise run on the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

In [4]:
# Names of the architectures this notebook knows how to build.
MODEL = ['RES-NET', 'EFFICIENT-NET']
model = None

# Normalise once so the membership check and the dispatch below agree.
requested_model = model_name.upper()
assert requested_model in MODEL, f'Model optional type are :{MODEL}, please choose available model_name'

if requested_model == 'EFFICIENT-NET':
    model = EfficientNetClassification(in_channels=3, num_classes=num_classes)
elif requested_model == 'RES-NET':
    model = ResNetClassification(num_classes)

# Move the freshly built network onto the selected device.
model = model.to(device=device)

Loaded pretrained weights for efficientnet-b0


In [5]:
# set save directory
# Outputs are grouped as <root>/<model_name>/<YYYY-MM-DD>/<HH>/.
# Take a single timestamp so the date and hour components always describe the
# same instant (two separate now() calls could straddle an hour/day rollover).
run_started_at = datetime.now()
now_time = run_started_at.strftime("%Y-%m-%d")
now_h = run_started_at.strftime("%H")

dir_checkpoint = Path(f'./MelNv/checkpoints/{model_name}')
dir_indicators = Path(f'./MelNv/indicators/{model_name}')
dir_checkpoint_save = dir_checkpoint / now_time / now_h
dir_indicators_save = dir_indicators / now_time / now_h

# Create both run directories (and any missing parents); re-running is harmless.
for save_dir in (dir_checkpoint_save, dir_indicators_save):
    save_dir.mkdir(parents=True, exist_ok=True)

In [6]:
# Column names of the per-epoch indicators CSV: bookkeeping columns first,
# then the training metrics, then the validation metrics.
indicators_header = [
    'epoch', 'train_loss', 'learning_rate',
    'train_acc', 'train_precision', 'train_recall', 'train_f1', 'train_mAP',
    'val_acc', 'val_precision', 'val_recall', 'val_f1', 'val_mAP',
]
epoch_csv_path = f'{dir_indicators_save}/train_indicators.csv'

# Opening in 'w' mode (re)creates the file so it holds only the header row.
with open(epoch_csv_path, 'w', newline='') as header_file:
    csv.writer(header_file).writerow(indicators_header)

In [7]:
# Image preprocessing pipeline, applied in order:
#   1. resize to the configured `size`
#   2. ApplyCLAHE (project helper from utils.utils; presumably contrast-limited
#      adaptive histogram equalisation - confirm against its implementation)
#   3. convert to a tensor
preprocessing_steps = [
    transforms.Resize(size),
    ApplyCLAHE(),
    transforms.ToTensor(),
]
transform = transforms.Compose(preprocessing_steps)

In [8]:
# Build the dataset from its JSON description.
# NOTE(review): this is an absolute, machine-specific path; the notebook only
# runs as-is where this file exists.
json_data_path = 'E:/Code/Git/All_Model/datasets/json/MelNvTrain_dataset.json'
dataset = ClassificationDatasetJson(json_dir=json_data_path, size=size, transform=transform)

# Hold out a `val` fraction for validation; the remainder is used for training.
total_samples = len(dataset)
n_val = int(total_samples * val)
n_train = total_samples - n_val

# A generator seeded with 0 makes the random split repeatable across runs.
split_generator = torch.Generator().manual_seed(0)
train_set, val_set = random_split(dataset, [n_train, n_val], generator=split_generator)

# create data loaders
# os.cpu_count() may return None when the CPU count cannot be determined;
# fall back to 0 workers (load in the main process) instead of handing None
# to DataLoader.
loader_args = dict(batch_size=batch_size, num_workers=os.cpu_count() or 0, pin_memory=True)
train_loader = DataLoader(train_set, shuffle=True, **loader_args)
# drop_last=True discards a trailing partial validation batch, so every
# validation batch has exactly `batch_size` samples.
val_loader = DataLoader(val_set, shuffle=False, drop_last=True, **loader_args)

# Log a one-off summary of the run configuration before training starts.
training_summary = f'''Start training:
Epochs:             {epochs}
Batch size:         {batch_size}
Learning rate:      {learning_rate}
Training size:      {n_train}
Validation size:    {n_val}
Checkpoints:        {True}
Device:             {device.type}
Images size:        {size}
Mixed Precision:    {amp}
'''
logging.info(training_summary)

INFO:root:Loading json file successfully!
INFO:root:Start training:
Epochs:             100
Batch size:         8
Learning rate:      0.0001
Training size:      8478
Validation size:    2119
Checkpoints:        True
Device:             cuda
Images size:        (256, 256)
Mixed Precision:    True



In [9]:
# Training machinery: loss, optimizer, LR scheduler, AMP loss scaler, early stopping.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# 'max' mode: the scheduler reacts when the monitored score stops increasing.
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='max', patience=10)

# With enabled=False the scaler's calls become pass-throughs, so the same
# training code serves both the AMP and the full-precision case.
grad_scaler = torch.cuda.amp.GradScaler(enabled=amp)
early_stopping = EarlyStopping(patience=10, verbose=True)

# Running state shared with the training loop.
best_acc, global_step = 0.0, 0

In [None]:
# training loop
#
# For every epoch:
#   * iterate over train_loader, run a forward/backward pass under autocast and
#     step the optimizer through the gradient scaler;
#   * compute per-batch sklearn metrics and add them to the epoch accumulators;
#   * every `division_step` global steps, run evaluate_model on val_loader,
#     log the averaged scores and step the LR scheduler on validation recall;
#   * after the epoch, feed the mean validation accuracy to early_stopping.
#
# NOTE(review): the epoch_train_* / epoch_val_* accumulators are filled here
# but this cell never writes them to the indicators CSV or saves a checkpoint.

# Loop-invariant values, computed once.
# Validation runs every `division_step` optimizer steps (about 5 times per epoch).
division_step = n_train // (5 * batch_size)
n_val_batches = max(len(val_loader), 1)
# autocast is keyed on the device type; 'mps' is mapped to 'cpu'.
autocast_device = device.type if device.type != 'mps' else 'cpu'

for epoch in range(1, epochs + 1):
    model.train()

    val_times_every_epoch = 0

    # save epoch indicators
    epoch_train_loss = 0
    epoch_train_acc = 0
    epoch_train_precision = 0
    epoch_train_recall = 0
    epoch_train_f1 = 0
    epoch_train_ap = 0

    epoch_val_acc = 0
    epoch_val_precision = 0
    epoch_val_recall = 0
    epoch_val_f1 = 0
    epoch_val_ap = 0

    with tqdm(total=n_train, desc=f'Epoch {epoch}/{epochs}', unit='image') as pbar:
        for batch in train_loader:
            images, true_labels = batch['images'], batch['labels']
            images = images.to(device=device, dtype=torch.float32, memory_format=torch.channels_last)
            true_labels = true_labels.to(device=device, dtype=torch.long)

            with torch.autocast(autocast_device, enabled=amp):
                # proba_labels holds the raw model outputs; softmax is applied
                # only to derive the predicted class index.
                proba_labels = model(images)
                pred_labels = torch.argmax(F.softmax(proba_labels, dim=1), dim=1)

                loss = criterion(proba_labels, true_labels)

            optimizer.zero_grad(set_to_none=True)
            grad_scaler.scale(loss).backward()
            # Gradients are still multiplied by the loss scale at this point;
            # unscale them first so the clipping threshold applies to the
            # true gradient norm.
            grad_scaler.unscale_(optimizer)
            torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
            grad_scaler.step(optimizer)
            grad_scaler.update()
            pbar.update(images.shape[0])

            # calculate indicators (per batch, binary-averaged)
            true_labels_cpu = true_labels.cpu().numpy()
            proba_labels_cpu = proba_labels.detach().cpu().numpy()
            pred_labels_cpu = pred_labels.detach().cpu().numpy()

            train_acc = accuracy_score(true_labels_cpu, pred_labels_cpu, normalize=True)
            train_precision = precision_score(true_labels_cpu, pred_labels_cpu, average='binary', zero_division=0)
            train_recall = recall_score(true_labels_cpu, pred_labels_cpu, zero_division=0)
            train_f1 = f1_score(true_labels_cpu, pred_labels_cpu, zero_division=0)
            # Column 1 of the model output is used as the score of class 1.
            train_ap = average_precision_score(true_labels_cpu, proba_labels_cpu[:, 1])

            # update epoch information
            global_step += 1
            batch_loss = loss.item()
            epoch_train_loss += batch_loss
            epoch_train_acc += train_acc
            epoch_train_precision += train_precision
            epoch_train_recall += train_recall
            epoch_train_f1 += train_f1
            epoch_train_ap += train_ap
            pbar.set_postfix(**{'loss (batch)': batch_loss})

            # Start validation evaluate
            # Check division_step first: it is 0 when n_train < 5 * batch_size,
            # and the modulo must not be evaluated in that case.
            if division_step > 0 and global_step % division_step == 0:
                val_times_every_epoch += 1

                # indicators in every evaluate
                val_acc, val_precision, val_recall, val_f1, val_ap = evaluate_model(model, val_loader, device, amp, num_classes)
                # evaluate_model is defined elsewhere and may leave the network
                # in eval mode; re-entering train mode is a no-op if it does not.
                model.train()

                # average score for every evaluate
                val_acc_avg = sum(val_acc) / n_val_batches
                val_precision_avg = sum(val_precision) / n_val_batches
                val_recall_avg = sum(val_recall) / n_val_batches
                val_f1_avg = sum(val_f1) / n_val_batches
                val_ap_avg = sum(val_ap) / n_val_batches

                logging.info('Validation ACC score: {}'.format(np.round(val_acc_avg, 3)))
                logging.info('Validation Precision score: {}'.format(np.round(val_precision_avg, 3)))
                logging.info('Validation Recall score: {}'.format(np.round(val_recall_avg, 3)))
                logging.info('Validation F1 score: {}'.format(np.round(val_f1_avg, 3)))
                logging.info('Validation AP score: {}'.format(np.round(val_ap_avg, 3)))

                # score add in epoch indicators
                epoch_val_acc += val_acc_avg
                epoch_val_precision += val_precision_avg
                epoch_val_recall += val_recall_avg
                epoch_val_f1 += val_f1_avg
                epoch_val_ap += val_ap_avg

                # update scheduler (monitors validation recall)
                scheduler.step(val_recall_avg)

    # Without any validation round there is no score to average; skip the
    # early-stopping check instead of dividing by zero.
    if val_times_every_epoch == 0:
        logging.warning('No validation round ran in epoch %d; skipping early-stopping check.', epoch)
        continue

    epoch_val_acc_avg = np.round((epoch_val_acc / val_times_every_epoch), 3)

    # set early stop
    stop = early_stopping(epoch_val_acc_avg)
    if stop:
        break


Epoch 1/100:  11%|█▏        | 968/8478 [08:03<54:48,  2.28image/s, loss (batch)=0.426]  