In [1]:
import os, re, magic, shutil
from glob import glob
import time, datetime
import random
import pandas as pd
import numpy as np
from tqdm import tqdm
import warnings
import joblib
import datetime as dt

import cv2
from PIL import Image
import matplotlib.pyplot as plt
import torch, gc
from torch import nn
from torch.utils.data import Dataset,DataLoader
from torch.utils.data.sampler import SequentialSampler, RandomSampler
import torch.nn.functional as F
import torchvision

#from skimage import io
import sklearn
from sklearn.model_selection import GroupKFold, StratifiedKFold
from sklearn.metrics import roc_auc_score, log_loss, f1_score, confusion_matrix, classification_report
from sklearn import metrics, preprocessing
from scipy.ndimage import zoom

import timm
from timm.data import resolve_data_config
from timm.data.transforms_factory import create_transform
import albumentations as A
import albumentations.pytorch
import wandb
from catalyst.data.sampler import BalanceClassSampler
from torch.utils.data.distributed import DistributedSampler

#### Hyperparameter settings

In [2]:
# Global experiment configuration, read by the data pipeline, the model
# factories and the training loop further down in the notebook.
CFG = {
    'fold_num': 5,  # number of StratifiedKFold splits
    'seed': 42,  # passed to seed_everything() and used as the k-fold random_state
    't_model': 'inception_resnet_v2',  # timm architecture of the teacher; also the teacher weight file name
    'load_model': 'inception_resnet_v2_20230421010009', # LOAD TEACHER MODEL (directory under 'model_path')
    's_model': 'mobilenetv2_100',  # timm architecture of the student; also the saved checkpoint name
    'img_size': 260,  # height/width produced by the albumentations pipelines
    'alpha': 0.5,  # weight of the hard-label loss; (1 - alpha) weights the distillation loss
    'epochs': 200,  # maximum number of epochs per fold (early stopping may end sooner)
    'train_bs':64,  # training batch size
    'valid_bs':32,  # validation batch size
    'lr': 1e-4, ## learning rate
    'num_workers': 8,  # DataLoader worker processes
    'verbose_step': 1,  # progress-bar description is refreshed every N training steps
    'patience' : 3,  # EarlyStopping patience (epochs without validation-F1 improvement)
    'device': 'cuda:0',  # device string handed to torch.device
    'freezing': False,  # when True, ForcepImgClassifier.freezing() freezes the early parameters
    'trainable_layer': 6,  # number of trailing "layers" left trainable when freezing
    'model_path': './models'  # root directory for loading teacher weights and saving student weights
}

#### wandb init

In [3]:
# Timestamp-based identifiers: the same run id names the wandb project/run
# and (later) the checkpoint directory of the student model.
time_now = dt.datetime.now()
run_id = f"{time_now:%Y%m%d%H%M%S}"
user = 'hojunking'
project_name = 'KD_test_' + run_id
run_name = 'KD_test_' + run_id

In [4]:
# Collect image paths per class label and carve out fixed-size train / test
# subsets. Each label is expected to live in its own sub-directory of main_path.
main_path = '../Data/carbon_data/'
label_list = ["10Kwalk","battery",'box','else','green dish','handkerchief', 'leftover',
              'milk', 'paper', 'pet','plug','receipt', 'shopping bag', 'stairs', 'toothcup',
             'transportation', 'trash picking', 'tumbler', 'wrap']

total_train_img_paths = []
total_train_img_labels = []
total_test_img_paths = []
total_test_img_labels = []

for label in label_list:  # iterate over every class label
    print(f'label: {label}')
    img_paths = []

    # Default number of images taken per label for the train / test subsets.
    train_ratio = 150
    test_ratio = 50
    # Upper bound on the number of files collected for a single label.
    max_files = train_ratio + test_ratio + 10000

    dir_path = main_path + label  # directory holding this label's images
    for folder, subfolders, filenames in os.walk(dir_path):  # visit every file below the label directory
        remaining = max_files - len(img_paths)
        if remaining <= 0:
            break  # cap reached: stop walking instead of re-checking in every sub-folder
        img_paths.extend(folder + '/' + img for img in filenames[:remaining])

    random.shuffle(img_paths)
    print(f'img_paths len : {len(img_paths)}\n')

    # Disabled per-label (train_ratio, test_ratio) overrides, kept for reference:
    #   else: (200, 29)            green dish: (1392, 300)     handkerchief: (2000, 433)
    #   milk: (2000, 374)          paper: (1300, 365)          pet: (2000, 402)
    #   plug: (2200, 725)          receipt: (600, 243)         shopping bag: (1300, 413)
    #   stairs: (3000, 1057)       toothcup: (2200, 603)       transportation: (1800, 435)
    #   trash picking: (1300, 327) tumbler: (2500, 845)        wrap: (1300, 389)

    train_paths = img_paths[:train_ratio]
    # Take the test images only from what is left after the train slice, so the
    # two subsets can never overlap when a label has fewer than
    # train_ratio + test_ratio files. With enough files this is identical to
    # img_paths[-test_ratio:].
    test_paths = img_paths[train_ratio:][-test_ratio:]

    total_train_img_paths.extend(train_paths)
    total_train_img_labels.extend([label] * len(train_paths))

    total_test_img_paths.extend(test_paths)
    total_test_img_labels.extend([label] * len(test_paths))

print('Train_Images: ',len(total_train_img_paths))
print("Train_Images_labels:", len(total_train_img_labels))
print('Test_Images: ',len(total_test_img_paths))
print("Test_Images_labels:", len(total_test_img_labels))

label: 10Kwalk
img_paths len : 1953

label: battery
img_paths len : 2800

label: box
img_paths len : 1987

label: else
img_paths len : 229

label: green dish
img_paths len : 1682

label: handkerchief
img_paths len : 2433

label: leftover
img_paths len : 1978

label: milk
img_paths len : 2374

label: paper
img_paths len : 1665

label: pet
img_paths len : 2402

label: plug
img_paths len : 2925

label: receipt
img_paths len : 843

label: shopping bag
img_paths len : 1713

label: stairs
img_paths len : 4057

label: toothcup
img_paths len : 2803

label: transportation
img_paths len : 2235

label: trash picking
img_paths len : 1627

label: tumbler
img_paths len : 3345

label: wrap
img_paths len : 1689

Train_Images:  2850
Train_Images_labels: 2850
Test_Images:  950
Test_Images_labels: 950


In [5]:
## Build the training DataFrame: file name, containing directory and class label
trn_df = pd.DataFrame(
    {
        'image_id': [os.path.basename(p) for p in total_train_img_paths],
        'dir': [os.path.dirname(p) for p in total_train_img_paths],
        'label': total_train_img_labels,
    }
)
train = trn_df
train

Unnamed: 0,image_id,dir,label
0,10Kwalk_1504.jpg,../Data/carbon_data/10Kwalk,10Kwalk
1,10Kwalk_1142.jpg,../Data/carbon_data/10Kwalk,10Kwalk
2,10Kwalk_237.jpg,../Data/carbon_data/10Kwalk,10Kwalk
3,10Kwalk_520.jpg,../Data/carbon_data/10Kwalk,10Kwalk
4,10Kwalk_77.jpg,../Data/carbon_data/10Kwalk,10Kwalk
...,...,...,...
2845,wrap_139.jpg,../Data/carbon_data/wrap,wrap
2846,wrap_659.jpg,../Data/carbon_data/wrap,wrap
2847,wrap_469.jpg,../Data/carbon_data/wrap,wrap
2848,wrap_39.jpg,../Data/carbon_data/wrap,wrap


In [6]:
# Label Encoding
# Replaces the string labels in train['label'] with integer ids, in place.
# The fitted encoder `le` stays in the global namespace and is used again by
# ColonDataset, so it must not be refitted on a different label set.
le = preprocessing.LabelEncoder()
train['label'] = le.fit_transform(train['label'].values)

In [7]:
# Sanity check: show the encoded integer labels.
train.label.values

array([ 0,  0,  0, ..., 18, 18, 18])

In [8]:
def seed_everything(seed):
    """Seed every random number generator used by the notebook.

    Seeds Python's ``random``, NumPy and PyTorch (CPU and all CUDA devices),
    exports ``PYTHONHASHSEED`` and puts cuDNN into deterministic mode.

    Args:
        seed: Integer seed shared by all generators.
    """
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    # Seed every visible GPU, not only the current one (DataParallel is used later).
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # The cuDNN auto-tuner may pick different kernels from run to run, which
    # defeats the deterministic flag above, so it has to be disabled.
    torch.backends.cudnn.benchmark = False

In [9]:
def get_img(path, sub_path=None):
    """Load an image from disk and return it in RGB channel order.

    If the image cannot be read, a placeholder image is loaded instead so that
    one broken file does not abort a whole epoch.

    Args:
        path: Path of the image file to read.
        sub_path: Unused; kept so existing callers keep working.

    Returns:
        The decoded image with the channel axis reversed (BGR -> RGB).

    Raises:
        FileNotFoundError: If neither ``path`` nor the placeholder image can be read.
    """
    fallback_path = '../Data/carbon_reduction/temp_img.jpg'

    try:
        im_bgr = cv2.imread(path)
    except Exception:  # best effort: any decoding error falls back to the placeholder
        im_bgr = None

    # cv2.imread signals a missing/corrupt file by returning None, not by raising.
    if im_bgr is None:
        warnings.warn(f'Could not read image {path!r}; using placeholder {fallback_path!r}')
        im_bgr = cv2.imread(fallback_path)
        if im_bgr is None:
            raise FileNotFoundError(
                f'Could not read image {path!r} and placeholder {fallback_path!r} is missing or unreadable'
            )

    return im_bgr[:, :, ::-1]

In [10]:
# Default training augmentation: random crop + resize to CFG['img_size'],
# optional rotation / horizontal flip, colour jitter, normalisation and
# conversion to a torch tensor.
transform_train = A.Compose(
    [
        A.RandomResizedCrop(p=1, height=CFG['img_size'] ,width=CFG['img_size'], scale=(0.65, 0.75),ratio=(0.90, 1.10)),
        # interpolation=2 / border_mode=0 are OpenCV enum values
        # (presumably INTER_CUBIC / BORDER_CONSTANT -- TODO confirm).
        A.SafeRotate(p=0.5, limit=(-20, 20), interpolation=2, border_mode=0, value=(0, 0, 0), mask_value=None),
        A.HorizontalFlip(p=0.5),
        # NOTE(review): always_apply=True and p=0.5 are both set; always_apply
        # presumably takes precedence, so the jitter would run on every image -- confirm intent.
        A.ColorJitter(always_apply=True, p=0.5, contrast=0.2, saturation=0.3, hue=0.2),
        # mean/std are the commonly used ImageNet channel statistics.
        A.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225), max_pixel_value=255.0, always_apply=False, p=1.0),
        A.pytorch.transforms.ToTensorV2()
        ])

# Lighter training augmentation, passed to ColonDataset as `transform2` for
# the "capture image" classes: crop + resize and normalisation only (no
# rotation, flip or colour jitter).
transform_train_cap = A.Compose(
    [
        A.RandomResizedCrop(p=1, height=CFG['img_size'] ,width=CFG['img_size'], scale=(0.65, 0.85),ratio=(0.90, 1.10)),
        A.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225), max_pixel_value=255.0, always_apply=False, p=1.0),
        A.pytorch.transforms.ToTensorV2()
        ])

# Validation / test preprocessing: plain resize to CFG['img_size'] plus
# normalisation, without any random augmentation.
transform_test = A.Compose(
    [
        A.Resize(height = CFG['img_size'], width = CFG['img_size']),
        A.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225), max_pixel_value=255.0, always_apply=False, p=1.0),
        A.pytorch.transforms.ToTensorV2()
        ])

In [11]:
class ColonDataset(Dataset):
    """Image dataset backed by a DataFrame with ``image_id`` and ``label`` columns.

    Args:
        df: DataFrame with an ``image_id`` column (file name) and, when
            ``output_label`` is true, an integer-encoded ``label`` column.
        data_root: Sequence aligned with ``df`` that holds the directory of each image.
        transform: Default albumentations-style callable, invoked as ``transform(image=img)``.
        transform2: Optional alternative transform applied to the "capture
            image" classes ('10Kwalk', 'battery', 'receipt') instead of ``transform``.
        output_label: When true, ``__getitem__`` returns ``(image, label)``,
            otherwise only the image.
    """

    def __init__(self, df, data_root, transform=None, transform2=None, output_label=True):
        super().__init__()
        self.df = df.reset_index(drop=True).copy()
        self.transform = transform
        self.transform2 = transform2
        self.data_root = data_root
        self.output_label = output_label

        if output_label == True:
            self.labels = self.df['label'].values

        # EXCEPTION TRANSFORM FOR CAPTURE IMAGES
        # Look the ids up with transform(), NOT fit_transform(): refitting the
        # shared encoder on three names would return [0, 1, 2] (wrong classes)
        # and would also overwrite the mapping of the global `le`.
        # Names unknown to the encoder are skipped instead of raising.
        known_classes = set(getattr(le, 'classes_', []))
        cap_names = [name for name in ['10Kwalk', 'battery','receipt'] if name in known_classes]
        if cap_names:
            self.cap_image = le.transform(cap_names)
        else:
            self.cap_image = np.array([], dtype=int)

    def __len__(self):
        return self.df.shape[0]

    def __getitem__(self, index: int):
        # GET LABEL (None when the dataset is used without labels)
        target = self.labels[index] if self.output_label else None

        # GET IMAGE
        path = "{}/{}".format(self.data_root[index], self.df.iloc[index]['image_id'])
        img = get_img(path)

        # TRANSFORM1, TRANSFORM2 PROCESS
        if self.transform:
            use_cap_transform = bool(self.transform2) and target is not None and target in self.cap_image
            if use_cap_transform:
                transformed = self.transform2(image=img)
            else:
                transformed = self.transform(image=img)
            img = transformed['image']

        if self.output_label == True:
            return img, target
        else:
            return img

In [12]:
class ForcepImgClassifier(nn.Module):
    """Thin ``nn.Module`` wrapper around a timm classification model.

    Args:
        model_arch: timm architecture name passed to ``timm.create_model``.
        n_class: Number of output classes.
        pretrained: Whether timm should load pretrained weights.
    """

    def __init__(self, model_arch, n_class=2, pretrained=False):
        super().__init__()
        self.model = timm.create_model(model_arch, pretrained=pretrained, num_classes=n_class)

    def freezing(self, freeze=False, trainable_layer = 2):
        """Optionally freeze all but the last parameters of the wrapped model.

        Args:
            freeze: When false nothing is changed.
            trainable_layer: Number of trailing "layers" kept trainable. Each
                layer is counted as two parameter tensors (presumably weight
                and bias -- verify for the chosen architecture), so the last
                ``trainable_layer * 2`` tensors keep ``requires_grad``.
        """
        if not freeze:
            return

        # Use the wrapped model's own parameters; the original code referred
        # to an undefined global `model`.
        params = list(self.model.parameters())
        first_trainable = len(params) - trainable_layer * 2
        for position, param in enumerate(params):
            if position < first_trainable:
                param.requires_grad = False

    def forward(self, x):
        return self.model(x)

In [13]:
def prepare_dataloader(df, trn_idx, val_idx, data_root=train.dir.values):
    """Build the train / validation DataLoaders for one cross-validation fold.

    The training loader draws samples with a ``WeightedRandomSampler`` whose
    per-sample weight is ``num_samples / class_count`` (rounded to two
    decimals), so rare classes are drawn more often.

    Args:
        df: Full training DataFrame with ``image_id`` and integer ``label`` columns.
        trn_idx: Row indices of the training split.
        val_idx: Row indices of the validation split.
        data_root: Array aligned with ``df`` holding each image's directory.
            NOTE: the default is evaluated once, when this function is defined.

    Returns:
        Tuple ``(train_loader, val_loader)``.
    """
    train_ = df.loc[trn_idx,:].reset_index(drop=True)
    valid_ = df.loc[val_idx,:].reset_index(drop=True)
    train_data_root = data_root[trn_idx]
    valid_data_root = data_root[val_idx]

    train_ds = ColonDataset(train_,
                            train_data_root,
                            transform=transform_train,
                            transform2=transform_train_cap,
                            output_label=True)
    valid_ds = ColonDataset(valid_,
                            valid_data_root,
                            transform=transform_test,
                            output_label=True)

    # WEIGHTEDRANDOMSAMPLER
    class_counts = train_.label.value_counts(sort=False).to_dict()
    num_samples = sum(class_counts.values())
    print(f'cls_cnts: {len(class_counts)}\nnum_samples:{num_samples}')

    # Class weight = total number of training samples / samples of that class.
    class_weights = {l: round(num_samples/class_counts[l], 2) for l in class_counts}

    # Expand the class weights to one weight per training sample for the sampler.
    weights = [class_weights[sample_label] for sample_label in train_.label.to_list()]
    sampler = torch.utils.data.WeightedRandomSampler(torch.DoubleTensor(weights), int(num_samples))

    train_loader = torch.utils.data.DataLoader(
        train_ds,
        batch_size=CFG['train_bs'],
        pin_memory=True,
        drop_last=False,
        shuffle=False,  # ordering is delegated to the sampler
        sampler=sampler,
        num_workers=CFG['num_workers']
    )
    val_loader = torch.utils.data.DataLoader(
        valid_ds,
        batch_size=CFG['valid_bs'],
        num_workers=CFG['num_workers'],
        shuffle=False,
        pin_memory=True,
    )
    return train_loader, val_loader

In [14]:
# def distill_loss(student_logits, labels, teacher_logits, criterion, alpha=0.1):
#     # TEACHER & STUDENT LOSS
#     distillation_loss = criterion(student_logits, teacher_logits)
    
#     # STUDENT & LABEL LOSS
#     student_loss = criterion(student_logits, labels)
#     loss_b = alpha * student_loss + (1-alpha) * distillation_loss

#     return loss_b

In [15]:
def distill_loss(student_logits, labels, teacher_logits, criterion, alpha=0.1, temperature=2):
    """Knowledge-distillation loss: hard-label loss blended with a soft-target KL term.

    Args:
        student_logits: Raw student outputs; softmax is taken over dim 1.
        labels: Ground-truth targets passed to ``criterion``.
        teacher_logits: Raw teacher outputs, same layout as ``student_logits``.
        criterion: Callable ``criterion(student_logits, labels)`` for the hard-label loss.
        alpha: Weight of the hard-label loss; ``1 - alpha`` weights the distillation term.
        temperature: Softmax temperature applied to both logit sets.

    Returns:
        ``alpha * student_loss + (1 - alpha) * distillation_loss``.
    """
    # STUDENT & LABEL LOSS
    student_loss = criterion(student_logits, labels)

    # TEACHER & STUDENT LOSS
    teacher_probs = F.softmax(teacher_logits / temperature, dim=1)
    # log_softmax is computed in a numerically stable way; log(softmax(x))
    # yields -inf (and an infinite loss) as soon as a probability underflows.
    student_log_probs = F.log_softmax(student_logits / temperature, dim=1)
    # The temperature ** 2 factor rescales the KL term after dividing the logits by the temperature.
    distillation_loss = F.kl_div(student_log_probs, teacher_probs, reduction="batchmean") * (temperature ** 2)

    # FINAL LOSS
    return alpha * student_loss + (1 - alpha) * distillation_loss


In [16]:
def train_one_epoch(epoch, s_model, t_model, loss_tr, optimizer, train_loader, device, scheduler=None, alpha =0.1):
    """Train the student for one epoch with knowledge distillation from the teacher.

    NOTE: relies on the module-level ``scaler`` (a ``torch.cuda.amp.GradScaler``
    created by the training script) and on ``CFG['verbose_step']``.

    Args:
        epoch: Epoch index, used only in the progress-bar text.
        s_model: Student model (set to train mode and updated).
        t_model: Teacher model (set to eval mode; evaluated without gradients).
        loss_tr: Hard-label criterion forwarded to ``distill_loss``.
        optimizer: Optimizer over the student parameters.
        train_loader: Iterable of ``(images, labels)`` batches.
        device: Device the batches are moved to.
        scheduler: Optional LR scheduler, stepped once at the end of the epoch.
        alpha: Hard-label weight forwarded to ``distill_loss``.

    Returns:
        Tuple ``(predictions, accuracy, mean_batch_loss, confusion_matrix, macro_f1)``.
        ``mean_batch_loss`` is the sum of the detached batch losses divided by
        the number of batches.
    """
    # SET MODEL TRAINING MODE
    s_model.train()
    t_model.eval()

    running_loss = None
    loss_sum = 0
    student_preds_all = []
    image_targets_all = []

    pbar = tqdm(enumerate(train_loader), total=len(train_loader))
    for step, (imgs, image_labels) in pbar:
        imgs = imgs.to(device).float()
        image_labels = image_labels.to(device).long()

        optimizer.zero_grad()

        # Only the forward passes and the loss belong inside autocast.
        with torch.cuda.amp.autocast():
            # STUDENT MODEL PREDICTION
            student_preds = s_model(imgs)

            # TEACHER MODEL DISTILLATION (NO UPDATE)
            with torch.no_grad():
                teacher_preds = t_model(imgs)

            loss = distill_loss(student_preds, image_labels, teacher_preds, loss_tr, alpha)

        loss_sum += loss.detach()

        # BACKPROPAGATION (outside autocast, as recommended for AMP)
        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()

        # Exponential moving average of the batch loss, for display only.
        batch_loss = loss.item()
        if running_loss is None:
            running_loss = batch_loss
        else:
            running_loss = running_loss * .99 + batch_loss * .01

        if ((step + 1) % CFG['verbose_step'] == 0) or ((step + 1) == len(train_loader)):
            pbar.set_description(f'epoch {epoch} loss: {running_loss:.4f}')

        student_preds_all += [torch.argmax(student_preds, 1).detach().cpu().numpy()]
        image_targets_all += [image_labels.detach().cpu().numpy()]

    if scheduler is not None:
        scheduler.step()

    student_preds_all = np.concatenate(student_preds_all)
    image_targets_all = np.concatenate(image_targets_all)

    matrix = confusion_matrix(image_targets_all,student_preds_all)
    epoch_f1 = f1_score(image_targets_all, student_preds_all, average='macro')

    accuracy = (student_preds_all==image_targets_all).mean()

    trn_loss = loss_sum/len(train_loader)

    return student_preds_all, accuracy, trn_loss, matrix, epoch_f1

def valid_one_epoch(epoch,s_model, t_model, loss_fn, val_loader, device, scheduler=None, schd_loss_update=False, alpha =0.1):
    """Evaluate the student on the validation loader using the distillation loss.

    Args:
        epoch: Epoch index, used only in the progress-bar text.
        s_model: Student model (set to eval mode).
        t_model: Teacher model (set to eval mode).
        loss_fn: Hard-label criterion forwarded to ``distill_loss``.
        val_loader: Iterable of ``(images, labels)`` batches.
        device: Device the batches are moved to.
        scheduler: Unused; kept for interface compatibility.
        schd_loss_update: Unused; kept for interface compatibility.
        alpha: Hard-label weight forwarded to ``distill_loss``.

    Returns:
        Tuple ``(predictions, accuracy, val_loss, confusion_matrix, macro_f1)``.
        ``val_loss`` is the mean of the per-batch losses, whereas the progress
        bar shows the sample-weighted running mean.
    """
    ## Sets the models to evaluation mode
    s_model.eval()
    t_model.eval()

    loss_sum = 0
    sample_num = 0
    avg_loss = 0
    student_preds_all = []
    image_targets_all = []

    pbar = tqdm(enumerate(val_loader), total=len(val_loader))
    # Disable autograd here so the function is safe even if the caller forgets
    # to wrap it in torch.no_grad().
    with torch.no_grad():
        for step, (imgs, image_labels) in pbar:
            imgs = imgs.to(device).float()
            image_labels = image_labels.to(device).long()

            student_preds = s_model(imgs)  # student prediction
            teacher_preds = t_model(imgs)  # teacher prediction

            loss = distill_loss(student_preds, image_labels, teacher_preds, loss_fn, alpha)

            student_preds_all += [torch.argmax(student_preds, 1).detach().cpu().numpy()]
            image_targets_all += [image_labels.detach().cpu().numpy()]

            batch_size = image_labels.shape[0]
            avg_loss += loss.item()
            loss_sum += loss.item()*batch_size
            sample_num += batch_size

            pbar.set_description(f'epoch {epoch} loss: {loss_sum/sample_num:.4f}')

    student_preds_all = np.concatenate(student_preds_all)
    image_targets_all = np.concatenate(image_targets_all)
    matrix = confusion_matrix(image_targets_all,student_preds_all)

    epoch_f1 = f1_score(image_targets_all, student_preds_all, average='macro')
    acc = (student_preds_all==image_targets_all).mean()
    val_loss = avg_loss/len(val_loader)

    return student_preds_all, acc, val_loss, matrix, epoch_f1

#### Early Stopping

In [17]:
class EarlyStopping:
    """Stop training when a higher-is-better score stops improving.

    Args:
        patience: Number of consecutive non-improving calls tolerated before stopping.
        verbose: Stored on the instance; not used by this class.
        delta: Minimum increase over the best score that counts as an improvement.
    """

    def __init__(self, patience=10, verbose=False, delta=0):
        self.patience = patience
        self.verbose = verbose
        self.counter = 0
        self.best_score = None
        self.early_stop = False
        # np.inf instead of np.Inf: the capitalised alias was removed in NumPy 2.0.
        self.val_loss_min = np.inf
        self.delta = delta

    def __call__(self, score):
        """Record ``score`` and return True once patience is exhausted."""
        if self.best_score is None:
            self.best_score = score
        elif score <= self.best_score + self.delta:
            # No improvement: count it and stop when the budget is used up.
            self.counter += 1
            print(f'EarlyStopping counter: {self.counter} out of {self.patience}')
            print(f'Best F1 score from now: {self.best_score}')
            if self.counter >= self.patience:
                self.early_stop = True
        else:
            # Improvement: remember the new best and reset the counter.
            self.best_score = score
            self.counter = 0

        return self.early_stop

In [18]:
# Stratified k-fold knowledge-distillation training of the student model.
# NOTE: `scaler` is created at module level inside the fold loop and is read
# as a global by train_one_epoch().
if __name__ == '__main__':
    seed_everything(CFG['seed'])

    # WANDB TRACKER INIT
    wandb.init(project=project_name, entity=user)
    wandb.config.update(CFG)
    wandb.run.name = run_name
    # The metric names must match the keys passed to wandb.log() below,
    # otherwise the "epoch" step metric is not applied to them.
    wandb.define_metric("Train Accuracy", step_metric="epoch")
    wandb.define_metric("Valid Accuracy", step_metric="epoch")
    wandb.define_metric("Train Loss", step_metric="epoch")
    wandb.define_metric("Valid Loss", step_metric="epoch")
    wandb.define_metric("Train F1", step_metric="epoch")
    wandb.define_metric("Valid F1", step_metric="epoch")
    wandb.define_metric("Train-Valid Accuracy", step_metric="epoch")

    model_dir = CFG['model_path'] + '/{}_{}'.format(CFG['s_model'], run_id)
    train_dir = train.dir.values
    best_fold = 0
    top_fold = None  # stays None if no fold ever beats the initial best_fold score
    best_f1 =0.0

    print('Model: {}'.format(CFG['s_model']))
    # MAKE MODEL DIR
    if not os.path.isdir(model_dir):
        os.makedirs(model_dir)

    # STRATIFIED K-FOLD DEFINITION
    folds = StratifiedKFold(n_splits=CFG['fold_num'], shuffle=True, random_state=CFG['seed']).split(np.arange(train.shape[0]), train.label.values)
    for fold, (trn_idx, val_idx) in enumerate(folds):
        print(f'Training start with fold: {fold} epoch: {CFG["epochs"]} \n')

        # EARLY STOPPING DEFINITION
        early_stopping = EarlyStopping(patience=CFG["patience"], verbose=True)

        # DATALOADER DEFINITION
        train_loader, val_loader = prepare_dataloader(train, trn_idx, val_idx, data_root=train_dir)

        # MODEL & DEVICE DEFINITION
        device = torch.device(CFG['device'])
        student_model = ForcepImgClassifier(CFG['s_model'], train.label.nunique(), pretrained=True)
        # NOTE(review): the teacher's pretrained weights are overwritten by
        # load_state_dict() below, so pretrained=True looks redundant here.
        teacher_model = ForcepImgClassifier(CFG['t_model'], train.label.nunique(), pretrained=True)

        # T_MODEL DATA PARALLEL
        teacher_model.to(device)
        if torch.cuda.device_count() > 1:
            teacher_model = nn.DataParallel(teacher_model)

        # LOAD TEACHER_MODEL WEIGHT
        # NOTE(review): the state dict is loaded after the optional
        # DataParallel wrap, so the checkpoint keys must match that wrapping -- confirm.
        load_model = CFG['model_path'] +'/' + CFG['load_model'] + '/' + CFG['t_model']
        teacher_model.load_state_dict(torch.load(load_model, map_location=device))

        # MODEL FREEZING
        student_model.freezing(freeze = CFG['freezing'], trainable_layer = CFG['trainable_layer'])
        if CFG['freezing'] ==True:
            for name, param in student_model.named_parameters():
                if param.requires_grad == True:
                    print(f"{name}: {param.requires_grad}")

        # S_MODEL DATA PARALLEL
        student_model.to(device)
        if torch.cuda.device_count() > 1:
            student_model = nn.DataParallel(student_model)

        scaler = torch.cuda.amp.GradScaler()
        optimizer = torch.optim.Adam(student_model.parameters(), lr=CFG['lr'])
        scheduler = torch.optim.lr_scheduler.StepLR(optimizer, gamma=0.5, step_size=5)

        # DISTILLATION RATE
        alpha = CFG['alpha']

        # CRITERION (LOSS FUNCTION)
        loss_tr = nn.CrossEntropyLoss().to(device) #MyCrossEntropyLoss().to(device)
        loss_fn = nn.CrossEntropyLoss().to(device)
        wandb.watch(student_model, loss_tr, log='all')

        train_acc_list = []
        train_matrix_list = []
        train_f1_list = []
        valid_acc_list = []
        valid_matrix_list = []
        valid_f1_list = []

        start = time.time()
        print(f'Fold: {fold}\n')
        for epoch in range(CFG['epochs']):
            print('Epoch {}/{}'.format(epoch, CFG['epochs'] - 1))

            # TRAINING
            train_preds_all, train_acc, train_loss, train_matrix, train_f1 = train_one_epoch(epoch, student_model, teacher_model,loss_tr, optimizer, train_loader, device, scheduler=scheduler, alpha = alpha)
            wandb.log({'Train Accuracy':train_acc, 'Train Loss' : train_loss, 'Train F1': train_f1, 'epoch' : epoch})

            # VALIDATION
            with torch.no_grad():
                valid_preds_all, valid_acc, valid_loss, valid_matrix, valid_f1= valid_one_epoch(epoch, student_model, teacher_model, loss_fn, val_loader, device, scheduler=None, alpha = alpha)
                wandb.log({'Valid Accuracy':valid_acc, 'Valid Loss' : valid_loss, 'Valid F1': valid_f1 ,'epoch' : epoch})
            print(f'Epoch [{epoch}], Train Loss : [{train_loss :.5f}] Val Loss : [{valid_loss :.5f}] Val F1 Score : [{valid_f1:.5f}]')

            # SAVE ALL RESULTS
            train_acc_list.append(train_acc)
            train_matrix_list.append(train_matrix)
            train_f1_list.append(train_f1)

            valid_acc_list.append(valid_acc)
            valid_matrix_list.append(valid_matrix)
            valid_f1_list.append(valid_f1)

            # MODEL SAVE (THE BEST MODEL ACROSS ALL FOLDS; one file, overwritten on improvement)
            if valid_f1 > best_f1:
                best_f1 = valid_f1
                torch.save(student_model.state_dict(), (model_dir+'/{}').format(CFG['s_model']))

            # EARLY STOPPING
            stop = early_stopping(valid_f1)
            if stop:
                print("stop called")
                break

        end = time.time() - start
        time_ = str(datetime.timedelta(seconds=end)).split(".")[0]
        print("time :", time_)

        # PRINT BEST F1 SCORE MODEL OF FOLD
        best_index = valid_f1_list.index(max(valid_f1_list))
        print(f'fold: {fold}, Best Epoch : {best_index}/ {len(valid_f1_list)}')
        print(f'Best Train Marco F1 : {train_f1_list[best_index]:.5f}')
        print(train_matrix_list[best_index])
        print(f'Best Valid Marco F1 : {valid_f1_list[best_index]:.5f}')
        print(valid_matrix_list[best_index])
        print('-----------------------------------------------------------------------')

        ## K-FOLD END
        if valid_f1_list[best_index] > best_fold:
            best_fold = valid_f1_list[best_index]
            top_fold = fold
    print(f'Best Fold F1 score: {best_fold} Top fold : {top_fold}')


Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mhojunking[0m. Use [1m`wandb login --relogin`[0m to force relogin


Model: mobilenetv2_100
Training start with fold: 0 epoch: 200 

cls_cnts: 19
num_samples:2280
Fold: 0

Epoch 0/199


epoch 0 loss: 6.5727: 100%|█████████████████████| 18/18 [00:57<00:00,  3.18s/it]
epoch 0 loss: 4.4202: 100%|█████████████████████| 18/18 [00:16<00:00,  1.07it/s]

Epoch [0], Train Loss : [5.73288] Val Loss : [4.41256] Val F1 Score : [0.28393]
Epoch 1/199



epoch 1 loss: 4.1432: 100%|█████████████████████| 18/18 [00:50<00:00,  2.79s/it]
epoch 1 loss: 3.1437: 100%|█████████████████████| 18/18 [00:15<00:00,  1.14it/s]

Epoch [1], Train Loss : [3.77295] Val Loss : [3.14232] Val F1 Score : [0.51979]
Epoch 2/199



epoch 2 loss: 2.9839: 100%|█████████████████████| 18/18 [00:52<00:00,  2.90s/it]
epoch 2 loss: 2.2618: 100%|█████████████████████| 18/18 [00:16<00:00,  1.12it/s]

Epoch [2], Train Loss : [2.59937] Val Loss : [2.26317] Val F1 Score : [0.66465]
Epoch 3/199



epoch 3 loss: 2.0384: 100%|█████████████████████| 18/18 [00:50<00:00,  2.81s/it]
epoch 3 loss: 1.8181: 100%|█████████████████████| 18/18 [00:15<00:00,  1.17it/s]

Epoch [3], Train Loss : [1.91012] Val Loss : [1.82185] Val F1 Score : [0.71417]
Epoch 4/199



epoch 4 loss: 1.5832: 100%|█████████████████████| 18/18 [00:50<00:00,  2.82s/it]
epoch 4 loss: 1.5645: 100%|█████████████████████| 18/18 [00:15<00:00,  1.13it/s]

Epoch [4], Train Loss : [1.47887] Val Loss : [1.56839] Val F1 Score : [0.75885]
Epoch 5/199



epoch 5 loss: 1.3509: 100%|█████████████████████| 18/18 [00:51<00:00,  2.85s/it]
epoch 5 loss: 1.4740: 100%|█████████████████████| 18/18 [00:16<00:00,  1.06it/s]

Epoch [5], Train Loss : [1.31063] Val Loss : [1.47788] Val F1 Score : [0.77150]
Epoch 6/199



epoch 6 loss: 1.2289: 100%|█████████████████████| 18/18 [00:52<00:00,  2.91s/it]
epoch 6 loss: 1.4054: 100%|█████████████████████| 18/18 [00:15<00:00,  1.15it/s]

Epoch [6], Train Loss : [1.18195] Val Loss : [1.40846] Val F1 Score : [0.78060]
Epoch 7/199



epoch 7 loss: 0.9575: 100%|█████████████████████| 18/18 [00:51<00:00,  2.86s/it]
epoch 7 loss: 1.3471: 100%|█████████████████████| 18/18 [00:16<00:00,  1.09it/s]

Epoch [7], Train Loss : [1.08248] Val Loss : [1.35142] Val F1 Score : [0.80468]
Epoch 8/199



epoch 8 loss: 1.0062: 100%|█████████████████████| 18/18 [00:49<00:00,  2.72s/it]
epoch 8 loss: 1.2908: 100%|█████████████████████| 18/18 [00:15<00:00,  1.14it/s]

Epoch [8], Train Loss : [1.10285] Val Loss : [1.29682] Val F1 Score : [0.80442]
EarlyStopping counter: 1 out of 3
Best F1 score from now: 0.8046837434266739
Epoch 9/199



epoch 9 loss: 0.9165: 100%|█████████████████████| 18/18 [00:49<00:00,  2.76s/it]
epoch 9 loss: 1.2579: 100%|█████████████████████| 18/18 [00:16<00:00,  1.09it/s]

Epoch [9], Train Loss : [0.99356] Val Loss : [1.26255] Val F1 Score : [0.81876]
Epoch 10/199



epoch 10 loss: 0.9284: 100%|████████████████████| 18/18 [00:53<00:00,  2.99s/it]
epoch 10 loss: 1.2428: 100%|████████████████████| 18/18 [00:16<00:00,  1.09it/s]

Epoch [10], Train Loss : [0.98238] Val Loss : [1.24762] Val F1 Score : [0.80690]
EarlyStopping counter: 1 out of 3
Best F1 score from now: 0.818757097392439
Epoch 11/199



epoch 11 loss: 0.9045: 100%|████████████████████| 18/18 [00:50<00:00,  2.80s/it]
epoch 11 loss: 1.2240: 100%|████████████████████| 18/18 [00:16<00:00,  1.11it/s]

Epoch [11], Train Loss : [0.91656] Val Loss : [1.22937] Val F1 Score : [0.81554]
EarlyStopping counter: 2 out of 3
Best F1 score from now: 0.818757097392439
Epoch 12/199



epoch 12 loss: 0.8792: 100%|████████████████████| 18/18 [00:52<00:00,  2.92s/it]
epoch 12 loss: 1.2186: 100%|████████████████████| 18/18 [00:16<00:00,  1.11it/s]

Epoch [12], Train Loss : [0.89057] Val Loss : [1.22276] Val F1 Score : [0.82060]
Epoch 13/199



epoch 13 loss: 0.7353: 100%|████████████████████| 18/18 [00:48<00:00,  2.71s/it]
epoch 13 loss: 1.1971: 100%|████████████████████| 18/18 [00:15<00:00,  1.16it/s]

Epoch [13], Train Loss : [0.87112] Val Loss : [1.20070] Val F1 Score : [0.82237]
Epoch 14/199



epoch 14 loss: 0.7226: 100%|████████████████████| 18/18 [00:51<00:00,  2.87s/it]
epoch 14 loss: 1.1841: 100%|████████████████████| 18/18 [00:16<00:00,  1.10it/s]

Epoch [14], Train Loss : [0.83957] Val Loss : [1.18796] Val F1 Score : [0.82171]
EarlyStopping counter: 1 out of 3
Best F1 score from now: 0.8223681916104675
Epoch 15/199



epoch 15 loss: 0.7911: 100%|████████████████████| 18/18 [00:52<00:00,  2.92s/it]
epoch 15 loss: 1.1843: 100%|████████████████████| 18/18 [00:16<00:00,  1.10it/s]

Epoch [15], Train Loss : [0.83346] Val Loss : [1.18873] Val F1 Score : [0.83113]
Epoch 16/199



epoch 16 loss: 0.9000: 100%|████████████████████| 18/18 [00:52<00:00,  2.94s/it]
epoch 16 loss: 1.1717: 100%|████████████████████| 18/18 [00:16<00:00,  1.11it/s]

Epoch [16], Train Loss : [0.80561] Val Loss : [1.17630] Val F1 Score : [0.83053]
EarlyStopping counter: 1 out of 3
Best F1 score from now: 0.8311319565110892
Epoch 17/199



epoch 17 loss: 0.9027: 100%|████████████████████| 18/18 [00:50<00:00,  2.80s/it]
epoch 17 loss: 1.1715: 100%|████████████████████| 18/18 [00:15<00:00,  1.14it/s]

Epoch [17], Train Loss : [0.85927] Val Loss : [1.17587] Val F1 Score : [0.83223]
Epoch 18/199



epoch 18 loss: 0.7573: 100%|████████████████████| 18/18 [00:50<00:00,  2.83s/it]
epoch 18 loss: 1.1514: 100%|████████████████████| 18/18 [00:17<00:00,  1.05it/s]

Epoch [18], Train Loss : [0.79021] Val Loss : [1.15536] Val F1 Score : [0.82475]
EarlyStopping counter: 1 out of 3
Best F1 score from now: 0.832233892784856
Epoch 19/199



epoch 19 loss: 0.7036: 100%|████████████████████| 18/18 [00:55<00:00,  3.06s/it]
epoch 19 loss: 1.1472: 100%|████████████████████| 18/18 [00:16<00:00,  1.09it/s]

Epoch [19], Train Loss : [0.79207] Val Loss : [1.15171] Val F1 Score : [0.83366]
Epoch 20/199



epoch 20 loss: 0.8847: 100%|████████████████████| 18/18 [00:51<00:00,  2.86s/it]
epoch 20 loss: 1.1416: 100%|████████████████████| 18/18 [00:15<00:00,  1.16it/s]

Epoch [20], Train Loss : [0.84771] Val Loss : [1.14556] Val F1 Score : [0.83007]
EarlyStopping counter: 1 out of 3
Best F1 score from now: 0.8336583094542044
Epoch 21/199



epoch 21 loss: 0.7923: 100%|████████████████████| 18/18 [00:49<00:00,  2.78s/it]
epoch 21 loss: 1.1442: 100%|████████████████████| 18/18 [00:15<00:00,  1.16it/s]

Epoch [21], Train Loss : [0.78223] Val Loss : [1.14865] Val F1 Score : [0.82953]
EarlyStopping counter: 2 out of 3
Best F1 score from now: 0.8336583094542044
Epoch 22/199



epoch 22 loss: 0.7324: 100%|████████████████████| 18/18 [00:49<00:00,  2.77s/it]
epoch 22 loss: 1.1391: 100%|████████████████████| 18/18 [00:17<00:00,  1.06it/s]

Epoch [22], Train Loss : [0.77673] Val Loss : [1.14396] Val F1 Score : [0.83427]
Epoch 23/199



epoch 23 loss: 0.7631: 100%|████████████████████| 18/18 [00:51<00:00,  2.84s/it]
epoch 23 loss: 1.1447: 100%|████████████████████| 18/18 [00:16<00:00,  1.09it/s]

Epoch [23], Train Loss : [0.79908] Val Loss : [1.14924] Val F1 Score : [0.83414]
EarlyStopping counter: 1 out of 3
Best F1 score from now: 0.8342710143200225
Epoch 24/199



epoch 24 loss: 0.7131: 100%|████████████████████| 18/18 [00:51<00:00,  2.86s/it]
epoch 24 loss: 1.1255: 100%|████████████████████| 18/18 [00:16<00:00,  1.09it/s]

Epoch [24], Train Loss : [0.78563] Val Loss : [1.13105] Val F1 Score : [0.83439]
Epoch 25/199



epoch 25 loss: 0.7004: 100%|████████████████████| 18/18 [00:51<00:00,  2.86s/it]
epoch 25 loss: 1.1334: 100%|████████████████████| 18/18 [00:15<00:00,  1.17it/s]

Epoch [25], Train Loss : [0.80007] Val Loss : [1.13827] Val F1 Score : [0.84354]
Epoch 26/199



epoch 26 loss: 0.7745: 100%|████████████████████| 18/18 [00:51<00:00,  2.87s/it]
epoch 26 loss: 1.1372: 100%|████████████████████| 18/18 [00:15<00:00,  1.15it/s]

Epoch [26], Train Loss : [0.81664] Val Loss : [1.14102] Val F1 Score : [0.83546]
EarlyStopping counter: 1 out of 3
Best F1 score from now: 0.8435357301388945
Epoch 27/199



epoch 27 loss: 1.0773: 100%|████████████████████| 18/18 [00:51<00:00,  2.87s/it]
epoch 27 loss: 1.1264: 100%|████████████████████| 18/18 [00:16<00:00,  1.08it/s]

Epoch [27], Train Loss : [0.83271] Val Loss : [1.13045] Val F1 Score : [0.84173]
EarlyStopping counter: 2 out of 3
Best F1 score from now: 0.8435357301388945
Epoch 28/199



epoch 28 loss: 0.6396: 100%|████████████████████| 18/18 [00:54<00:00,  3.03s/it]
epoch 28 loss: 1.1325: 100%|████████████████████| 18/18 [00:16<00:00,  1.07it/s]

Epoch [28], Train Loss : [0.80601] Val Loss : [1.13678] Val F1 Score : [0.83175]
EarlyStopping counter: 3 out of 3
Best F1 score from now: 0.8435357301388945
stop called
time : 0:33:12
fold: 0, Best Epoch : 25/ 29
Best Train Marco F1 : 0.89487
[[ 96   2   0   0   0   1   1   0   0   0   0   3   0   0   0   2   0   0
    0]
 [  1 126   0   0   0   0   0   0   0   0   0   9   0   0   0   0   0   0
    0]
 [  0   0 117   4   0   0   0   4   0   1   0   0   0   0   0   0   1   0
    0]
 [  0   2   1  75   1   1   0   2   5   2   1   3   9   1   1   5   2   3
    4]
 [  0   0   0   0 109   3   3   0   0   1   0   0   4   0   1   0   1   0
    4]
 [  0   0   1   1   0  92   1   0   3   1   0   0   9   0   1   0   2   0
    2]
 [  0   0   0   0   2   0 110   0   0   0   0   0   0   0   2   0   0   0
    4]
 [  0   0   2   2   0   0   0 109   2   0   3   0   2   0   0   0   0   0
    0]
 [  0   0   1   2   0   0   0   2 111   0   0   3   0   0   0   2   0   0
    0]
 [  0   0   0   1   1   1  


