**Load Module**

In [1]:
# Utils
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from tqdm import tqdm
import random
import os
import time
from datetime import datetime
from sklearn.model_selection import StratifiedShuffleSplit
from utility.utils import EarlyStopping, WarmUpLR, most_recent_folder, most_recent_weights, best_acc_weights, last_epoch

# Torch
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchsummary import summary
import torch.optim as optim
from torch.utils.data import DataLoader, SubsetRandomSampler, Subset

import torchvision
from torchvision import transforms, datasets

# Analysis
from sklearn.metrics import confusion_matrix, classification_report
import wandb


**Seed & Device Setting**

In [2]:
def set_seed(seed):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed) # GPU 여러 개 사용할 경우 사용
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

seed = 42
set_seed(seed)

# 디바이스 설정
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
device

device(type='cuda', index=0)

**Wandb Setting & Set Hyperparameters (수정 구간)**

In [None]:
config = {
    "batch_size" : 128,
    "num_epochs" : 300,
    "learning_rate" : 0.1,
    "momentum" : 0.9,
    "weight_decay" : 5e-4,
    "nesterov" : True,
    "gamma" : 0.4,
    "warm" : 1,
    "patience" : 150,
    "plateau_patience" : 15,
    "milestones" : [120, 190, 220, 260, 280],
    "scheduler" : "MultiStepLR",
    "pin_memory" : True,

    "resume" : False,
    "model_name" : "pyramidnet_cutmix",
    "stratified_data" : True,
    "alpha": 48,
    "depth": 110
}
if config["resume"] == True:
    config["warm"] = 0
model_name = config["model_name"]
wandb.init(project="CIFAR-100_Classification", name=config["model_name"], config=config)

In [4]:
fine_to_superclass = {
    4: 0, 30: 0, 55: 0, 72: 0, 95: 0,       # aquatic mammals
    1: 1, 32: 1, 67: 1, 73: 1, 91: 1,       # fish
    54: 2, 62: 2, 70: 2, 82: 2, 92: 2,      # flowers
    9: 3, 10: 3, 16: 3, 28: 3, 61: 3,       # food containers
    0: 4, 51: 4, 53: 4, 57: 4, 83: 4,       # fruit and vegetables
    22: 5, 39: 5, 40: 5, 86: 5, 87: 5,      # household electrical devices
    5: 6, 20: 6, 25: 6, 84: 6, 94: 6,       # household furniture
    6: 7, 7: 7, 14: 7, 18: 7, 24: 7,        # insects
    3: 8, 42: 8, 43: 8, 88: 8, 97: 8,       # large carnivores
    12: 9, 17: 9, 37: 9, 68: 9, 76: 9,      # large man-made outdoor things
    23: 10, 33: 10, 49: 10, 60: 10, 71: 10, # large natural outdoor scenes
    15: 11, 19: 11, 21: 11, 31: 11, 38: 11, # large omnivores and herbivores
    34: 12, 63: 12, 64: 12, 66: 12, 75: 12, # medium-sized mammals
    26: 13, 45: 13, 77: 13, 79: 13, 99: 13, # non-insect invertebrates
    2: 14, 11: 14, 35: 14, 46: 14, 98: 14,  # people
    27: 15, 29: 15, 44: 15, 78: 15, 93: 15, # reptiles
    36: 16, 50: 16, 65: 16, 74: 16, 80: 16, # small mammals
    47: 17, 52: 17, 56: 17, 59: 17, 96: 17, # trees
    8: 18, 13: 18, 48: 18, 58: 18, 90: 18,  # vehicles 1
    41: 19, 69: 19, 81: 19, 85: 19, 89: 19  # vehicles 2
}

# Data Preprocessing

**Data Augementation (수정 구간)**

In [5]:
CIFAR100_TRAIN_MEAN = (0.5070751592371323, 0.48654887331495095, 0.4409178433670343)
CIFAR100_TRAIN_STD = (0.2673342858792401, 0.2564384629170883, 0.27615047132568404)

train_val_transform = transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    # transforms.RandomRotation(degrees=15),
    # transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),
    transforms.ToTensor(),
    transforms.Normalize(CIFAR100_TRAIN_MEAN, CIFAR100_TRAIN_STD)
])

test_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(CIFAR100_TRAIN_MEAN, CIFAR100_TRAIN_STD)
])

**Splitting the training data**

In [6]:
train_val_data = datasets.CIFAR100(root='./data', train=True, download=True, transform=train_val_transform)
val_data = datasets.CIFAR100(root='./data', train=True, download=True, transform=test_transform)
test_data = datasets.CIFAR100(root='./data', train=False, download=True, transform=test_transform)

Files already downloaded and verified
Files already downloaded and verified
Files already downloaded and verified


In [7]:
# Stratified 방식으로 train 데이터를 train/val로 나누기
if config["stratified_data"]:
  labels = train_val_data.targets
  stratified_split = StratifiedShuffleSplit(n_splits=1, test_size=0.2, random_state=42)

  for train_index, val_index in stratified_split.split(train_val_data.data, labels):
      train_data = Subset(train_val_data, train_index)
      val_data = Subset(val_data, val_index)
else:
  # RandomSampler 방식으로 train 데이터를 train/val로 나누기
  num_train = len(train_val_data)
  indices = list(range(num_train))
  split = int(np.floor(0.2 * num_train))  # validation 데이터를 20%로 설정

  np.random.shuffle(indices)
  train_idx, val_idx = indices[split:], indices[:split]

  train_sampler = SubsetRandomSampler(train_idx)
  val_sampler = SubsetRandomSampler(val_idx)

**Define Dataloader**

In [8]:
if config["stratified_data"]:
  train_loader = DataLoader(train_data, batch_size=config["batch_size"], shuffle=True, num_workers=4, pin_memory=config["pin_memory"])
  val_loader = DataLoader(val_data, batch_size=config["batch_size"], shuffle=False, num_workers=4, pin_memory=config["pin_memory"])
  test_loader = DataLoader(test_data, batch_size=config["batch_size"], shuffle=False, num_workers=4, pin_memory=config["pin_memory"])
else:
  train_loader = DataLoader(train_val_data, batch_size=config["batch_size"], sampler=train_sampler, num_workers=4, pin_memory=config["pin_memory"])
  val_loader = DataLoader(train_val_data, batch_size=config["batch_size"], shuffle=False, sampler=val_sampler, num_workers=4, pin_memory=config["pin_memory"])
  test_loader = DataLoader(test_data, batch_size=config["batch_size"], shuffle=False, num_workers=4, pin_memory=config["pin_memory"])

**Model Initialize (수정 부분)**

In [9]:
# CutMix에서 반환되는 label은 criterion을 계산할 때 
# 두 개의 label을 (1 - lam) 비율로 가중치를 곱해 더하도록 수정
def cutmix_criterion(outputs, labels):
    if isinstance(labels, tuple):
        labels1, labels2, lam = labels
        loss = lam * F.cross_entropy(outputs, labels1) + (1 - lam) * F.cross_entropy(outputs, labels2)
    else:
        loss = F.cross_entropy(outputs, labels)

    return loss

In [13]:
from models import resnet, wideresnet
from models import pyramidNet

print("use:", device)

# 모델 초기화
model = pyramidNet.pyramidnet(config["depth"], config["alpha"])
net = model
# 모델을 GPU로 이동
net.to(device)

# print(summary(net, (3,224,224)))
optimizer = optim.SGD(net.parameters(), lr=config["learning_rate"], momentum=config["momentum"], weight_decay=config["weight_decay"], nesterov=config["nesterov"])

if config["scheduler"] == "MultiStepLR":
    train_scheduler = optim.lr_scheduler.MultiStepLR(optimizer, config["milestones"], gamma=config["gamma"]) #learning rate decay
elif config["scheduler"] == "ReduceLROnPlateau":
    train_scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, "min", factor=config["gamma"], patience=config["plateau_patience"])
else:
    print("No scheduler!!")
iter_per_epoch = len(train_loader)
if config["warm"]:
    warmup_scheduler = WarmUpLR(optimizer, iter_per_epoch * config["warm"])
early_stopping = EarlyStopping(patience=config["patience"], verbose=True)

print("Optimizer's state_dict:")
for var_name in optimizer.state_dict():
    print(var_name, "\t", optimizer.state_dict()[var_name])

use: cuda:0
Optimizer's state_dict:
state 	 {}
param_groups 	 [{'lr': 0.1, 'momentum': 0.9, 'dampening': 0, 'weight_decay': 0.0005, 'nesterov': False, 'maximize': False, 'foreach': None, 'differentiable': False, 'fused': None, 'initial_lr': 0.1, 'params': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61]}]


**CutMix 추가부분**

In [11]:
def cutmix(batch, alpha=1.0):
    input, labels = batch
    indices = torch.randperm(input.size(0))
    shuffled_input = input[indices]
    shuffled_labels = labels[indices]
    lam = np.random.beta(alpha, alpha)

    image_h, image_w = input.shape[2:]
    cx = np.random.uniform(0, image_w)
    cy = np.random.uniform(0, image_h)
    w = image_w * np.sqrt(1 - lam)
    h = image_h * np.sqrt(1 - lam)
    x0 = int(np.round(max(cx - w / 2, 0)))
    x1 = int(np.round(min(cx + w / 2, image_w)))
    y0 = int(np.round(max(cy - h / 2, 0)))
    y1 = int(np.round(min(cy + h / 2, image_h)))

    input[:, :, y0:y1, x0:x1] = shuffled_input[:, :, y0:y1, x0:x1]
    labels = (labels, shuffled_labels, lam)

    return input, labels

**Model Train**

In [12]:
# 모델 학습 함수
def train_model(net, trainloader, optimizer, epoch):
    net.train()
    start = time.time()
    top1_correct = 0
    top5_correct = 0
    total = 0

    superclass_correct = 0
    superclass_total = 0

    running_loss = 0.0
    for batch_idx, (inputs, labels) in enumerate(trainloader):
        labels, inputs = labels.to(device), inputs.to(device)

        if use_cutmix:
            inputs, labels = cutmix((inputs, labels), alpha=1.0)

        optimizer.zero_grad()

        # 모델 예측
        outputs = net(inputs)
        loss = cutmix_criterion(outputs=outputs, labels=labels)

        _, preds = torch.max(outputs, 1)
        labels1, labels2, lam = labels

        top1_correct += (lam * torch.sum((preds == labels1).int()).item() + (1 - lam) * torch.sum((preds == labels2).int()).item())


        _, top5_preds = outputs.topk(5, 1, True, True)
        top5_correct += (lam * torch.sum((top5_preds.eq(labels1.view(-1, 1).expand_as(top5_preds))).int()).item() + 
                         (1 - lam) * torch.sum((top5_preds.eq(labels2.view(-1, 1).expand_as(top5_preds))).int()).item())
        
        total += labels1.size(0)

        # Superclass 정확도 계산 (CutMix 적용)
        super_preds = torch.tensor([fine_to_superclass[p.item()] for p in preds], dtype=torch.long)
        super_labels1 = torch.tensor([fine_to_superclass[t.item()] for t in labels1], dtype=torch.long)
        super_labels2 = torch.tensor([fine_to_superclass[t.item()] for t in labels2], dtype=torch.long)
        
        superclass_correct += (lam * torch.sum(super_preds == super_labels1).item() + 
                               (1 - lam) * torch.sum(super_preds == super_labels2).item())
        superclass_total += super_labels1.size(0)

        # 역전파 및 최적화
        loss.backward()
        optimizer.step()

        n_iter = (epoch - 1) * len(train_loader) + batch_idx + 1
        running_loss += loss.item() * inputs.size(0)

        # 30번째 배치마다 상태 출력
        if (batch_idx + 1) % 30 == 0:
            print(f"Batch [{batch_idx+1}/{len(trainloader)}], Loss: {loss.item():.4f}")

        # LR에 warmup 되는 게 안 보이지만 실제론 이루어지고 있음. batch로 돌 때 이루어짐.
        if epoch <= config["warm"]:
            warmup_scheduler.step()


    # Epoch당 평균 손실 계산 및 출력
    epoch_loss = running_loss / total

    num_epochs = config["num_epochs"]
    finish = time.time()

    # 세부 클래스 및 슈퍼 클래스 정확도 계산
    top1_acc = top1_correct / total
    top5_acc = top5_correct / total
    superclass_acc = superclass_correct / superclass_total
    accuracy = [top1_acc, top5_acc, superclass_acc]

    print("Train set: Epoch: {}, Average loss:{:.4f}, LR: {:.6f} Top-1 Accuracy: {:.4f}, Top-5 Accuracy: {:.4f}, SuperClass Accuracy: {:.4f}, Time consumed:{:.2f}s".format(
        epoch,
        epoch_loss,
        optimizer.param_groups[0]['lr'],
        top1_acc,
        top5_acc,
        superclass_acc,
        finish - start
    ))

    return epoch_loss, accuracy

def eval_training(net, testloader, epoch):
    net.eval()
    top1_correct = 0
    top5_correct = 0
    total = 0

    superclass_correct = 0
    superclass_total = 0

    start = time.time()

    test_loss = 0.0

    with torch.no_grad():
        for images, labels in testloader:
            images, labels = images.to(device), labels.to(device)
            outputs = net(images)

            _, preds = torch.max(outputs, 1)
            
            if isinstance(labels, tuple):
                labels1, labels2, lam = labels
                top1_correct += (lam * torch.sum((preds == labels1).int()).item() + (1 - lam) * torch.sum((preds == labels2).int()).item())
    
                _, top5_preds = outputs.topk(5, 1, True, True)
                top5_correct += (lam * torch.sum((top5_preds.eq(labels1.view(-1, 1).expand_as(top5_preds))).int()).item() + 
                                 (1 - lam) * torch.sum((top5_preds.eq(labels2.view(-1, 1).expand_as(top5_preds))).int()).item())
            else:
                top1_correct += torch.sum(preds == labels).item()
                
                _, top5_preds = outputs.topk(5, 1, True, True)
                top5_correct += torch.sum(top5_preds.eq(labels.view(-1, 1).expand_as(top5_preds))).item()
    
            total += labels.size(0)
            test_loss += cutmix_criterion(outputs, labels)

            # Superclass accuracy (CutMix 적용)
            if isinstance(labels, tuple):
                super_preds = torch.tensor([fine_to_superclass[p.item()] for p in preds], dtype=torch.long)
                super_labels1 = torch.tensor([fine_to_superclass[t.item()] for t in labels1], dtype=torch.long)
                super_labels2 = torch.tensor([fine_to_superclass[t.item()] for t in labels2], dtype=torch.long)
                
                superclass_correct += (lam * torch.sum(super_preds == super_labels1).item() + 
                                       (1 - lam) * torch.sum(super_preds == super_labels2).item())
                superclass_total += super_labels1.size(0)
            else:
                super_preds = torch.tensor([fine_to_superclass[p.item()] for p in preds], dtype=torch.long)
                super_labels = torch.tensor([fine_to_superclass[t.item()] for t in labels], dtype=torch.long)
                
                superclass_correct += torch.sum(super_preds == super_labels).item()
                superclass_total += super_labels.size(0)

    finish = time.time()
    average_loss = test_loss / total

    # 세부 클래스 및 슈퍼 클래스 정확도 계산
    top1_acc = top1_correct / total
    top5_acc = top5_correct / total
    superclass_acc = superclass_correct / superclass_total
    accuracy = [top1_acc, top5_acc, superclass_acc]

    print("Test set: Epoch: {}, Average loss:{:.4f}, Top-1 Accuracy: {:.4f}, Top-5 Accuracy: {:.4f}, SuperClass Accuracy: {:.4f}, Time consumed:{:.2f}s".format(
        epoch,
        average_loss,
        top1_acc,
        top5_acc,
        superclass_acc,
        finish - start
    ))
    print()

    return average_loss, accuracy

In [14]:
DATE_FORMAT = '%A_%d_%B_%Y_%Hh_%Mm_%Ss'
TIME_NOW = datetime.now().strftime(DATE_FORMAT)

if config["resume"]:
    recent_folder = most_recent_folder(os.path.join("runs", config["model_name"], "savepoints"), DATE_FORMAT)
    if not recent_folder:
        raise Exception("no recent folder were found")

    checkpoint_path = os.path.join("runs", config["model_name"], "savepoints", recent_folder)
else:
    checkpoint_path = os.path.join("runs", config["model_name"], "savepoints", TIME_NOW)

if not os.path.exists(checkpoint_path):
    os.makedirs(checkpoint_path)
checkpoint_path = os.path.join(checkpoint_path, '{net}-{epoch}-{type}.pth')

best_acc = 0.0
epoch = 0
weights_path = False

if config["resume"]:
    best_weights = best_acc_weights(os.path.join("runs", config["model_name"], "savepoints", recent_folder))
    if best_weights:
        weights_path = os.path.join("runs", config["model_name"], "savepoints", recent_folder, best_weights)
        print('found best acc weights file:{}'.format(weights_path))
        print('load best training file to test acc...')
        net.load_state_dict(torch.load(weights_path))
        _, best_acc = eval_training(net, test_loader, epoch)
        print("best acc is {:0.2f}".format(best_acc[0]))

    recent_weights_file = most_recent_weights(os.path.join("runs", config["model_name"], "savepoints", recent_folder))
    if not recent_weights_file:
        raise Exception("no recent weights file were found")
    weights_path = os.path.join("runs", config["model_name"], "savepoints", recent_folder, recent_weights_file)
    print("loading weights file {} to resume training......".format(weights_path))
    net.load_state_dict(torch.load(weights_path))

    resume_epoch = last_epoch(os.path.join("runs", config["model_name"], "savepoints", recent_folder))

    optimizer = optim.SGD(net.parameters(), lr=config["learning_rate"], momentum=config["momentum"], weight_decay=config["weight_decay"], nesterov=config["nesterov"])

    if config["scheduler"] == "MultiStepLR":
        train_scheduler = optim.lr_scheduler.MultiStepLR(optimizer, config["milestones"], gamma=config["gamma"]) #learning rate decay
    elif config["scheduler"] == "ReduceLROnPlateau":
        train_scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, "min", factor=config["gamma"], patience=config["plateau_patience"])
    else:
        print("No scheduler!!")

found best acc weights file:runs/pyramidnet_cutmix/savepoints/Wednesday_09_October_2024_12h_36m_55s/pyramidnet_cutmix-205-best.pth
load best training file to test acc...
Test set: Epoch: 0, Average loss:0.0117, Top-1 Accuracy: 0.6186, Top-5 Accuracy: 0.8850, SuperClass Accuracy: 0.7477, Time consumed:23.03s

best acc is 0.01
loading weights file runs/pyramidnet_cutmix/savepoints/Wednesday_09_October_2024_12h_36m_55s/pyramidnet_cutmix-205-best.pth to resume training......


**Per-Epoch Training**

In [None]:
# CutMix 사용 여부 결정
use_cutmix = True

start_time = time.time()
for epoch in tqdm(range(1, config["num_epochs"] + 1)):
    if epoch > config["warm"]:
        if config["scheduler"] == "MultiStepLR":
            train_scheduler.step()
        elif config["scheduler"] == "ReduceLROnPlateau":
            train_scheduler.step(test_loss)

    if config["resume"]:
        if epoch <= resume_epoch:
            continue

    epoch_loss, train_accuracy = train_model(net, train_loader, optimizer, epoch=epoch)
    test_loss, test_accuracy = eval_training(net, val_loader, epoch)

    if best_acc[0] < test_accuracy[0]:
        if weights_path:  # 모델의 용량 이슈로 새로운 best 모델이 갱신되면 이전 best 모델 삭제
            os.remove(weights_path)
        weights_path = checkpoint_path.format(net=config["model_name"], epoch=epoch, type="best")
        print("saving weights file to {}".format(weights_path))
        torch.save(net.state_dict(), weights_path)
        best_acc = test_accuracy

    early_stopping(test_loss, net)
    if early_stopping.early_stop:
        print("Early stopping")
        break

    wandb.log({
        "epoch": epoch,
        "learning_rate": optimizer.param_groups[0]['lr'],
        "train_loss": epoch_loss,
        "train top-1 accuracy": train_accuracy[0],
        "train top-5 accuracy": train_accuracy[1],
        "train super-class accuracy": train_accuracy[2],
        "val_loss": test_loss,
        "valid top-1 accuracy": test_accuracy[0],
        "valid top-5 accuracy": test_accuracy[1],
        "valid super-class accuracy": test_accuracy[2]
    })

end_time = time.time()
wandb.log({"train_time": end_time - start_time})

# Result
print(f"Result of best {model_name} = Epoch : {epoch}   Loss : {test_loss:.4f}   Top-1 Accuracy : {best_acc[0]*100:.4f}%  Top-5 Accuracy : {best_acc[1]*100:.4f}%   Super Accuracy : {best_acc[2]*100:.4f}%   Total_Accuracy : {sum(best_acc)*100:.4f}    Time : {end_time - start_time:.4f}")



Batch [30/313], Loss: 3.3871
Batch [60/313], Loss: 2.3762
Batch [90/313], Loss: 2.7042
Batch [120/313], Loss: 2.5588
Batch [150/313], Loss: 2.8380
Batch [180/313], Loss: 1.6006
Batch [210/313], Loss: 2.3573
Batch [240/313], Loss: 2.0872
Batch [270/313], Loss: 3.0832
Batch [300/313], Loss: 2.2601
Train set: Epoch: 206, Average loss:2.8575, LR: 0.000000 Top-1 Accuracy: 0.3382, Top-5 Accuracy: 0.6205, SuperClass Accuracy: 0.4448, Time consumed:292.58s


 69%|████████████████████████████▏            | 206/300 [05:15<02:23,  1.53s/it]

Test set: Epoch: 206, Average loss:0.0117, Top-1 Accuracy: 0.6223, Top-5 Accuracy: 0.8867, SuperClass Accuracy: 0.7564, Time consumed:22.74s

saving weights file to runs/pyramidnet_cutmix/savepoints/Wednesday_09_October_2024_12h_36m_55s/pyramidnet_cutmix-206-best.pth
Batch [30/313], Loss: 2.5026
Batch [60/313], Loss: 2.5503
Batch [90/313], Loss: 4.0575
Batch [120/313], Loss: 2.6646
Batch [150/313], Loss: 3.0923
Batch [180/313], Loss: 3.4716
Batch [210/313], Loss: 3.3966
Batch [240/313], Loss: 2.0999
Batch [270/313], Loss: 2.4564
Batch [300/313], Loss: 3.1548
Train set: Epoch: 207, Average loss:2.7957, LR: 0.000000 Top-1 Accuracy: 0.3530, Top-5 Accuracy: 0.6360, SuperClass Accuracy: 0.4627, Time consumed:338.80s


 69%|████████████████████████████▎            | 207/300 [11:17<06:13,  4.01s/it]

Test set: Epoch: 207, Average loss:0.0115, Top-1 Accuracy: 0.6229, Top-5 Accuracy: 0.8867, SuperClass Accuracy: 0.7559, Time consumed:22.76s

saving weights file to runs/pyramidnet_cutmix/savepoints/Wednesday_09_October_2024_12h_36m_55s/pyramidnet_cutmix-207-best.pth
Batch [30/313], Loss: 2.8707
Batch [60/313], Loss: 3.2983
Batch [90/313], Loss: 2.2811
Batch [120/313], Loss: 2.9062
Batch [150/313], Loss: 3.1974
Batch [180/313], Loss: 3.1085
Batch [210/313], Loss: 2.0469
Batch [240/313], Loss: 3.2323
Batch [270/313], Loss: 2.9525
Batch [300/313], Loss: 2.5595
Train set: Epoch: 208, Average loss:2.7812, LR: 0.000000 Top-1 Accuracy: 0.3545, Top-5 Accuracy: 0.6385, SuperClass Accuracy: 0.4646, Time consumed:347.23s


 69%|████████████████████████████▍            | 208/300 [17:23<11:34,  7.55s/it]

Test set: Epoch: 208, Average loss:0.0114, Top-1 Accuracy: 0.6225, Top-5 Accuracy: 0.8891, SuperClass Accuracy: 0.7566, Time consumed:19.66s

Batch [30/313], Loss: 3.2260
Batch [60/313], Loss: 2.9827
Batch [90/313], Loss: 2.0926
Batch [120/313], Loss: 3.1589
Batch [150/313], Loss: 2.5797
Batch [180/313], Loss: 3.0923
Batch [210/313], Loss: 2.5308
Batch [240/313], Loss: 3.3128
Batch [270/313], Loss: 3.5614
Batch [300/313], Loss: 2.5716
Train set: Epoch: 209, Average loss:2.8085, LR: 0.000000 Top-1 Accuracy: 0.3515, Top-5 Accuracy: 0.6335, SuperClass Accuracy: 0.4610, Time consumed:332.44s


 70%|████████████████████████████▌            | 209/300 [23:19<18:41, 12.32s/it]

Test set: Epoch: 209, Average loss:0.0115, Top-1 Accuracy: 0.6230, Top-5 Accuracy: 0.8873, SuperClass Accuracy: 0.7573, Time consumed:22.90s

saving weights file to runs/pyramidnet_cutmix/savepoints/Wednesday_09_October_2024_12h_36m_55s/pyramidnet_cutmix-209-best.pth
EarlyStopping counter: 1 out of 150
Batch [30/313], Loss: 3.1001
Batch [60/313], Loss: 2.4744
Batch [90/313], Loss: 2.5690
Batch [120/313], Loss: 1.8831
Batch [150/313], Loss: 2.9961
Batch [180/313], Loss: 2.9051
Batch [210/313], Loss: 2.7257
Batch [240/313], Loss: 3.1979
Batch [270/313], Loss: 2.9834
Batch [300/313], Loss: 3.3693
Train set: Epoch: 210, Average loss:2.7958, LR: 0.000000 Top-1 Accuracy: 0.3555, Top-5 Accuracy: 0.6374, SuperClass Accuracy: 0.4657, Time consumed:329.11s


 70%|████████████████████████████▋            | 210/300 [29:11<28:17, 18.86s/it]

Test set: Epoch: 210, Average loss:0.0118, Top-1 Accuracy: 0.6191, Top-5 Accuracy: 0.8863, SuperClass Accuracy: 0.7535, Time consumed:22.98s

EarlyStopping counter: 2 out of 150
Batch [30/313], Loss: 2.2347
Batch [60/313], Loss: 2.7620
Batch [90/313], Loss: 1.7385
Batch [120/313], Loss: 2.7052
Batch [150/313], Loss: 2.8652
Batch [180/313], Loss: 3.2362
Batch [210/313], Loss: 2.2420
Batch [240/313], Loss: 3.1434
Batch [270/313], Loss: 2.2375
Batch [300/313], Loss: 3.1126
Train set: Epoch: 211, Average loss:2.7772, LR: 0.000000 Top-1 Accuracy: 0.3582, Top-5 Accuracy: 0.6410, SuperClass Accuracy: 0.4695, Time consumed:357.46s


 70%|████████████████████████████▊            | 211/300 [35:30<42:14, 28.48s/it]

Test set: Epoch: 211, Average loss:0.0116, Top-1 Accuracy: 0.6249, Top-5 Accuracy: 0.8869, SuperClass Accuracy: 0.7594, Time consumed:21.24s

saving weights file to runs/pyramidnet_cutmix/savepoints/Wednesday_09_October_2024_12h_36m_55s/pyramidnet_cutmix-211-best.pth
EarlyStopping counter: 3 out of 150
Batch [30/313], Loss: 2.0052
Batch [60/313], Loss: 2.7879
Batch [90/313], Loss: 3.0547
Batch [120/313], Loss: 3.0511
Batch [150/313], Loss: 3.0522
Batch [180/313], Loss: 2.6013
Batch [210/313], Loss: 2.7042
Batch [240/313], Loss: 3.1259
Batch [270/313], Loss: 3.0427
Batch [300/313], Loss: 2.6886
Train set: Epoch: 212, Average loss:2.8039, LR: 0.000000 Top-1 Accuracy: 0.3504, Top-5 Accuracy: 0.6360, SuperClass Accuracy: 0.4610, Time consumed:335.06s


 71%|████████████████████████████▉            | 212/300 [41:27<59:31, 40.59s/it]

Test set: Epoch: 212, Average loss:0.0117, Top-1 Accuracy: 0.6223, Top-5 Accuracy: 0.8880, SuperClass Accuracy: 0.7570, Time consumed:22.45s

EarlyStopping counter: 4 out of 150
Batch [30/313], Loss: 3.0282
Batch [60/313], Loss: 2.9220
Batch [90/313], Loss: 2.8026
Batch [120/313], Loss: 3.3549
Batch [150/313], Loss: 1.6080
Batch [180/313], Loss: 2.1568
Batch [210/313], Loss: 2.8581
Batch [240/313], Loss: 2.5262
Batch [270/313], Loss: 3.1851
Batch [300/313], Loss: 2.2480
Train set: Epoch: 213, Average loss:2.7978, LR: 0.000000 Top-1 Accuracy: 0.3493, Top-5 Accuracy: 0.6368, SuperClass Accuracy: 0.4597, Time consumed:354.78s


 71%|███████████████████████████▋           | 213/300 [47:45<1:23:16, 57.43s/it]

Test set: Epoch: 213, Average loss:0.0116, Top-1 Accuracy: 0.6234, Top-5 Accuracy: 0.8886, SuperClass Accuracy: 0.7575, Time consumed:23.02s

EarlyStopping counter: 5 out of 150
Batch [30/313], Loss: 1.4899
Batch [60/313], Loss: 2.0064
Batch [90/313], Loss: 3.1633
Batch [120/313], Loss: 3.3914
Batch [150/313], Loss: 3.2359
Batch [180/313], Loss: 3.0854
Batch [210/313], Loss: 3.3859
Batch [240/313], Loss: 3.2282
Batch [270/313], Loss: 1.8539
Batch [300/313], Loss: 2.7971
Train set: Epoch: 214, Average loss:2.8286, LR: 0.000000 Top-1 Accuracy: 0.3458, Top-5 Accuracy: 0.6288, SuperClass Accuracy: 0.4563, Time consumed:320.29s


 71%|███████████████████████████▊           | 214/300 [53:28<1:49:32, 76.42s/it]

Test set: Epoch: 214, Average loss:0.0115, Top-1 Accuracy: 0.6244, Top-5 Accuracy: 0.8883, SuperClass Accuracy: 0.7577, Time consumed:22.31s

EarlyStopping counter: 6 out of 150
Batch [30/313], Loss: 2.3451
Batch [60/313], Loss: 2.9397
Batch [90/313], Loss: 3.0925
Batch [120/313], Loss: 1.5443
Batch [150/313], Loss: 2.9272
Batch [180/313], Loss: 2.2676
Batch [210/313], Loss: 3.2567
Batch [240/313], Loss: 3.0475
Batch [270/313], Loss: 2.8383
Batch [300/313], Loss: 3.2636
Train set: Epoch: 215, Average loss:2.8165, LR: 0.000000 Top-1 Accuracy: 0.3465, Top-5 Accuracy: 0.6334, SuperClass Accuracy: 0.4566, Time consumed:357.44s


 72%|███████████████████████████▏          | 215/300 [59:48<2:25:38, 102.81s/it]

Test set: Epoch: 215, Average loss:0.0116, Top-1 Accuracy: 0.6191, Top-5 Accuracy: 0.8848, SuperClass Accuracy: 0.7537, Time consumed:22.73s

EarlyStopping counter: 7 out of 150
Batch [30/313], Loss: 2.7942
Batch [60/313], Loss: 2.3632
Batch [90/313], Loss: 1.5877
Batch [120/313], Loss: 2.8755
Batch [150/313], Loss: 2.7821
Batch [180/313], Loss: 3.0242
Batch [210/313], Loss: 2.5171
Batch [240/313], Loss: 3.2394
Batch [270/313], Loss: 2.1938
Batch [300/313], Loss: 2.1497
Train set: Epoch: 216, Average loss:2.8277, LR: 0.000000 Top-1 Accuracy: 0.3445, Top-5 Accuracy: 0.6269, SuperClass Accuracy: 0.4543, Time consumed:325.42s


 72%|█████████████████████████▉          | 216/300 [1:05:36<3:01:49, 129.87s/it]

Test set: Epoch: 216, Average loss:0.0115, Top-1 Accuracy: 0.6222, Top-5 Accuracy: 0.8882, SuperClass Accuracy: 0.7549, Time consumed:22.56s

EarlyStopping counter: 8 out of 150
Batch [30/313], Loss: 3.0939
Batch [60/313], Loss: 2.2824
Batch [90/313], Loss: 1.6925
Batch [120/313], Loss: 3.3833
Batch [150/313], Loss: 3.0708
Batch [180/313], Loss: 2.4018
Batch [210/313], Loss: 3.1968
Batch [240/313], Loss: 2.0052
Batch [270/313], Loss: 2.5101
Batch [300/313], Loss: 2.8962
Train set: Epoch: 217, Average loss:2.8075, LR: 0.000000 Top-1 Accuracy: 0.3465, Top-5 Accuracy: 0.6331, SuperClass Accuracy: 0.4560, Time consumed:345.89s


 72%|██████████████████████████          | 217/300 [1:11:45<3:44:41, 162.43s/it]

Test set: Epoch: 217, Average loss:0.0117, Top-1 Accuracy: 0.6242, Top-5 Accuracy: 0.8875, SuperClass Accuracy: 0.7569, Time consumed:22.97s

EarlyStopping counter: 9 out of 150
Batch [30/313], Loss: 2.8332
Batch [60/313], Loss: 2.9553
Batch [90/313], Loss: 2.8796
Batch [120/313], Loss: 2.6201
Batch [150/313], Loss: 3.3853
Batch [180/313], Loss: 3.0745
Batch [210/313], Loss: 2.8958
Batch [240/313], Loss: 3.4697
Batch [270/313], Loss: 2.9721
Batch [300/313], Loss: 1.9998
Train set: Epoch: 218, Average loss:2.8853, LR: 0.000000 Top-1 Accuracy: 0.3360, Top-5 Accuracy: 0.6182, SuperClass Accuracy: 0.4439, Time consumed:359.83s


 73%|██████████████████████████▏         | 218/300 [1:18:07<4:31:01, 198.31s/it]

Test set: Epoch: 218, Average loss:0.0117, Top-1 Accuracy: 0.6211, Top-5 Accuracy: 0.8874, SuperClass Accuracy: 0.7558, Time consumed:22.80s

EarlyStopping counter: 10 out of 150
Batch [30/313], Loss: 2.9473
Batch [60/313], Loss: 2.1088
Batch [90/313], Loss: 2.9777
Batch [120/313], Loss: 2.8658
Batch [150/313], Loss: 3.0235
Batch [180/313], Loss: 2.8055
Batch [210/313], Loss: 2.0854
Batch [240/313], Loss: 3.3646
Batch [270/313], Loss: 2.8817
Batch [300/313], Loss: 3.2181
Train set: Epoch: 219, Average loss:2.7829, LR: 0.000000 Top-1 Accuracy: 0.3563, Top-5 Accuracy: 0.6399, SuperClass Accuracy: 0.4684, Time consumed:333.65s


 73%|██████████████████████████▎         | 219/300 [1:24:04<5:07:58, 228.14s/it]

Test set: Epoch: 219, Average loss:0.0115, Top-1 Accuracy: 0.6220, Top-5 Accuracy: 0.8884, SuperClass Accuracy: 0.7563, Time consumed:22.65s

EarlyStopping counter: 11 out of 150
Batch [30/313], Loss: 2.7935
Batch [60/313], Loss: 3.6092
Batch [90/313], Loss: 3.6649


In [None]:
# end_time = time.time()
# wandb.log({"train_time": end_time - start_time})

# # Result
# print(f" Result of ResNet = Epoch : {epoch}   Loss : {test_loss:.4f}   Top-1 Accuracy : {test_accuracy[0]*100:.4f}%  Top-5 Accuracy : {test_accuracy[1]*100:.4f}%   Super Accuracy : {test_accuracy[2]*100:.4f}%   Total_Accuracy : {sum(test_accuracy)*100:.4f}    Time : {end_time - start_time:.4f}")

**Best Model Test**

In [None]:
def all_accuracy(net, test_loader, device):
    net2.eval()
    top1_correct = 0
    top5_correct = 0
    total = 0

    superclass_correct = 0
    superclass_total = 0

    with torch.no_grad():
        for _, (images, labels) in enumerate(test_loader):

            images, labels = images.to(device), labels.to(device)
            outputs = net2(images)

            _, preds = torch.max(outputs, 1)
            top1_correct += torch.sum(preds == labels).item()

            _, top5_preds = outputs.topk(5, 1, True, True)
            top5_correct += torch.sum(top5_preds.eq(labels.view(-1, 1).expand_as(top5_preds))).item()

            total += labels.size(0)

            # Superclass accuracy
            super_preds = torch.tensor([fine_to_superclass[p.item()] for p in preds], dtype=torch.long)
            super_labels = torch.tensor([fine_to_superclass[t.item()] for t in labels], dtype=torch.long)
            superclass_correct += torch.sum(super_preds == super_labels).item()
            superclass_total += super_labels.size(0)

    # 세부 클래스 및 슈퍼 클래스 정확도 계산
    top1_acc = top1_correct / total
    top5_acc = top5_correct / total
    superclass_acc = superclass_correct / superclass_total
    accuracy = [top1_acc, top5_acc, superclass_acc]

    return accuracy

In [None]:
net2 = model
net2.to(device)
net2.eval()

# 이 코드로 best 모델이 load 되지 않을 경우 아래 코드에서 경로 직접 지정
recent_folder = most_recent_folder(os.path.join("runs", config["model_name"], "savepoints"), DATE_FORMAT)
if not recent_folder:
    raise Exception("no recent folder were found")

best_weights = best_acc_weights(os.path.join("runs", config["model_name"], "savepoints", recent_folder))
if best_weights:
    weights_path = os.path.join("runs", config["model_name"], "savepoints", recent_folder, best_weights)
    print('found best acc weights file:{}'.format(weights_path))
    print('load best training file to test acc...')
    net2.load_state_dict(torch.load(weights_path))

acc = all_accuracy(net2, test_loader, device)
print("Top-1 accuracy: {:.4f}".format(acc[0]))
print("Top-5 accuracy: {:.4f}".format(acc[1]))
print("Super-Class accuracy: {:.4f}".format(acc[2]))

wandb.log({
    "Test Top-1 accuracy": acc[0],
    "Test Top-5 accuracy": acc[1],
    "Test Super-Class accuracy": acc[2],
    "Total Score": sum(acc)
    })

In [None]:
# # 위 코드에서 에러가 발생해 경로 지정이 제대로 되지 않을 경우 사용

# net2 = model
# net2.to(device)
# net2.load_state_dict(torch.load("runs/wide_resnet/savepoints/Tuesday_01_October_2024_13h_15m_52s/wide_resnet-229-best.pth"))

# acc = all_accuracy(net2, test_loader, device)
# print("Top-1 accuracy: {:.4f}".format(acc[0]))
# print("Top-5 accuracy: {:.4f}".format(acc[1]))
# print("Super-Class accuracy: {:.4f}".format(acc[2]))
# wandb.log({
#     "Test Top-1 accuracy": acc[0],
#     "Test Top-5 accuracy": acc[1],
#     "Test Super-Class accuracy": acc[2],
#     "Total Score": sum(acc)
#     })
# wandb.finish()

**Data Analysis**

In [None]:
import wandb.sklearn

classes = train_val_data.classes
coarse_classes = [
    'aquatic mammals', 'fish', 'flowers', 'food containers', 'fruit and vegetables', 'household electrical devices',
    'household furniture', 'insects', 'large carnivores', 'large man-made outdoor things',
    'large natural outdoor scenes', 'large omnivores and herbivores', 'medium-sized mammals',
    'non-insect invertebrates', 'people', 'reptiles', 'small mammals', 'trees', 'vehicles 1', 'vehicles 2'
]

y_pred = []
y_true = []
y_probs = []

for x, y in torch.utils.data.DataLoader(dataset=test_data, batch_size=config["batch_size"]):
    x = x.to(device)
    y = y.to(device)
    z = net2(x)
    _, yhat = torch.max(z, 1)
    probs = z.softmax(dim=1).cpu().detach().numpy()

    pred = yhat.data.cpu().numpy()
    
    super_pred = [coarse_classes[fine_to_superclass[p]] for p in pred]
    y_pred.extend(super_pred)

    labels = y.data.cpu().numpy()
    super_labels = [coarse_classes[fine_to_superclass[l]] for l in labels]
    y_true.extend(super_labels)

    y_probs.extend(probs)
    
cf_matrix = confusion_matrix(y_true, y_pred, normalize='true')

df_cm = pd.DataFrame(cf_matrix, index=coarse_classes,
                     columns=coarse_classes)

plt.figure(figsize=(10, 7))
sns.heatmap(df_cm, annot=True, fmt='.2f', cmap='Blues')
plt.title(f'Confusion Matrix of {config["model_name"]} (CIFAR-100 Superclass)')
plt.xlabel('Predicted')
plt.ylabel('Real')

conf_matrix_img = wandb.Image(plt.gcf())
plt.close()

class_report = classification_report(y_true, y_pred, labels=coarse_classes, zero_division=0)

plt.figure(figsize=(10, 7))
plt.text(0.01, 0.05, str(class_report), {'fontsize': 12}, fontproperties='monospace')  # monospaced font
plt.axis('off')  # 축 제거
plt.tight_layout()

class_report_img = wandb.Image(plt.gcf())
plt.close()

wandb.log({
    "Confusion Matrix": conf_matrix_img,
    "Classification Report": class_report_img
    })

In [None]:
wandb.finish()