In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Subset
import torchvision
import torchvision.transforms as transforms
import torchvision.transforms.v2 as transforms_v2  # CutMix를 위한 v2 transforms 추가
import sys
import os
import torch
import time
import random
import numpy as np
import wandb
from tqdm import tqdm
from sklearn.model_selection import StratifiedShuffleSplit
from tools.tool import EarlyStopping
from models.resnet import resnet18, resnet34, resnet50

wandb.login(key="ef091b9abcea3186341ddf8995d62bde62d7469e")
wandb.init(project="PBL-2", name="resnet18_cutmix,flip)")  # CutMix 적용 실험임을 명시

# WandB 설정
config = {
    "model": "resnet18",
    "batch_size": 128,
    "num_epochs": 100,
    "learning_rate": 0.001,
    "optimizer": "Adam",
    "seed": 2025,
    "deterministic": False,
    "patience": 10,  # early stopping patience
    "train_ratio": 0.8,
    "val_ratio": 0.1,
    "test_ratio": 0.1,
    "cutmix_alpha": 1.0,  # CutMix 알파 파라미터 추가
    "cutmix_prob": 0.5    # CutMix 적용 확률 추가
}
wandb.config.update(config)

# CIFAR-100 데이터셋 로드
transform_train = transforms.Compose([
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize((0.5071, 0.4867, 0.4408), (0.2675, 0.2565, 0.2761)),
])

transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5071, 0.4867, 0.4408), (0.2675, 0.2565, 0.2761)),
])

full_trainset = torchvision.datasets.CIFAR100(
    root='./data', train=True, download=True, transform=transform_train)

testset = torchvision.datasets.CIFAR100(
    root='./data', train=False, download=True, transform=transform_test)

# Stratified 분할을 위한 준비 (train, validation 나누기)
# 모든 라벨을 추출
targets = np.array(full_trainset.targets)

# StratifiedShuffleSplit을 사용하여 8:1:1 비율로 분할
# 먼저 train과 validation을 나눔 (full_trainset에서 8:2)
train_val_split = StratifiedShuffleSplit(n_splits=1, test_size=0.2, random_state=config["seed"])
train_idx, temp_idx = next(train_val_split.split(np.zeros(len(targets)), targets))

# 그 다음 validation과 test를 나눔 (temp에서 1:1, 전체로 보면 1:1)
val_test_targets = targets[temp_idx]
val_test_split = StratifiedShuffleSplit(n_splits=1, test_size=0.5, random_state=config["seed"])
val_idx_temp, test_idx_temp = next(val_test_split.split(np.zeros(len(val_test_targets)), val_test_targets))

# 원래 인덱스로 매핑
val_idx = temp_idx[val_idx_temp]
test_idx = temp_idx[test_idx_temp]

# Subset 생성
trainset = Subset(full_trainset, train_idx)
valset = Subset(full_trainset, val_idx)
testset_split = Subset(full_trainset, test_idx)  # 원래 테스트셋 대신 stratified split에서 나온 테스트셋 사용

# DataLoader 생성
trainloader = DataLoader(trainset, batch_size=config["batch_size"], shuffle=True, num_workers=16)
valloader = DataLoader(valset, batch_size=config["batch_size"], shuffle=False, num_workers=16)
testloader = DataLoader(testset_split, batch_size=config["batch_size"], shuffle=False, num_workers=16)

print(f"Train set size: {len(trainset)}")
print(f"Validation set size: {len(valset)}")
print(f"Test set size: {len(testset_split)}")

# 추가: CutMix 변환 정의
cutmix = transforms_v2.CutMix(alpha=config["cutmix_alpha"], num_classes=100)  # CIFAR-100은 100개 클래스

# CutMix용 손실 함수 정의 (원-핫 인코딩된 레이블 처리)
def cutmix_criterion(outputs, targets):
    """
    CutMix로 혼합된 레이블을 처리하기 위한 손실 함수
    outputs: 모델 출력
    targets: CutMix로 생성된 원-핫 인코딩 레이블
    """
    return torch.nn.functional.cross_entropy(outputs, targets)

def train(model, trainloader, criterion, optimizer, device, epoch):
    """
    학습 함수 (CutMix 적용)
    """
    model.train()   # 모델을 학습 모드로 설정
    start_time = time.time()  # 시간 측정 시작
    running_loss = 0.0
    correct_top1 = 0
    correct_top5 = 0
    total = 0
    
    for i, (inputs, labels) in enumerate(trainloader):
        inputs, labels = inputs.to(device), labels.to(device)
        
        # CutMix 확률적 적용
        if random.random() < config["cutmix_prob"]:
            inputs, labels = cutmix(inputs, labels)
            # 이 경우 labels은 원-핫 인코딩 형태로 변환됨
            use_cutmix = True
        else:
            use_cutmix = False
        
        optimizer.zero_grad()
        
        outputs = model(inputs)
        
        # CutMix 적용 여부에 따라 손실 함수 선택
        if use_cutmix:
            # CutMix가 적용된 경우 (원-핫 인코딩된 레이블)
            loss = torch.nn.functional.cross_entropy(outputs, labels)
        else:
            # 일반적인 경우 (정수 인덱스 레이블)
            loss = criterion(outputs, labels)
            
        loss.backward()
        optimizer.step()
        
        running_loss += loss.item()
        
        # 정확도 계산 - CutMix 적용 여부에 따라 다르게 처리
        if use_cutmix:
            # 원-핫 인코딩된 레이블에서 argmax를 사용해 가장 큰 값의 인덱스 추출
            _, label_idx = labels.max(1)
        else:
            # 정수 인덱스 레이블 그대로 사용
            label_idx = labels
            
        # top-1 정확도 계산
        _, predicted = outputs.max(1)
        total += inputs.size(0)
        correct_top1 += predicted.eq(label_idx).sum().item()
        
        # top-5 정확도 계산
        _, top5_idx = outputs.topk(5, 1, largest=True, sorted=True)
        correct_top5 += sum([1 for i in range(len(label_idx)) if label_idx[i] in top5_idx[i]])
        
        if (i + 1) % 50 == 0:  # 50 배치마다 출력
            print(f'Epoch [{epoch+1}], Batch [{i+1}/{len(trainloader)}], Loss: {loss.item():.4f}')
    
    epoch_loss = running_loss / len(trainloader)
    accuracy_top1 = 100.0 * correct_top1 / total
    accuracy_top5 = 100.0 * correct_top5 / total
    
    train_time = time.time() - start_time
    
    # 학습 세트에 대한 성능 출력
    print(f'Train set: Epoch: {epoch+1}, Average loss:{epoch_loss:.4f}, LR: {optimizer.param_groups[0]["lr"]:.6f} '
          f'Top-1 Accuracy: {accuracy_top1:.4f}%, Top-5 Accuracy: {accuracy_top5:.4f}%, Time consumed:{train_time:.2f}s')
    
    return epoch_loss, accuracy_top1, accuracy_top5

def evaluate(model, dataloader, criterion, device, epoch, phase="val"):
    """
    평가 함수
    """
    model.eval()  # 모델을 평가 모드로 설정
    start_time = time.time()  # 시간 측정 시작
    
    eval_loss = 0.0
    correct_top1 = 0
    correct_top5 = 0
    total = 0
    
    # 그래디언트 계산 비활성화
    with torch.no_grad():
        for inputs, labels in dataloader:
            inputs, labels = inputs.to(device), labels.to(device)
            
            # 순전파
            outputs = model(inputs)
            
            # 손실 계산
            loss = criterion(outputs, labels)
            eval_loss += loss.item()
            
            # top-1 정확도 계산
            _, predicted = outputs.max(1)
            total += labels.size(0)
            correct_top1 += (predicted == labels).sum().item()
            
            # top-5 정확도 계산
            _, top5_idx = outputs.topk(5, 1, largest=True, sorted=True)
            correct_top5 += top5_idx.eq(labels.view(-1, 1).expand_as(top5_idx)).sum().item()
    
    # 평균 손실 및 정확도 계산
    eval_loss = eval_loss / len(dataloader)
    accuracy_top1 = 100.0 * correct_top1 / total
    accuracy_top5 = 100.0 * correct_top5 / total
    
    # 평가 시간 계산
    eval_time = time.time() - start_time
    
    # 테스트 세트에 대한 성능 출력
    print(f'{phase.capitalize()} set: Epoch: {epoch+1}, Average loss:{eval_loss:.4f}, '
          f'Top-1 Accuracy: {accuracy_top1:.4f}%, Top-5 Accuracy: {accuracy_top5:.4f}%, Time consumed:{eval_time:.2f}s')
    print()
    
    return eval_loss, accuracy_top1, accuracy_top5


# 메인 학습 루프
def main_training_loop(model, trainloader, valloader, testloader, criterion, optimizer, device, num_epochs, patience):
    """
    메인 학습 루프
    """
    early_stopping = EarlyStopping(patience=patience, verbose=True)
    
    best_acc_top1 = 0.0
    best_acc_top5 = 0.0
    
    # tqdm을 사용한 진행 상황 표시
    for epoch in tqdm(range(num_epochs)):
        # 학습
        train_loss, train_acc_top1, train_acc_top5 = train(model, trainloader, criterion, optimizer, device, epoch)
        
        # 평가
        val_loss, val_acc_top1, val_acc_top5 = evaluate(model, valloader, criterion, device, epoch, phase="val")
        
        # WandB에 로깅
        wandb.log({
            "epoch": epoch + 1,
            "learning_rate": optimizer.param_groups[0]['lr'],
            "train_loss": train_loss,
            "train_accuracy_top1": train_acc_top1,
            "train_accuracy_top5": train_acc_top5,
            "val_loss": val_loss,
            "val_accuracy_top1": val_acc_top1,
            "val_accuracy_top5": val_acc_top5
        })
            
        # 최고 정확도 모델 저장 (top-1 기준)
        if val_acc_top1 > best_acc_top1:
            best_acc_top1 = val_acc_top1
            best_acc_top5_at_best_top1 = val_acc_top5
            print(f'New best top-1 accuracy: {best_acc_top1:.2f}%, top-5 accuracy: {best_acc_top5_at_best_top1:.2f}%')
            # 모델 저장
            model_path = f'best_model_{wandb.run.name}.pth'
            torch.save(model.state_dict(), model_path)
            
            # WandB에 모델 아티팩트 저장
            wandb.save(model_path)
        
        # top-5 accuracy 기록 업데이트
        if val_acc_top5 > best_acc_top5:
            best_acc_top5 = val_acc_top5
            print(f'New best top-5 accuracy: {best_acc_top5:.2f}%')

        # Early stopping 체크 (validation loss 기준)
        early_stopping(val_loss, model)
        if early_stopping.early_stop:
            print("Early stopping triggered. Training stopped.")
            break
    
    # 훈련 완료 후 모델 평가 (best model 로드)
    if early_stopping.early_stop:
        print("Loading best model from early stopping checkpoint...")
    else:
        print("Loading best model based on validation accuracy...")
        model_path = f'best_model_{wandb.run.name}.pth'
        model.load_state_dict(torch.load(model_path))

    # 최종 테스트 세트 평가
    test_loss, test_acc_top1, test_acc_top5 = evaluate(model, testloader, criterion, device, num_epochs-1, phase="test")
    
    # 테스트 결과를 wandb 로그에 추가 - 이 부분이 누락되어 있어서 추가했습니다
    wandb.log({
        "epoch": epoch + 1,  # 마지막 에폭 또는 early stopping된 에폭
        "test_loss": test_loss,
        "test_accuracy_top1": test_acc_top1,
        "test_accuracy_top5": test_acc_top5
    })
    
    print(f'Finish! Best validation top-1 accuracy: {best_acc_top1:.2f}%, Best validation top-5 accuracy: {best_acc_top5:.2f}%')
    print(f'Final test top-1 accuracy: {test_acc_top1:.2f}%, Final test top-5 accuracy: {test_acc_top5:.2f}%')
    
    # WandB에 최종 결과 기록
    wandb.run.summary["best_val_accuracy_top1"] = best_acc_top1
    wandb.run.summary["best_val_accuracy_top5"] = best_acc_top5
    wandb.run.summary["test_accuracy_top1"] = test_acc_top1
    wandb.run.summary["test_accuracy_top5"] = test_acc_top5

    # Early stopping 정보 저장
    if early_stopping.early_stop:
        wandb.run.summary["early_stopped"] = True
        wandb.run.summary["early_stopped_epoch"] = epoch+1
    else:
        wandb.run.summary["early_stopped"] = False


# 디바이스 설정
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# 모델 초기화
model = resnet18().to(device)  
criterion = nn.CrossEntropyLoss()  # 손실 함수 정의
optimizer = optim.Adam(model.parameters(), lr=config["learning_rate"])  # 옵티마이저 정의

# WandB에 모델 구조 기록
wandb.watch(model, log="all")

# GPU 가속
if torch.cuda.device_count() > 1:
    print(f"{torch.cuda.device_count()}개의 GPU를 사용합니다.")
    model = nn.DataParallel(model)

# 훈련 시작 시간 기록
start_time = time.time()

# 메인 학습 루프 호출
main_training_loop(
    model=model,
    trainloader=trainloader,
    valloader=valloader,
    testloader=testloader,
    criterion=criterion,
    optimizer=optimizer,
    device=device,
    num_epochs=config["num_epochs"],
    patience=config["patience"]
)

# 훈련 종료 시간 기록 및 출력
end_time = time.time()
total_time = end_time - start_time
wandb.log({"total_training_time": total_time})

print(f"Total training time: {total_time:.2f} seconds")

# WandB 실행 종료
wandb.finish()

[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /home/guswls/.netrc
[34m[1mwandb[0m: Currently logged in as: [33msokjh1310[0m ([33msokjh1310-hanyang-university[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


Files already downloaded and verified
Files already downloaded and verified
Train set size: 40000
Validation set size: 5000
Test set size: 5000
Using device: cuda
2개의 GPU를 사용합니다.


  0%|                                                                                                       | 0/100 [00:00<?, ?it/s]

Epoch [1], Batch [50/313], Loss: 4.1063
Epoch [1], Batch [100/313], Loss: 4.0174
Epoch [1], Batch [150/313], Loss: 3.7361
Epoch [1], Batch [200/313], Loss: 3.9623
Epoch [1], Batch [250/313], Loss: 3.4484
Epoch [1], Batch [300/313], Loss: 4.2066
Train set: Epoch: 1, Average loss:4.0147, LR: 0.001000 Top-1 Accuracy: 9.4900%, Top-5 Accuracy: 28.9800%, Time consumed:38.73s
Val set: Epoch: 1, Average loss:3.5829, Top-1 Accuracy: 13.7800%, Top-5 Accuracy: 39.5800%, Time consumed:4.42s

New best top-1 accuracy: 13.78%, top-5 accuracy: 39.58%
New best top-5 accuracy: 39.58%
Validation loss decreased (inf --> 3.582931). Saving model ...


  1%|▉                                                                                            | 1/100 [00:43<1:11:32, 43.36s/it]

Epoch [2], Batch [50/313], Loss: 4.3247
Epoch [2], Batch [100/313], Loss: 3.0742
Epoch [2], Batch [150/313], Loss: 4.0784
Epoch [2], Batch [200/313], Loss: 4.0387
Epoch [2], Batch [250/313], Loss: 3.9220
Epoch [2], Batch [300/313], Loss: 4.1254
Train set: Epoch: 2, Average loss:3.4348, LR: 0.001000 Top-1 Accuracy: 19.9675%, Top-5 Accuracy: 47.8400%, Time consumed:37.12s
Val set: Epoch: 2, Average loss:2.7454, Top-1 Accuracy: 28.4800%, Top-5 Accuracy: 61.0000%, Time consumed:4.49s

New best top-1 accuracy: 28.48%, top-5 accuracy: 61.00%
New best top-5 accuracy: 61.00%
Validation loss decreased (3.582931 --> 2.745432). Saving model ...


  2%|█▊                                                                                           | 2/100 [01:25<1:09:22, 42.48s/it]

Epoch [3], Batch [50/313], Loss: 2.7739
Epoch [3], Batch [100/313], Loss: 4.0987
Epoch [3], Batch [150/313], Loss: 3.8484
Epoch [3], Batch [200/313], Loss: 2.4346
Epoch [3], Batch [250/313], Loss: 3.4548
Epoch [3], Batch [300/313], Loss: 2.3711
Train set: Epoch: 3, Average loss:3.0284, LR: 0.001000 Top-1 Accuracy: 28.9675%, Top-5 Accuracy: 59.7625%, Time consumed:39.24s
Val set: Epoch: 3, Average loss:2.6246, Top-1 Accuracy: 31.8200%, Top-5 Accuracy: 65.0400%, Time consumed:4.91s

New best top-1 accuracy: 31.82%, top-5 accuracy: 65.04%
New best top-5 accuracy: 65.04%
Validation loss decreased (2.745432 --> 2.624639). Saving model ...


  3%|██▊                                                                                          | 3/100 [02:09<1:10:06, 43.37s/it]

Epoch [4], Batch [50/313], Loss: 3.6297
Epoch [4], Batch [100/313], Loss: 2.1832
Epoch [4], Batch [150/313], Loss: 2.2232
Epoch [4], Batch [200/313], Loss: 4.0074
Epoch [4], Batch [250/313], Loss: 2.5438
Epoch [4], Batch [300/313], Loss: 3.5922
Train set: Epoch: 4, Average loss:2.7848, LR: 0.001000 Top-1 Accuracy: 35.0975%, Top-5 Accuracy: 65.8300%, Time consumed:40.07s
Val set: Epoch: 4, Average loss:2.2976, Top-1 Accuracy: 39.7000%, Top-5 Accuracy: 71.7000%, Time consumed:4.95s

New best top-1 accuracy: 39.70%, top-5 accuracy: 71.70%
New best top-5 accuracy: 71.70%
Validation loss decreased (2.624639 --> 2.297627). Saving model ...


  4%|███▋                                                                                         | 4/100 [02:54<1:10:35, 44.12s/it]

Epoch [5], Batch [50/313], Loss: 3.6288
Epoch [5], Batch [100/313], Loss: 2.9908
Epoch [5], Batch [150/313], Loss: 3.4048
Epoch [5], Batch [200/313], Loss: 2.0017
Epoch [5], Batch [250/313], Loss: 1.6664
Epoch [5], Batch [300/313], Loss: 3.0194
Train set: Epoch: 5, Average loss:2.5664, LR: 0.001000 Top-1 Accuracy: 40.5075%, Top-5 Accuracy: 71.1825%, Time consumed:38.96s
Val set: Epoch: 5, Average loss:2.0400, Top-1 Accuracy: 45.5400%, Top-5 Accuracy: 76.8800%, Time consumed:4.91s

New best top-1 accuracy: 45.54%, top-5 accuracy: 76.88%
New best top-5 accuracy: 76.88%
Validation loss decreased (2.297627 --> 2.040017). Saving model ...


  5%|████▋                                                                                        | 5/100 [03:39<1:09:51, 44.12s/it]

Epoch [6], Batch [50/313], Loss: 1.7060
Epoch [6], Batch [100/313], Loss: 1.8482
Epoch [6], Batch [150/313], Loss: 1.9012
Epoch [6], Batch [200/313], Loss: 2.0130
Epoch [6], Batch [250/313], Loss: 3.6180
Epoch [6], Batch [300/313], Loss: 1.7218
Train set: Epoch: 6, Average loss:2.4082, LR: 0.001000 Top-1 Accuracy: 43.5250%, Top-5 Accuracy: 73.5425%, Time consumed:41.44s
Val set: Epoch: 6, Average loss:1.8709, Top-1 Accuracy: 49.6200%, Top-5 Accuracy: 80.1600%, Time consumed:4.87s

New best top-1 accuracy: 49.62%, top-5 accuracy: 80.16%
New best top-5 accuracy: 80.16%
Validation loss decreased (2.040017 --> 1.870878). Saving model ...


  6%|█████▌                                                                                       | 6/100 [04:25<1:10:25, 44.95s/it]

Epoch [7], Batch [50/313], Loss: 3.0954
Epoch [7], Batch [100/313], Loss: 1.6126
Epoch [7], Batch [150/313], Loss: 3.2703
Epoch [7], Batch [200/313], Loss: 3.6132
Epoch [7], Batch [250/313], Loss: 1.3973
Epoch [7], Batch [300/313], Loss: 2.7603
Train set: Epoch: 7, Average loss:2.3405, LR: 0.001000 Top-1 Accuracy: 47.1825%, Top-5 Accuracy: 76.4700%, Time consumed:39.39s
Val set: Epoch: 7, Average loss:1.8449, Top-1 Accuracy: 50.9400%, Top-5 Accuracy: 80.8400%, Time consumed:5.24s

New best top-1 accuracy: 50.94%, top-5 accuracy: 80.84%
New best top-5 accuracy: 80.84%
Validation loss decreased (1.870878 --> 1.844943). Saving model ...


  7%|██████▌                                                                                      | 7/100 [05:10<1:09:39, 44.94s/it]

Epoch [8], Batch [50/313], Loss: 1.8348
Epoch [8], Batch [100/313], Loss: 3.2642
Epoch [8], Batch [150/313], Loss: 2.2593
Epoch [8], Batch [200/313], Loss: 1.3345
Epoch [8], Batch [250/313], Loss: 2.4002
Epoch [8], Batch [300/313], Loss: 1.3293
Train set: Epoch: 8, Average loss:2.1341, LR: 0.001000 Top-1 Accuracy: 51.8725%, Top-5 Accuracy: 79.9350%, Time consumed:38.89s
Val set: Epoch: 8, Average loss:1.6160, Top-1 Accuracy: 56.1800%, Top-5 Accuracy: 84.5200%, Time consumed:4.82s

New best top-1 accuracy: 56.18%, top-5 accuracy: 84.52%
New best top-5 accuracy: 84.52%
Validation loss decreased (1.844943 --> 1.616027). Saving model ...


  8%|███████▍                                                                                     | 8/100 [05:54<1:08:25, 44.63s/it]

Epoch [9], Batch [50/313], Loss: 1.3715
Epoch [9], Batch [100/313], Loss: 0.9351
Epoch [9], Batch [150/313], Loss: 1.2889
Epoch [9], Batch [200/313], Loss: 1.2279
Epoch [9], Batch [250/313], Loss: 3.5772
Epoch [9], Batch [300/313], Loss: 2.9425
Train set: Epoch: 9, Average loss:2.0252, LR: 0.001000 Top-1 Accuracy: 54.1175%, Top-5 Accuracy: 80.8775%, Time consumed:38.87s


  9%|████████▎                                                                                    | 9/100 [06:38<1:07:18, 44.38s/it]

Val set: Epoch: 9, Average loss:1.6059, Top-1 Accuracy: 55.5200%, Top-5 Accuracy: 84.5200%, Time consumed:4.83s

Validation loss decreased (1.616027 --> 1.605919). Saving model ...
Epoch [10], Batch [50/313], Loss: 1.1718
Epoch [10], Batch [100/313], Loss: 2.6991
Epoch [10], Batch [150/313], Loss: 1.1650
Epoch [10], Batch [200/313], Loss: 0.9993
Epoch [10], Batch [250/313], Loss: 0.9819
Epoch [10], Batch [300/313], Loss: 1.2751
Train set: Epoch: 10, Average loss:1.8359, LR: 0.001000 Top-1 Accuracy: 59.5775%, Top-5 Accuracy: 84.8975%, Time consumed:38.76s
Val set: Epoch: 10, Average loss:1.5582, Top-1 Accuracy: 57.2600%, Top-5 Accuracy: 85.5200%, Time consumed:4.86s

New best top-1 accuracy: 57.26%, top-5 accuracy: 85.52%
New best top-5 accuracy: 85.52%
Validation loss decreased (1.605919 --> 1.558218). Saving model ...


 10%|█████████▏                                                                                  | 10/100 [07:22<1:06:20, 44.22s/it]

Epoch [11], Batch [50/313], Loss: 1.0114
Epoch [11], Batch [100/313], Loss: 3.0600
Epoch [11], Batch [150/313], Loss: 1.1113
Epoch [11], Batch [200/313], Loss: 1.0413
Epoch [11], Batch [250/313], Loss: 1.0316
Epoch [11], Batch [300/313], Loss: 2.4228
Train set: Epoch: 11, Average loss:1.7513, LR: 0.001000 Top-1 Accuracy: 62.1700%, Top-5 Accuracy: 86.5975%, Time consumed:39.53s


 11%|██████████                                                                                  | 11/100 [08:06<1:05:38, 44.25s/it]

Val set: Epoch: 11, Average loss:1.6052, Top-1 Accuracy: 56.0600%, Top-5 Accuracy: 84.1400%, Time consumed:4.77s

EarlyStopping 카운터: 1 / 10
Epoch [12], Batch [50/313], Loss: 2.0131
Epoch [12], Batch [100/313], Loss: 1.9560
Epoch [12], Batch [150/313], Loss: 3.2636
Epoch [12], Batch [200/313], Loss: 1.0097
Epoch [12], Batch [250/313], Loss: 0.6208
Epoch [12], Batch [300/313], Loss: 0.8656
Train set: Epoch: 12, Average loss:1.6631, LR: 0.001000 Top-1 Accuracy: 65.0600%, Top-5 Accuracy: 87.8375%, Time consumed:39.95s
Val set: Epoch: 12, Average loss:1.5331, Top-1 Accuracy: 58.1000%, Top-5 Accuracy: 86.1800%, Time consumed:4.88s

New best top-1 accuracy: 58.10%, top-5 accuracy: 86.18%
New best top-5 accuracy: 86.18%
Validation loss decreased (1.558218 --> 1.533124). Saving model ...


 12%|███████████                                                                                 | 12/100 [08:51<1:05:16, 44.50s/it]

Epoch [13], Batch [50/313], Loss: 1.6059
Epoch [13], Batch [100/313], Loss: 3.1278
Epoch [13], Batch [150/313], Loss: 1.6520
Epoch [13], Batch [200/313], Loss: 1.7702
Epoch [13], Batch [250/313], Loss: 1.5923
Epoch [13], Batch [300/313], Loss: 0.6529
Train set: Epoch: 13, Average loss:1.7279, LR: 0.001000 Top-1 Accuracy: 64.3625%, Top-5 Accuracy: 87.2850%, Time consumed:38.17s
Val set: Epoch: 13, Average loss:1.4508, Top-1 Accuracy: 60.6600%, Top-5 Accuracy: 86.7600%, Time consumed:5.14s

New best top-1 accuracy: 60.66%, top-5 accuracy: 86.76%
New best top-5 accuracy: 86.76%
Validation loss decreased (1.533124 --> 1.450814). Saving model ...


 13%|███████████▉                                                                                | 13/100 [09:35<1:04:07, 44.22s/it]

Epoch [14], Batch [50/313], Loss: 3.1650
Epoch [14], Batch [100/313], Loss: 0.9460
Epoch [14], Batch [150/313], Loss: 0.6072
Epoch [14], Batch [200/313], Loss: 0.5138
Epoch [14], Batch [250/313], Loss: 2.5388
Epoch [14], Batch [300/313], Loss: 0.7680
Train set: Epoch: 14, Average loss:1.5355, LR: 0.001000 Top-1 Accuracy: 68.2300%, Top-5 Accuracy: 88.6500%, Time consumed:38.77s


 14%|████████████▉                                                                               | 14/100 [10:18<1:03:01, 43.98s/it]

Val set: Epoch: 14, Average loss:1.5129, Top-1 Accuracy: 60.3600%, Top-5 Accuracy: 85.8000%, Time consumed:4.63s

EarlyStopping 카운터: 1 / 10
Epoch [15], Batch [50/313], Loss: 2.8411
Epoch [15], Batch [100/313], Loss: 1.1353
Epoch [15], Batch [150/313], Loss: 0.4348
Epoch [15], Batch [200/313], Loss: 2.5301
Epoch [15], Batch [250/313], Loss: 0.5535
Epoch [15], Batch [300/313], Loss: 2.9318
Train set: Epoch: 15, Average loss:1.4327, LR: 0.001000 Top-1 Accuracy: 72.2275%, Top-5 Accuracy: 91.0150%, Time consumed:39.59s
Val set: Epoch: 15, Average loss:1.4053, Top-1 Accuracy: 61.1200%, Top-5 Accuracy: 87.8000%, Time consumed:4.97s

New best top-1 accuracy: 61.12%, top-5 accuracy: 87.80%
New best top-5 accuracy: 87.80%
Validation loss decreased (1.450814 --> 1.405282). Saving model ...


 15%|█████████████▊                                                                              | 15/100 [11:03<1:02:39, 44.23s/it]

Epoch [16], Batch [50/313], Loss: 0.3682
Epoch [16], Batch [100/313], Loss: 2.6680
Epoch [16], Batch [150/313], Loss: 2.3585
Epoch [16], Batch [200/313], Loss: 2.7712
Epoch [16], Batch [250/313], Loss: 0.4261
Epoch [16], Batch [300/313], Loss: 2.8475
Train set: Epoch: 16, Average loss:1.4375, LR: 0.001000 Top-1 Accuracy: 73.1625%, Top-5 Accuracy: 91.0125%, Time consumed:41.35s


 16%|██████████████▋                                                                             | 16/100 [11:49<1:02:46, 44.84s/it]

Val set: Epoch: 16, Average loss:1.4126, Top-1 Accuracy: 62.0000%, Top-5 Accuracy: 87.2400%, Time consumed:4.76s

New best top-1 accuracy: 62.00%, top-5 accuracy: 87.24%
EarlyStopping 카운터: 1 / 10
Epoch [17], Batch [50/313], Loss: 0.3765
Epoch [17], Batch [100/313], Loss: 0.4438
Epoch [17], Batch [150/313], Loss: 0.4310
Epoch [17], Batch [200/313], Loss: 0.3496
Epoch [17], Batch [250/313], Loss: 0.4913
Epoch [17], Batch [300/313], Loss: 0.4471
Train set: Epoch: 17, Average loss:1.3209, LR: 0.001000 Top-1 Accuracy: 75.3475%, Top-5 Accuracy: 91.0200%, Time consumed:38.62s


 17%|███████████████▋                                                                            | 17/100 [12:33<1:01:33, 44.51s/it]

Val set: Epoch: 17, Average loss:1.4467, Top-1 Accuracy: 61.9400%, Top-5 Accuracy: 86.4000%, Time consumed:5.11s

EarlyStopping 카운터: 2 / 10
Epoch [18], Batch [50/313], Loss: 2.9908
Epoch [18], Batch [100/313], Loss: 2.5674
Epoch [18], Batch [150/313], Loss: 2.9298
Epoch [18], Batch [200/313], Loss: 0.3321
Epoch [18], Batch [250/313], Loss: 3.1753
Epoch [18], Batch [300/313], Loss: 0.4172
Train set: Epoch: 18, Average loss:1.3899, LR: 0.001000 Top-1 Accuracy: 74.2450%, Top-5 Accuracy: 91.0125%, Time consumed:38.40s


 18%|████████████████▌                                                                           | 18/100 [13:16<1:00:18, 44.13s/it]

Val set: Epoch: 18, Average loss:1.4178, Top-1 Accuracy: 62.7200%, Top-5 Accuracy: 87.5800%, Time consumed:4.73s

New best top-1 accuracy: 62.72%, top-5 accuracy: 87.58%
EarlyStopping 카운터: 3 / 10
Epoch [19], Batch [50/313], Loss: 0.3420
Epoch [19], Batch [100/313], Loss: 0.2586
Epoch [19], Batch [150/313], Loss: 1.3814
Epoch [19], Batch [200/313], Loss: 0.2950
Epoch [19], Batch [250/313], Loss: 0.3465
Epoch [19], Batch [300/313], Loss: 2.8744
Train set: Epoch: 19, Average loss:1.3302, LR: 0.001000 Top-1 Accuracy: 77.1225%, Top-5 Accuracy: 92.2000%, Time consumed:39.59s


 19%|█████████████████▊                                                                            | 19/100 [14:00<59:37, 44.17s/it]

Val set: Epoch: 19, Average loss:1.5090, Top-1 Accuracy: 60.8600%, Top-5 Accuracy: 86.3600%, Time consumed:4.67s

EarlyStopping 카운터: 4 / 10
Epoch [20], Batch [50/313], Loss: 2.4369
Epoch [20], Batch [100/313], Loss: 1.4266
Epoch [20], Batch [150/313], Loss: 2.9389
Epoch [20], Batch [200/313], Loss: 2.4595
Epoch [20], Batch [250/313], Loss: 0.1765
Epoch [20], Batch [300/313], Loss: 1.1663
Train set: Epoch: 20, Average loss:1.1738, LR: 0.001000 Top-1 Accuracy: 81.3000%, Top-5 Accuracy: 94.2950%, Time consumed:38.91s


 20%|██████████████████▊                                                                           | 20/100 [14:44<58:42, 44.03s/it]

Val set: Epoch: 20, Average loss:1.4604, Top-1 Accuracy: 61.9200%, Top-5 Accuracy: 86.5800%, Time consumed:4.79s

EarlyStopping 카운터: 5 / 10
Epoch [21], Batch [50/313], Loss: 0.2155
Epoch [21], Batch [100/313], Loss: 0.1157
Epoch [21], Batch [150/313], Loss: 1.6592
Epoch [21], Batch [200/313], Loss: 0.2540
Epoch [21], Batch [250/313], Loss: 0.2187
Epoch [21], Batch [300/313], Loss: 0.2118
Train set: Epoch: 21, Average loss:1.2035, LR: 0.001000 Top-1 Accuracy: 80.2500%, Top-5 Accuracy: 93.3675%, Time consumed:37.97s


 21%|███████████████████▋                                                                          | 21/100 [15:27<57:23, 43.59s/it]

Val set: Epoch: 21, Average loss:1.4766, Top-1 Accuracy: 61.6600%, Top-5 Accuracy: 86.2800%, Time consumed:4.60s

EarlyStopping 카운터: 6 / 10
Epoch [22], Batch [50/313], Loss: 2.1471
Epoch [22], Batch [100/313], Loss: 0.1810
Epoch [22], Batch [150/313], Loss: 0.1287
Epoch [22], Batch [200/313], Loss: 2.6588
Epoch [22], Batch [250/313], Loss: 2.4978
Epoch [22], Batch [300/313], Loss: 2.6249
Train set: Epoch: 22, Average loss:1.1372, LR: 0.001000 Top-1 Accuracy: 81.1300%, Top-5 Accuracy: 93.6250%, Time consumed:38.44s


 22%|████████████████████▋                                                                         | 22/100 [16:10<56:41, 43.61s/it]

Val set: Epoch: 22, Average loss:1.4733, Top-1 Accuracy: 61.4200%, Top-5 Accuracy: 86.5200%, Time consumed:5.21s

EarlyStopping 카운터: 7 / 10
Epoch [23], Batch [50/313], Loss: 0.1167
Epoch [23], Batch [100/313], Loss: 3.0907
Epoch [23], Batch [150/313], Loss: 0.1254
Epoch [23], Batch [200/313], Loss: 1.5300
Epoch [23], Batch [250/313], Loss: 1.0627
Epoch [23], Batch [300/313], Loss: 2.3783
Train set: Epoch: 23, Average loss:1.1479, LR: 0.001000 Top-1 Accuracy: 83.3750%, Top-5 Accuracy: 94.9475%, Time consumed:38.08s


 23%|█████████████████████▌                                                                        | 23/100 [16:53<55:39, 43.37s/it]

Val set: Epoch: 23, Average loss:1.4794, Top-1 Accuracy: 61.9600%, Top-5 Accuracy: 86.4800%, Time consumed:4.73s

EarlyStopping 카운터: 8 / 10
Epoch [24], Batch [50/313], Loss: 0.1095
Epoch [24], Batch [100/313], Loss: 0.1184
Epoch [24], Batch [150/313], Loss: 3.2137
Epoch [24], Batch [200/313], Loss: 0.1758
Epoch [24], Batch [250/313], Loss: 0.2048
Epoch [24], Batch [300/313], Loss: 0.1454
Train set: Epoch: 24, Average loss:1.1364, LR: 0.001000 Top-1 Accuracy: 82.0250%, Top-5 Accuracy: 94.2625%, Time consumed:38.61s


 24%|██████████████████████▌                                                                       | 24/100 [17:37<54:58, 43.40s/it]

Val set: Epoch: 24, Average loss:1.4784, Top-1 Accuracy: 62.5400%, Top-5 Accuracy: 85.4800%, Time consumed:4.84s

EarlyStopping 카운터: 9 / 10
Epoch [25], Batch [50/313], Loss: 0.0812
Epoch [25], Batch [100/313], Loss: 2.0912
Epoch [25], Batch [150/313], Loss: 2.7426
Epoch [25], Batch [200/313], Loss: 0.1393
Epoch [25], Batch [250/313], Loss: 1.6891
Epoch [25], Batch [300/313], Loss: 0.1409
Train set: Epoch: 25, Average loss:1.0345, LR: 0.001000 Top-1 Accuracy: 83.3400%, Top-5 Accuracy: 94.1300%, Time consumed:41.19s


 24%|██████████████████████▌                                                                       | 24/100 [18:23<58:14, 45.98s/it]

Val set: Epoch: 25, Average loss:1.4789, Top-1 Accuracy: 62.5200%, Top-5 Accuracy: 86.0400%, Time consumed:5.14s

EarlyStopping 카운터: 10 / 10
Early stopping triggered. Training stopped.
Loading best model from early stopping checkpoint...





Test set: Epoch: 100, Average loss:1.4649, Top-1 Accuracy: 63.0000%, Top-5 Accuracy: 86.6200%, Time consumed:4.64s

Finish! Best validation top-1 accuracy: 62.72%, Best validation top-5 accuracy: 87.80%
Final test top-1 accuracy: 63.00%, Final test top-5 accuracy: 86.62%
Total training time: 1108.09 seconds


0,1
epoch,▁▁▂▂▂▂▃▃▃▄▄▄▅▅▅▅▆▆▆▇▇▇▇███
learning_rate,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
test_accuracy_top1,▁
test_accuracy_top5,▁
test_loss,▁
total_training_time,▁
train_accuracy_top1,▁▂▃▃▄▄▅▅▅▆▆▆▆▇▇▇▇▇▇██████
train_accuracy_top5,▁▃▄▅▅▆▆▆▇▇▇▇▇▇███████████
train_loss,█▇▆▅▅▄▄▄▃▃▃▂▃▂▂▂▂▂▂▁▁▁▁▁▁
val_accuracy_top1,▁▃▄▅▆▆▆▇▇▇▇▇█████████████

0,1
best_val_accuracy_top1,62.72
best_val_accuracy_top5,87.8
early_stopped,True
early_stopped_epoch,25
epoch,25
learning_rate,0.001
test_accuracy_top1,63
test_accuracy_top5,86.62
test_loss,1.46495
total_training_time,1108.08851
