In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Subset
import torchvision
import torchvision.transforms as transforms
import torchvision.transforms.v2 as transforms_v2  # CutMix를 위한 v2 transforms 추가
import sys
import os
import torch
import time
import random
import numpy as np
import wandb
from tqdm import tqdm
from sklearn.model_selection import StratifiedShuffleSplit
from tools.tool import EarlyStopping
from models.resnet import resnet18, resnet34, resnet50

wandb.login(key="ef091b9abcea3186341ddf8995d62bde62d7469e")
wandb.init(project="PBL-2", name="resnet50_cutmix,flip")  # CutMix 적용 실험임을 명시

# WandB 설정
config = {
    "model": "resnet50",
    "batch_size": 128,
    "num_epochs": 100,
    "learning_rate": 0.001,
    "optimizer": "Adam",
    "seed": 2025,
    "deterministic": False,
    "patience": 10,  # early stopping patience
    "train_ratio": 0.8,
    "val_ratio": 0.1,
    "test_ratio": 0.1,
    "cutmix_alpha": 1.0,  # CutMix 알파 파라미터 추가
    "cutmix_prob": 0.5    # CutMix 적용 확률 추가
}
wandb.config.update(config)

# CIFAR-100 데이터셋 로드
transform_train = transforms.Compose([
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize((0.5071, 0.4867, 0.4408), (0.2675, 0.2565, 0.2761)),
])

transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5071, 0.4867, 0.4408), (0.2675, 0.2565, 0.2761)),
])

full_trainset = torchvision.datasets.CIFAR100(
    root='./data', train=True, download=True, transform=transform_train)

testset = torchvision.datasets.CIFAR100(
    root='./data', train=False, download=True, transform=transform_test)

# Stratified 분할을 위한 준비 (train, validation 나누기)
# 모든 라벨을 추출
targets = np.array(full_trainset.targets)

# StratifiedShuffleSplit을 사용하여 8:1:1 비율로 분할
# 먼저 train과 validation을 나눔 (full_trainset에서 8:2)
train_val_split = StratifiedShuffleSplit(n_splits=1, test_size=0.2, random_state=config["seed"])
train_idx, temp_idx = next(train_val_split.split(np.zeros(len(targets)), targets))

# 그 다음 validation과 test를 나눔 (temp에서 1:1, 전체로 보면 1:1)
val_test_targets = targets[temp_idx]
val_test_split = StratifiedShuffleSplit(n_splits=1, test_size=0.5, random_state=config["seed"])
val_idx_temp, test_idx_temp = next(val_test_split.split(np.zeros(len(val_test_targets)), val_test_targets))

# 원래 인덱스로 매핑
val_idx = temp_idx[val_idx_temp]
test_idx = temp_idx[test_idx_temp]

# Subset 생성
trainset = Subset(full_trainset, train_idx)
valset = Subset(full_trainset, val_idx)
testset_split = Subset(full_trainset, test_idx)  # 원래 테스트셋 대신 stratified split에서 나온 테스트셋 사용

# DataLoader 생성
trainloader = DataLoader(trainset, batch_size=config["batch_size"], shuffle=True, num_workers=16)
valloader = DataLoader(valset, batch_size=config["batch_size"], shuffle=False, num_workers=16)
testloader = DataLoader(testset_split, batch_size=config["batch_size"], shuffle=False, num_workers=16)

print(f"Train set size: {len(trainset)}")
print(f"Validation set size: {len(valset)}")
print(f"Test set size: {len(testset_split)}")

# 추가: CutMix 변환 정의
cutmix = transforms_v2.CutMix(alpha=config["cutmix_alpha"], num_classes=100)  # CIFAR-100은 100개 클래스

# CutMix용 손실 함수 정의 (원-핫 인코딩된 레이블 처리)
def cutmix_criterion(outputs, targets):
    """
    CutMix로 혼합된 레이블을 처리하기 위한 손실 함수
    outputs: 모델 출력
    targets: CutMix로 생성된 원-핫 인코딩 레이블
    """
    return torch.nn.functional.cross_entropy(outputs, targets)

def train(model, trainloader, criterion, optimizer, device, epoch):
    """
    학습 함수 (CutMix 적용)
    """
    model.train()   # 모델을 학습 모드로 설정
    start_time = time.time()  # 시간 측정 시작
    running_loss = 0.0
    correct_top1 = 0
    correct_top5 = 0
    total = 0
    
    for i, (inputs, labels) in enumerate(trainloader):
        inputs, labels = inputs.to(device), labels.to(device)
        
        # CutMix 확률적 적용
        if random.random() < config["cutmix_prob"]:
            inputs, labels = cutmix(inputs, labels)
            # 이 경우 labels은 원-핫 인코딩 형태로 변환됨
            use_cutmix = True
        else:
            use_cutmix = False
        
        optimizer.zero_grad()
        
        outputs = model(inputs)
        
        # CutMix 적용 여부에 따라 손실 함수 선택
        if use_cutmix:
            # CutMix가 적용된 경우 (원-핫 인코딩된 레이블)
            loss = torch.nn.functional.cross_entropy(outputs, labels)
        else:
            # 일반적인 경우 (정수 인덱스 레이블)
            loss = criterion(outputs, labels)
            
        loss.backward()
        optimizer.step()
        
        running_loss += loss.item()
        
        # 정확도 계산 - CutMix 적용 여부에 따라 다르게 처리
        if use_cutmix:
            # 원-핫 인코딩된 레이블에서 argmax를 사용해 가장 큰 값의 인덱스 추출
            _, label_idx = labels.max(1)
        else:
            # 정수 인덱스 레이블 그대로 사용
            label_idx = labels
            
        # top-1 정확도 계산
        _, predicted = outputs.max(1)
        total += inputs.size(0)
        correct_top1 += predicted.eq(label_idx).sum().item()
        
        # top-5 정확도 계산
        _, top5_idx = outputs.topk(5, 1, largest=True, sorted=True)
        correct_top5 += sum([1 for i in range(len(label_idx)) if label_idx[i] in top5_idx[i]])
        
        if (i + 1) % 50 == 0:  # 50 배치마다 출력
            print(f'Epoch [{epoch+1}], Batch [{i+1}/{len(trainloader)}], Loss: {loss.item():.4f}')
    
    epoch_loss = running_loss / len(trainloader)
    accuracy_top1 = 100.0 * correct_top1 / total
    accuracy_top5 = 100.0 * correct_top5 / total
    
    train_time = time.time() - start_time
    
    # 학습 세트에 대한 성능 출력
    print(f'Train set: Epoch: {epoch+1}, Average loss:{epoch_loss:.4f}, LR: {optimizer.param_groups[0]["lr"]:.6f} '
          f'Top-1 Accuracy: {accuracy_top1:.4f}%, Top-5 Accuracy: {accuracy_top5:.4f}%, Time consumed:{train_time:.2f}s')
    
    return epoch_loss, accuracy_top1, accuracy_top5

def evaluate(model, dataloader, criterion, device, epoch, phase="val"):
    """
    평가 함수
    """
    model.eval()  # 모델을 평가 모드로 설정
    start_time = time.time()  # 시간 측정 시작
    
    eval_loss = 0.0
    correct_top1 = 0
    correct_top5 = 0
    total = 0
    
    # 그래디언트 계산 비활성화
    with torch.no_grad():
        for inputs, labels in dataloader:
            inputs, labels = inputs.to(device), labels.to(device)
            
            # 순전파
            outputs = model(inputs)
            
            # 손실 계산
            loss = criterion(outputs, labels)
            eval_loss += loss.item()
            
            # top-1 정확도 계산
            _, predicted = outputs.max(1)
            total += labels.size(0)
            correct_top1 += (predicted == labels).sum().item()
            
            # top-5 정확도 계산
            _, top5_idx = outputs.topk(5, 1, largest=True, sorted=True)
            correct_top5 += top5_idx.eq(labels.view(-1, 1).expand_as(top5_idx)).sum().item()
    
    # 평균 손실 및 정확도 계산
    eval_loss = eval_loss / len(dataloader)
    accuracy_top1 = 100.0 * correct_top1 / total
    accuracy_top5 = 100.0 * correct_top5 / total
    
    # 평가 시간 계산
    eval_time = time.time() - start_time
    
    # 테스트 세트에 대한 성능 출력
    print(f'{phase.capitalize()} set: Epoch: {epoch+1}, Average loss:{eval_loss:.4f}, '
          f'Top-1 Accuracy: {accuracy_top1:.4f}%, Top-5 Accuracy: {accuracy_top5:.4f}%, Time consumed:{eval_time:.2f}s')
    print()
    
    return eval_loss, accuracy_top1, accuracy_top5


# 메인 학습 루프
def main_training_loop(model, trainloader, valloader, testloader, criterion, optimizer, device, num_epochs, patience):
    """
    메인 학습 루프
    """
    early_stopping = EarlyStopping(patience=patience, verbose=True)
    
    best_acc_top1 = 0.0
    best_acc_top5 = 0.0
    
    # tqdm을 사용한 진행 상황 표시
    for epoch in tqdm(range(num_epochs)):
        # 학습
        train_loss, train_acc_top1, train_acc_top5 = train(model, trainloader, criterion, optimizer, device, epoch)
        
        # 평가
        val_loss, val_acc_top1, val_acc_top5 = evaluate(model, valloader, criterion, device, epoch, phase="val")
        
        # WandB에 로깅
        wandb.log({
            "epoch": epoch + 1,
            "learning_rate": optimizer.param_groups[0]['lr'],
            "train_loss": train_loss,
            "train_accuracy_top1": train_acc_top1,
            "train_accuracy_top5": train_acc_top5,
            "val_loss": val_loss,
            "val_accuracy_top1": val_acc_top1,
            "val_accuracy_top5": val_acc_top5
        })
            
        # 최고 정확도 모델 저장 (top-1 기준)
        if val_acc_top1 > best_acc_top1:
            best_acc_top1 = val_acc_top1
            best_acc_top5_at_best_top1 = val_acc_top5
            print(f'New best top-1 accuracy: {best_acc_top1:.2f}%, top-5 accuracy: {best_acc_top5_at_best_top1:.2f}%')
            # 모델 저장
            model_path = f'best_model_{wandb.run.name}.pth'
            torch.save(model.state_dict(), model_path)
            
            # WandB에 모델 아티팩트 저장
            wandb.save(model_path)
        
        # top-5 accuracy 기록 업데이트
        if val_acc_top5 > best_acc_top5:
            best_acc_top5 = val_acc_top5
            print(f'New best top-5 accuracy: {best_acc_top5:.2f}%')

        # Early stopping 체크 (validation loss 기준)
        early_stopping(val_loss, model)
        if early_stopping.early_stop:
            print("Early stopping triggered. Training stopped.")
            break
    
    # 훈련 완료 후 모델 평가 (best model 로드)
    if early_stopping.early_stop:
        print("Loading best model from early stopping checkpoint...")
    else:
        print("Loading best model based on validation accuracy...")
        model_path = f'best_model_{wandb.run.name}.pth'
        model.load_state_dict(torch.load(model_path))

    # 최종 테스트 세트 평가
    test_loss, test_acc_top1, test_acc_top5 = evaluate(model, testloader, criterion, device, num_epochs-1, phase="test")

    # 테스트 결과를 wandb 로그에 추가
    wandb.log({
        "epoch": epoch + 1,  # 마지막 에폭 또는 early stopping된 에폭
        "test_loss": test_loss,
        "test_accuracy_top1": test_acc_top1,
        "test_accuracy_top5": test_acc_top5
    })
        
    print(f'Finish! Best validation top-1 accuracy: {best_acc_top1:.2f}%, Best validation top-5 accuracy: {best_acc_top5:.2f}%')
    print(f'Final test top-1 accuracy: {test_acc_top1:.2f}%, Final test top-5 accuracy: {test_acc_top5:.2f}%')
    
    # WandB에 최종 결과 기록
    wandb.run.summary["best_val_accuracy_top1"] = best_acc_top1
    wandb.run.summary["best_val_accuracy_top5"] = best_acc_top5
    wandb.run.summary["test_accuracy_top1"] = test_acc_top1
    wandb.run.summary["test_accuracy_top5"] = test_acc_top5

    # Early stopping 정보 저장
    if early_stopping.early_stop:
        wandb.run.summary["early_stopped"] = True
        wandb.run.summary["early_stopped_epoch"] = epoch+1
    else:
        wandb.run.summary["early_stopped"] = False


# 디바이스 설정
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# 모델 초기화
model = resnet50().to(device)  
criterion = nn.CrossEntropyLoss()  # 손실 함수 정의
optimizer = optim.Adam(model.parameters(), lr=config["learning_rate"])  # 옵티마이저 정의

# WandB에 모델 구조 기록
wandb.watch(model, log="all")

# GPU 가속
if torch.cuda.device_count() > 1:
    print(f"{torch.cuda.device_count()}개의 GPU를 사용합니다.")
    model = nn.DataParallel(model)

# 훈련 시작 시간 기록
start_time = time.time()

# 메인 학습 루프 호출
main_training_loop(
    model=model,
    trainloader=trainloader,
    valloader=valloader,
    testloader=testloader,
    criterion=criterion,
    optimizer=optimizer,
    device=device,
    num_epochs=config["num_epochs"],
    patience=config["patience"]
)

# 훈련 종료 시간 기록 및 출력
end_time = time.time()
total_time = end_time - start_time
wandb.log({"total_training_time": total_time})

print(f"Total training time: {total_time:.2f} seconds")

# WandB 실행 종료
wandb.finish()

[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /home/guswls/.netrc
[34m[1mwandb[0m: Currently logged in as: [33msokjh1310[0m ([33msokjh1310-hanyang-university[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


Files already downloaded and verified
Files already downloaded and verified
Train set size: 40000
Validation set size: 5000
Test set size: 5000
Using device: cuda
2개의 GPU를 사용합니다.


  0%|                                                                                                       | 0/100 [00:00<?, ?it/s]

Epoch [1], Batch [50/313], Loss: 4.3900
Epoch [1], Batch [100/313], Loss: 4.1090
Epoch [1], Batch [150/313], Loss: 4.1082
Epoch [1], Batch [200/313], Loss: 3.9824
Epoch [1], Batch [250/313], Loss: 3.8416
Epoch [1], Batch [300/313], Loss: 3.5935
Train set: Epoch: 1, Average loss:4.2586, LR: 0.001000 Top-1 Accuracy: 6.2600%, Top-5 Accuracy: 22.0675%, Time consumed:80.07s
Val set: Epoch: 1, Average loss:3.8159, Top-1 Accuracy: 10.3800%, Top-5 Accuracy: 32.7800%, Time consumed:8.96s

New best top-1 accuracy: 10.38%, top-5 accuracy: 32.78%
New best top-5 accuracy: 32.78%
Validation loss decreased (inf --> 3.815898). Saving model ...


  1%|▉                                                                                            | 1/100 [01:29<2:27:44, 89.54s/it]

Epoch [2], Batch [50/313], Loss: 3.6662
Epoch [2], Batch [100/313], Loss: 4.1941
Epoch [2], Batch [150/313], Loss: 4.2280
Epoch [2], Batch [200/313], Loss: 3.4618
Epoch [2], Batch [250/313], Loss: 3.5255
Epoch [2], Batch [300/313], Loss: 3.8123
Train set: Epoch: 2, Average loss:3.7644, LR: 0.001000 Top-1 Accuracy: 13.6225%, Top-5 Accuracy: 37.7975%, Time consumed:82.50s
Val set: Epoch: 2, Average loss:3.3263, Top-1 Accuracy: 19.4800%, Top-5 Accuracy: 48.9200%, Time consumed:9.19s

New best top-1 accuracy: 19.48%, top-5 accuracy: 48.92%
New best top-5 accuracy: 48.92%
Validation loss decreased (3.815898 --> 3.326332). Saving model ...


  2%|█▊                                                                                           | 2/100 [03:01<2:28:49, 91.12s/it]

Epoch [3], Batch [50/313], Loss: 3.7211
Epoch [3], Batch [100/313], Loss: 2.9829
Epoch [3], Batch [150/313], Loss: 3.7029
Epoch [3], Batch [200/313], Loss: 4.0567
Epoch [3], Batch [250/313], Loss: 3.4003
Epoch [3], Batch [300/313], Loss: 4.0819
Train set: Epoch: 3, Average loss:3.2779, LR: 0.001000 Top-1 Accuracy: 22.0100%, Top-5 Accuracy: 51.7000%, Time consumed:79.49s
Val set: Epoch: 3, Average loss:2.8930, Top-1 Accuracy: 27.5200%, Top-5 Accuracy: 62.8400%, Time consumed:8.89s

New best top-1 accuracy: 27.52%, top-5 accuracy: 62.84%
New best top-5 accuracy: 62.84%
Validation loss decreased (3.326332 --> 2.893042). Saving model ...


  3%|██▊                                                                                          | 3/100 [04:30<2:25:41, 90.12s/it]

Epoch [4], Batch [50/313], Loss: 3.4523
Epoch [4], Batch [100/313], Loss: 2.7672
Epoch [4], Batch [150/313], Loss: 3.9901
Epoch [4], Batch [200/313], Loss: 2.8880
Epoch [4], Batch [250/313], Loss: 2.6688
Epoch [4], Batch [300/313], Loss: 2.3404
Train set: Epoch: 4, Average loss:3.0017, LR: 0.001000 Top-1 Accuracy: 28.6225%, Top-5 Accuracy: 60.3900%, Time consumed:80.86s
Val set: Epoch: 4, Average loss:3.1974, Top-1 Accuracy: 33.9200%, Top-5 Accuracy: 66.0400%, Time consumed:8.60s

New best top-1 accuracy: 33.92%, top-5 accuracy: 66.04%


  4%|███▋                                                                                         | 4/100 [06:00<2:23:56, 89.97s/it]

New best top-5 accuracy: 66.04%
EarlyStopping 카운터: 1 / 10
Epoch [5], Batch [50/313], Loss: 3.0353
Epoch [5], Batch [100/313], Loss: 2.1680
Epoch [5], Batch [150/313], Loss: 2.7582
Epoch [5], Batch [200/313], Loss: 2.0622
Epoch [5], Batch [250/313], Loss: 2.2801
Epoch [5], Batch [300/313], Loss: 2.1226
Train set: Epoch: 5, Average loss:2.7399, LR: 0.001000 Top-1 Accuracy: 35.1175%, Top-5 Accuracy: 66.9100%, Time consumed:81.84s
Val set: Epoch: 5, Average loss:2.6735, Top-1 Accuracy: 39.2800%, Top-5 Accuracy: 72.0000%, Time consumed:8.62s

New best top-1 accuracy: 39.28%, top-5 accuracy: 72.00%
New best top-5 accuracy: 72.00%
Validation loss decreased (2.893042 --> 2.673488). Saving model ...


  5%|████▋                                                                                        | 5/100 [07:31<2:23:01, 90.33s/it]

Epoch [6], Batch [50/313], Loss: 2.0449
Epoch [6], Batch [100/313], Loss: 2.5305
Epoch [6], Batch [150/313], Loss: 2.7565
Epoch [6], Batch [200/313], Loss: 3.5641
Epoch [6], Batch [250/313], Loss: 2.1251
Epoch [6], Batch [300/313], Loss: 1.9355
Train set: Epoch: 6, Average loss:2.6705, LR: 0.001000 Top-1 Accuracy: 38.2400%, Top-5 Accuracy: 69.6375%, Time consumed:79.04s
Val set: Epoch: 6, Average loss:2.7290, Top-1 Accuracy: 42.6600%, Top-5 Accuracy: 74.9000%, Time consumed:8.59s

New best top-1 accuracy: 42.66%, top-5 accuracy: 74.90%


  6%|█████▌                                                                                       | 6/100 [08:59<2:20:13, 89.51s/it]

New best top-5 accuracy: 74.90%
EarlyStopping 카운터: 1 / 10
Epoch [7], Batch [50/313], Loss: 3.3097
Epoch [7], Batch [100/313], Loss: 2.2907
Epoch [7], Batch [150/313], Loss: 2.9435
Epoch [7], Batch [200/313], Loss: 1.9167
Epoch [7], Batch [250/313], Loss: 1.7236
Epoch [7], Batch [300/313], Loss: 1.7637
Train set: Epoch: 7, Average loss:2.4306, LR: 0.001000 Top-1 Accuracy: 43.1125%, Top-5 Accuracy: 73.9650%, Time consumed:83.55s
Val set: Epoch: 7, Average loss:2.2669, Top-1 Accuracy: 48.1600%, Top-5 Accuracy: 79.4800%, Time consumed:8.31s

New best top-1 accuracy: 48.16%, top-5 accuracy: 79.48%
New best top-5 accuracy: 79.48%
Validation loss decreased (2.673488 --> 2.266854). Saving model ...


  7%|██████▌                                                                                      | 7/100 [10:31<2:20:11, 90.45s/it]

Epoch [8], Batch [50/313], Loss: 1.6513
Epoch [8], Batch [100/313], Loss: 1.5053
Epoch [8], Batch [150/313], Loss: 3.6494
Epoch [8], Batch [200/313], Loss: 1.8956
Epoch [8], Batch [250/313], Loss: 2.3423
Epoch [8], Batch [300/313], Loss: 1.9236
Train set: Epoch: 8, Average loss:2.3246, LR: 0.001000 Top-1 Accuracy: 45.1275%, Top-5 Accuracy: 75.3700%, Time consumed:81.73s


  8%|███████▍                                                                                     | 8/100 [12:02<2:18:49, 90.54s/it]

Val set: Epoch: 8, Average loss:2.6984, Top-1 Accuracy: 43.6400%, Top-5 Accuracy: 76.7800%, Time consumed:9.00s

EarlyStopping 카운터: 1 / 10
Epoch [9], Batch [50/313], Loss: 1.3532
Epoch [9], Batch [100/313], Loss: 1.5046
Epoch [9], Batch [150/313], Loss: 1.5240
Epoch [9], Batch [200/313], Loss: 3.6225
Epoch [9], Batch [250/313], Loss: 3.4138
Epoch [9], Batch [300/313], Loss: 3.2752
Train set: Epoch: 9, Average loss:2.2583, LR: 0.001000 Top-1 Accuracy: 48.0225%, Top-5 Accuracy: 77.3800%, Time consumed:81.56s
Val set: Epoch: 9, Average loss:2.3896, Top-1 Accuracy: 51.7000%, Top-5 Accuracy: 81.5600%, Time consumed:8.85s

New best top-1 accuracy: 51.70%, top-5 accuracy: 81.56%


  9%|████████▎                                                                                    | 9/100 [13:33<2:17:23, 90.59s/it]

New best top-5 accuracy: 81.56%
EarlyStopping 카운터: 2 / 10
Epoch [10], Batch [50/313], Loss: 1.3117
Epoch [10], Batch [100/313], Loss: 1.5127
Epoch [10], Batch [150/313], Loss: 1.3267
Epoch [10], Batch [200/313], Loss: 1.7004
Epoch [10], Batch [250/313], Loss: 3.0246
Epoch [10], Batch [300/313], Loss: 1.5497
Train set: Epoch: 10, Average loss:2.0418, LR: 0.001000 Top-1 Accuracy: 52.6850%, Top-5 Accuracy: 81.2375%, Time consumed:79.38s
Val set: Epoch: 10, Average loss:2.3230, Top-1 Accuracy: 54.8400%, Top-5 Accuracy: 82.9200%, Time consumed:8.76s

New best top-1 accuracy: 54.84%, top-5 accuracy: 82.92%


 10%|█████████▏                                                                                  | 10/100 [15:01<2:14:52, 89.91s/it]

New best top-5 accuracy: 82.92%
EarlyStopping 카운터: 3 / 10
Epoch [11], Batch [50/313], Loss: 1.4192
Epoch [11], Batch [100/313], Loss: 3.6538
Epoch [11], Batch [150/313], Loss: 3.0252
Epoch [11], Batch [200/313], Loss: 3.4628
Epoch [11], Batch [250/313], Loss: 1.4069
Epoch [11], Batch [300/313], Loss: 3.2864
Train set: Epoch: 11, Average loss:2.0300, LR: 0.001000 Top-1 Accuracy: 53.7325%, Top-5 Accuracy: 81.2800%, Time consumed:83.48s
Val set: Epoch: 11, Average loss:1.7560, Top-1 Accuracy: 53.1400%, Top-5 Accuracy: 83.0200%, Time consumed:9.86s

New best top-5 accuracy: 83.02%
Validation loss decreased (2.266854 --> 1.756017). Saving model ...


 11%|██████████                                                                                  | 11/100 [16:35<2:15:02, 91.04s/it]

Epoch [12], Batch [50/313], Loss: 2.9855
Epoch [12], Batch [100/313], Loss: 1.0805
Epoch [12], Batch [150/313], Loss: 1.1718
Epoch [12], Batch [200/313], Loss: 1.6149
Epoch [12], Batch [250/313], Loss: 1.3021
Epoch [12], Batch [300/313], Loss: 2.4411
Train set: Epoch: 12, Average loss:1.8758, LR: 0.001000 Top-1 Accuracy: 57.3800%, Top-5 Accuracy: 83.5450%, Time consumed:81.29s
Val set: Epoch: 12, Average loss:1.5581, Top-1 Accuracy: 57.2200%, Top-5 Accuracy: 85.9600%, Time consumed:9.32s

New best top-1 accuracy: 57.22%, top-5 accuracy: 85.96%
New best top-5 accuracy: 85.96%
Validation loss decreased (1.756017 --> 1.558083). Saving model ...


 12%|███████████                                                                                 | 12/100 [18:06<2:13:34, 91.07s/it]

Epoch [13], Batch [50/313], Loss: 2.3695
Epoch [13], Batch [100/313], Loss: 0.9344
Epoch [13], Batch [150/313], Loss: 1.0876
Epoch [13], Batch [200/313], Loss: 2.9033
Epoch [13], Batch [250/313], Loss: 0.9727
Epoch [13], Batch [300/313], Loss: 1.2209
Train set: Epoch: 13, Average loss:1.7977, LR: 0.001000 Top-1 Accuracy: 59.0425%, Top-5 Accuracy: 84.5525%, Time consumed:83.08s


 13%|███████████▉                                                                                | 13/100 [19:37<2:12:17, 91.23s/it]

Val set: Epoch: 13, Average loss:1.6493, Top-1 Accuracy: 57.0600%, Top-5 Accuracy: 85.1400%, Time consumed:8.52s

EarlyStopping 카운터: 1 / 10
Epoch [14], Batch [50/313], Loss: 3.0402
Epoch [14], Batch [100/313], Loss: 2.0803
Epoch [14], Batch [150/313], Loss: 1.5297
Epoch [14], Batch [200/313], Loss: 0.7293
Epoch [14], Batch [250/313], Loss: 2.9598
Epoch [14], Batch [300/313], Loss: 0.9774
Train set: Epoch: 14, Average loss:1.7306, LR: 0.001000 Top-1 Accuracy: 63.1475%, Top-5 Accuracy: 87.7075%, Time consumed:78.31s
Val set: Epoch: 14, Average loss:1.4431, Top-1 Accuracy: 60.2800%, Top-5 Accuracy: 87.3600%, Time consumed:9.08s

New best top-1 accuracy: 60.28%, top-5 accuracy: 87.36%
New best top-5 accuracy: 87.36%
Validation loss decreased (1.558083 --> 1.443097). Saving model ...


 14%|████████████▉                                                                               | 14/100 [21:05<2:09:19, 90.23s/it]

Epoch [15], Batch [50/313], Loss: 0.7277
Epoch [15], Batch [100/313], Loss: 3.1994
Epoch [15], Batch [150/313], Loss: 3.0909
Epoch [15], Batch [200/313], Loss: 2.8032
Epoch [15], Batch [250/313], Loss: 0.7641
Epoch [15], Batch [300/313], Loss: 0.8466
Train set: Epoch: 15, Average loss:1.7571, LR: 0.001000 Top-1 Accuracy: 62.4275%, Top-5 Accuracy: 86.9375%, Time consumed:78.71s
Val set: Epoch: 15, Average loss:1.4157, Top-1 Accuracy: 61.2200%, Top-5 Accuracy: 87.7800%, Time consumed:8.51s

New best top-1 accuracy: 61.22%, top-5 accuracy: 87.78%
New best top-5 accuracy: 87.78%
Validation loss decreased (1.443097 --> 1.415721). Saving model ...


 15%|█████████████▊                                                                              | 15/100 [22:33<2:06:45, 89.48s/it]

Epoch [16], Batch [50/313], Loss: 2.6627
Epoch [16], Batch [100/313], Loss: 2.9572
Epoch [16], Batch [150/313], Loss: 2.8578
Epoch [16], Batch [200/313], Loss: 2.6315
Epoch [16], Batch [250/313], Loss: 0.7596
Epoch [16], Batch [300/313], Loss: 2.8270
Train set: Epoch: 16, Average loss:1.5652, LR: 0.001000 Top-1 Accuracy: 67.0150%, Top-5 Accuracy: 89.1350%, Time consumed:77.45s
Val set: Epoch: 16, Average loss:1.3880, Top-1 Accuracy: 61.2200%, Top-5 Accuracy: 88.4600%, Time consumed:8.77s

New best top-5 accuracy: 88.46%
Validation loss decreased (1.415721 --> 1.388024). Saving model ...


 16%|██████████████▋                                                                             | 16/100 [24:00<2:04:00, 88.58s/it]

Epoch [17], Batch [50/313], Loss: 2.6770
Epoch [17], Batch [100/313], Loss: 2.5186
Epoch [17], Batch [150/313], Loss: 0.6497
Epoch [17], Batch [200/313], Loss: 2.9644
Epoch [17], Batch [250/313], Loss: 0.4711
Epoch [17], Batch [300/313], Loss: 2.0107
Train set: Epoch: 17, Average loss:1.5145, LR: 0.001000 Top-1 Accuracy: 68.7675%, Top-5 Accuracy: 89.8950%, Time consumed:77.49s


 17%|███████████████▋                                                                            | 17/100 [25:26<2:01:29, 87.83s/it]

Val set: Epoch: 17, Average loss:1.4378, Top-1 Accuracy: 60.4200%, Top-5 Accuracy: 87.6800%, Time consumed:8.59s

EarlyStopping 카운터: 1 / 10
Epoch [18], Batch [50/313], Loss: 1.7579
Epoch [18], Batch [100/313], Loss: 2.4723
Epoch [18], Batch [150/313], Loss: 0.7226
Epoch [18], Batch [200/313], Loss: 3.1106
Epoch [18], Batch [250/313], Loss: 2.3181
Epoch [18], Batch [300/313], Loss: 0.6899
Train set: Epoch: 18, Average loss:1.5069, LR: 0.001000 Top-1 Accuracy: 70.0425%, Top-5 Accuracy: 90.3450%, Time consumed:77.68s


 18%|████████████████▌                                                                           | 18/100 [26:52<1:59:25, 87.39s/it]

Val set: Epoch: 18, Average loss:1.4383, Top-1 Accuracy: 60.8000%, Top-5 Accuracy: 87.0600%, Time consumed:8.66s

EarlyStopping 카운터: 2 / 10
Epoch [19], Batch [50/313], Loss: 2.8707
Epoch [19], Batch [100/313], Loss: 0.4202
Epoch [19], Batch [150/313], Loss: 0.4152
Epoch [19], Batch [200/313], Loss: 0.5902
Epoch [19], Batch [250/313], Loss: 0.3313
Epoch [19], Batch [300/313], Loss: 2.7965
Train set: Epoch: 19, Average loss:1.3506, LR: 0.001000 Top-1 Accuracy: 72.4775%, Top-5 Accuracy: 90.6300%, Time consumed:79.46s
Val set: Epoch: 19, Average loss:1.3991, Top-1 Accuracy: 61.8600%, Top-5 Accuracy: 87.6400%, Time consumed:8.56s

New best top-1 accuracy: 61.86%, top-5 accuracy: 87.64%


 19%|█████████████████▍                                                                          | 19/100 [28:20<1:58:20, 87.65s/it]

EarlyStopping 카운터: 3 / 10
Epoch [20], Batch [50/313], Loss: 0.3532
Epoch [20], Batch [100/313], Loss: 2.2892
Epoch [20], Batch [150/313], Loss: 2.6096
Epoch [20], Batch [200/313], Loss: 0.4125
Epoch [20], Batch [250/313], Loss: 0.4420
Epoch [20], Batch [300/313], Loss: 3.0153
Train set: Epoch: 20, Average loss:1.3312, LR: 0.001000 Top-1 Accuracy: 75.3125%, Top-5 Accuracy: 92.3600%, Time consumed:78.29s
Val set: Epoch: 20, Average loss:1.4065, Top-1 Accuracy: 63.0600%, Top-5 Accuracy: 87.7800%, Time consumed:8.47s

New best top-1 accuracy: 63.06%, top-5 accuracy: 87.78%


 20%|██████████████████▍                                                                         | 20/100 [29:47<1:56:37, 87.46s/it]

EarlyStopping 카운터: 4 / 10
Epoch [21], Batch [50/313], Loss: 0.2979
Epoch [21], Batch [100/313], Loss: 0.3211
Epoch [21], Batch [150/313], Loss: 1.4220
Epoch [21], Batch [200/313], Loss: 1.0082
Epoch [21], Batch [250/313], Loss: 2.2436
Epoch [21], Batch [300/313], Loss: 2.2936
Train set: Epoch: 21, Average loss:1.2560, LR: 0.001000 Top-1 Accuracy: 76.4950%, Top-5 Accuracy: 92.8700%, Time consumed:80.69s


 21%|███████████████████▎                                                                        | 21/100 [31:17<1:55:53, 88.02s/it]

Val set: Epoch: 21, Average loss:1.5282, Top-1 Accuracy: 61.5800%, Top-5 Accuracy: 86.6800%, Time consumed:8.61s

EarlyStopping 카운터: 5 / 10
Epoch [22], Batch [50/313], Loss: 0.3305
Epoch [22], Batch [100/313], Loss: 1.9485
Epoch [22], Batch [150/313], Loss: 0.3277
Epoch [22], Batch [200/313], Loss: 0.2879
Epoch [22], Batch [250/313], Loss: 0.4624
Epoch [22], Batch [300/313], Loss: 0.2596
Train set: Epoch: 22, Average loss:1.2940, LR: 0.001000 Top-1 Accuracy: 76.4125%, Top-5 Accuracy: 92.8550%, Time consumed:79.16s


 22%|████████████████████▏                                                                       | 22/100 [32:44<1:54:11, 87.85s/it]

Val set: Epoch: 22, Average loss:1.3934, Top-1 Accuracy: 63.0600%, Top-5 Accuracy: 87.9200%, Time consumed:8.28s

EarlyStopping 카운터: 6 / 10
Epoch [23], Batch [50/313], Loss: 2.6959
Epoch [23], Batch [100/313], Loss: 0.2118
Epoch [23], Batch [150/313], Loss: 2.8110
Epoch [23], Batch [200/313], Loss: 2.2972
Epoch [23], Batch [250/313], Loss: 0.7375
Epoch [23], Batch [300/313], Loss: 2.5713
Train set: Epoch: 23, Average loss:1.3152, LR: 0.001000 Top-1 Accuracy: 76.8075%, Top-5 Accuracy: 92.7750%, Time consumed:77.10s


 23%|█████████████████████▏                                                                      | 23/100 [34:10<1:52:02, 87.31s/it]

Val set: Epoch: 23, Average loss:1.6885, Top-1 Accuracy: 62.1600%, Top-5 Accuracy: 87.1400%, Time consumed:8.94s

EarlyStopping 카운터: 7 / 10
Epoch [24], Batch [50/313], Loss: 0.3064
Epoch [24], Batch [100/313], Loss: 0.2254
Epoch [24], Batch [150/313], Loss: 2.4513
Epoch [24], Batch [200/313], Loss: 0.3027
Epoch [24], Batch [250/313], Loss: 2.6531
Epoch [24], Batch [300/313], Loss: 2.0898
Train set: Epoch: 24, Average loss:1.1562, LR: 0.001000 Top-1 Accuracy: 78.5900%, Top-5 Accuracy: 93.1875%, Time consumed:77.43s
Val set: Epoch: 24, Average loss:1.4627, Top-1 Accuracy: 63.1800%, Top-5 Accuracy: 87.3600%, Time consumed:8.33s

New best top-1 accuracy: 63.18%, top-5 accuracy: 87.36%


 24%|██████████████████████                                                                      | 24/100 [35:36<1:50:05, 86.92s/it]

EarlyStopping 카운터: 8 / 10
Epoch [25], Batch [50/313], Loss: 2.5090
Epoch [25], Batch [100/313], Loss: 0.1510
Epoch [25], Batch [150/313], Loss: 0.1405
Epoch [25], Batch [200/313], Loss: 0.1938
Epoch [25], Batch [250/313], Loss: 0.1619
Epoch [25], Batch [300/313], Loss: 2.6490
Train set: Epoch: 25, Average loss:1.0438, LR: 0.001000 Top-1 Accuracy: 82.3550%, Top-5 Accuracy: 94.8275%, Time consumed:78.57s
Val set: Epoch: 25, Average loss:1.4450, Top-1 Accuracy: 63.2600%, Top-5 Accuracy: 87.6800%, Time consumed:8.65s

New best top-1 accuracy: 63.26%, top-5 accuracy: 87.68%


 25%|███████████████████████                                                                     | 25/100 [37:04<1:48:52, 87.10s/it]

EarlyStopping 카운터: 9 / 10
Epoch [26], Batch [50/313], Loss: 0.0924
Epoch [26], Batch [100/313], Loss: 0.1286
Epoch [26], Batch [150/313], Loss: 2.5432
Epoch [26], Batch [200/313], Loss: 1.9826
Epoch [26], Batch [250/313], Loss: 0.2725
Epoch [26], Batch [300/313], Loss: 0.3138
Train set: Epoch: 26, Average loss:1.1265, LR: 0.001000 Top-1 Accuracy: 81.6075%, Top-5 Accuracy: 94.4425%, Time consumed:77.54s


 25%|███████████████████████                                                                     | 25/100 [38:29<1:55:29, 92.40s/it]

Val set: Epoch: 26, Average loss:1.5407, Top-1 Accuracy: 61.3600%, Top-5 Accuracy: 86.1800%, Time consumed:8.27s

EarlyStopping 카운터: 10 / 10
Early stopping triggered. Training stopped.
Loading best model from early stopping checkpoint...





Test set: Epoch: 100, Average loss:1.4863, Top-1 Accuracy: 61.6000%, Top-5 Accuracy: 85.7800%, Time consumed:10.06s

Finish! Best validation top-1 accuracy: 63.26%, Best validation top-5 accuracy: 88.46%
Final test top-1 accuracy: 61.60%, Final test top-5 accuracy: 85.78%
Total training time: 2319.96 seconds


0,1
epoch,▁▁▂▂▂▂▃▃▃▄▄▄▄▅▅▅▅▆▆▆▇▇▇▇███
learning_rate,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
test_accuracy_top1,▁
test_accuracy_top5,▁
test_loss,▁
total_training_time,▁
train_accuracy_top1,▁▂▂▃▄▄▄▅▅▅▅▆▆▆▆▇▇▇▇▇▇▇▇███
train_accuracy_top5,▁▃▄▅▅▆▆▆▆▇▇▇▇▇▇▇██████████
train_loss,█▇▆▅▅▅▄▄▄▃▃▃▃▂▃▂▂▂▂▂▁▂▂▁▁▁
val_accuracy_top1,▁▂▃▄▅▅▆▅▆▇▇▇▇█████████████

0,1
best_val_accuracy_top1,63.26
best_val_accuracy_top5,88.46
early_stopped,True
early_stopped_epoch,26
epoch,26
learning_rate,0.001
test_accuracy_top1,61.6
test_accuracy_top5,85.78
test_loss,1.48629
total_training_time,2319.96019
