In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
import torchvision
import torchvision.transforms as transforms
import torchvision.transforms.v2 as transforms_v2  # CutMix를 위한 v2 transforms 추가
import sys
import os
import time
import random
import numpy as np
import wandb
from tqdm import tqdm
from tools.tool import AccuracyEarlyStopping, WarmUpLR, SAM  # 수정된 AccuracyEarlyStopping 클래스 임포트
from models.resnet import resnet18

# Authenticate with Weights & Biases.
# The API key is taken from the WANDB_API_KEY environment variable instead of
# being hard-coded, so the secret never lands in version control or in an
# exported notebook. With the variable unset, key=None is passed and wandb is
# left to resolve credentials on its own (see the wandb.login documentation).
# NOTE(review): the key that used to be committed on this line is leaked and
# should be revoked.
wandb.login(key=os.environ.get("WANDB_API_KEY"))
wandb.init(project="PBL-2", name="resnet18_cfc,lr=0.01,factor=0.5,SAM_standard")

# Experiment configuration (mirrored to W&B below)
config = {
    "model": "resnet18",
    "batch_size": 128,
    "num_epochs": 300,
    "learning_rate": 0.01,
    "optimizer": "Adam",
    "seed": 2025,
    "deterministic": False,
    "patience": 30,  # early stopping patience
    "max_epochs_wait": float('inf'),  # forwarded to AccuracyEarlyStopping(max_epochs=...); inf = no upper bound
    "cutmix_alpha": 1.0,  # CutMix alpha parameter
    "cutmix_prob": 0.5,   # probability of applying CutMix to a batch
    "crop_padding": 4,    # padding used by RandomCrop
    "crop_size": 32,      # RandomCrop output size (CIFAR-100 images are 32x32)
    "warmup_epochs": 5,   # number of warm-up epochs
}
wandb.config.update(config)

# Apply the configured seed. Previously "seed" was only logged and never used,
# so runs were not repeatable even though a seed was reported to W&B.
random.seed(config["seed"])
np.random.seed(config["seed"])
torch.manual_seed(config["seed"])
if config["deterministic"]:
    # Trade speed for run-to-run determinism in cuDNN only when requested.
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

# CIFAR-100 data pipeline - uses the dataset's built-in train/test split.
# Both splits share the same tensor conversion and per-channel normalisation.
_norm_mean = (0.5071, 0.4867, 0.4408)
_norm_std = (0.2675, 0.2565, 0.2761)

# Training: pad-and-random-crop plus horizontal flip, then normalise.
transform_train = transforms.Compose([
    transforms.RandomCrop(config["crop_size"], padding=config["crop_padding"]),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(_norm_mean, _norm_std),
])

# Evaluation: no augmentation, only normalisation.
transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(_norm_mean, _norm_std),
])

_data_root = './data'
trainset = torchvision.datasets.CIFAR100(
    root=_data_root,
    train=True,
    download=True,
    transform=transform_train,
)
testset = torchvision.datasets.CIFAR100(
    root=_data_root,
    train=False,
    download=True,
    transform=transform_test,
)

# Data loaders: only the training loader shuffles.
_loader_settings = {"batch_size": config["batch_size"], "num_workers": 16}
trainloader = DataLoader(trainset, shuffle=True, **_loader_settings)
testloader = DataLoader(testset, shuffle=False, **_loader_settings)

print(f"Train set size: {len(trainset)}")
print(f"Test set size: {len(testset)}")

# Batch-level CutMix transform (CIFAR-100 has 100 classes); applied inside train().
cutmix = transforms_v2.CutMix(num_classes=100, alpha=config["cutmix_alpha"])

def train(model, trainloader, criterion, optimizer, device, epoch, warmup_scheduler=None, warmup_epochs=5):
    """
    Train the model for one epoch using a two-step (SAM-style) optimizer and
    probabilistic CutMix.

    Reads two module-level names: ``config["cutmix_prob"]`` (probability of
    applying CutMix to a batch) and ``cutmix`` (the CutMix transform).

    Args:
        model: Network called as ``model(inputs)``.
        trainloader: Iterable of ``(inputs, labels)`` batches that supports ``len()``.
        criterion: Loss used for batches with integer class labels.
        optimizer: Object exposing ``zero_grad()``, ``first_step(zero_grad=True)``,
            ``second_step(zero_grad=True)`` and ``param_groups``.
        device: Device the batches are moved to.
        epoch: Zero-based epoch index (printed as ``epoch + 1``).
        warmup_scheduler: Optional scheduler stepped once per batch while
            ``epoch < warmup_epochs``.
        warmup_epochs: Number of epochs during which the warm-up scheduler is stepped.

    Returns:
        Tuple ``(epoch_loss, accuracy_top1, accuracy_top5)``: mean per-batch loss
        and top-1 / top-5 accuracy in percent. The loss and predictions come
        from the second forward pass of each batch. All three are 0.0 when the
        loader yields no samples.
    """
    def compute_loss(logits, targets, soft_targets):
        # CutMix replaces the integer labels with per-class probability
        # vectors, which are fed to the functional cross entropy; plain
        # batches go through the configured criterion.
        if soft_targets:
            return torch.nn.functional.cross_entropy(logits, targets)
        return criterion(logits, targets)

    model.train()   # switch to training mode
    start_time = time.time()
    running_loss = 0.0
    correct_top1 = 0
    correct_top5 = 0
    total = 0
    num_batches = len(trainloader)

    for i, (inputs, labels) in enumerate(trainloader):
        inputs, labels = inputs.to(device), labels.to(device)

        # Apply CutMix to this batch with probability config["cutmix_prob"].
        use_cutmix = random.random() < config["cutmix_prob"]
        if use_cutmix:
            inputs, labels = cutmix(inputs, labels)

        # First forward/backward pass, followed by the optimizer's first step.
        optimizer.zero_grad()
        loss = compute_loss(model(inputs), labels, use_cutmix)
        loss.backward()
        optimizer.first_step(zero_grad=True)

        # Second forward/backward pass on the same batch, followed by the
        # optimizer's second step.
        outputs = model(inputs)
        loss = compute_loss(outputs, labels, use_cutmix)
        loss.backward()
        optimizer.second_step(zero_grad=True)

        # Only the warm-up scheduler is stepped here (per batch); the main
        # scheduler is driven by the caller once per epoch.
        if epoch < warmup_epochs and warmup_scheduler is not None:
            warmup_scheduler.step()

        running_loss += loss.item()

        # For CutMix batches, score against the class with the largest weight.
        if use_cutmix:
            _, label_idx = labels.max(1)
        else:
            label_idx = labels

        logits = outputs.detach()

        # top-1 accuracy
        _, predicted = logits.max(1)
        total += inputs.size(0)
        correct_top1 += predicted.eq(label_idx).sum().item()

        # top-5 accuracy, vectorised: one comparison for the whole batch
        # instead of a Python-level membership test per sample.
        _, top5_idx = logits.topk(5, 1, largest=True, sorted=True)
        correct_top5 += top5_idx.eq(label_idx.view(-1, 1)).any(dim=1).sum().item()

        if (i + 1) % 50 == 0:  # progress line every 50 batches
            print(f'Epoch [{epoch+1}], Batch [{i+1}/{len(trainloader)}], Loss: {loss.item():.4f}, LR: {optimizer.param_groups[0]["lr"]:.6f}')

    # Guard against an empty loader instead of dividing by zero.
    epoch_loss = running_loss / num_batches if num_batches else 0.0
    accuracy_top1 = 100.0 * correct_top1 / total if total else 0.0
    accuracy_top5 = 100.0 * correct_top5 / total if total else 0.0

    train_time = time.time() - start_time

    # Epoch summary for the training set
    print(f'Train set: Epoch: {epoch+1}, Average loss:{epoch_loss:.4f}, LR: {optimizer.param_groups[0]["lr"]:.6f} '
          f'Top-1 Accuracy: {accuracy_top1:.4f}%, Top-5 Accuracy: {accuracy_top5:.4f}%, Time consumed:{train_time:.2f}s')

    return epoch_loss, accuracy_top1, accuracy_top5

def evaluate(model, dataloader, criterion, device, epoch, phase="test"):
    """
    Evaluate the model on every batch of a loader without tracking gradients.

    Args:
        model: Network called as ``model(inputs)``; put into eval mode here.
        dataloader: Iterable of ``(inputs, labels)`` batches that supports ``len()``.
        criterion: Loss applied to ``(outputs, labels)``.
        device: Device the batches are moved to.
        epoch: Zero-based epoch index, used only in the printed summary.
        phase: Name of the split, capitalised in the printed summary.

    Returns:
        Tuple ``(eval_loss, accuracy_top1, accuracy_top5)``: mean per-batch loss
        and top-1 / top-5 accuracy in percent.
    """
    model.eval()
    began_at = time.time()

    loss_sum = 0.0
    hits_top1 = 0
    hits_top5 = 0
    seen = 0

    with torch.no_grad():
        for batch_inputs, batch_labels in dataloader:
            batch_inputs = batch_inputs.to(device)
            batch_labels = batch_labels.to(device)

            logits = model(batch_inputs)
            loss_sum += criterion(logits, batch_labels).item()
            seen += batch_labels.size(0)

            # top-1: index of the largest logit per sample
            top1_pred = logits.max(1)[1]
            hits_top1 += (top1_pred == batch_labels).sum().item()

            # top-5: a sample counts when its label appears among the 5 largest logits
            top5_pred = logits.topk(5, 1, largest=True, sorted=True)[1]
            label_column = batch_labels.view(-1, 1)
            hits_top5 += (top5_pred == label_column).sum().item()

    # Averages over batches (loss) and over samples (accuracy)
    eval_loss = loss_sum / len(dataloader)
    accuracy_top1 = 100.0 * hits_top1 / seen
    accuracy_top5 = 100.0 * hits_top5 / seen

    eval_time = time.time() - began_at

    # Summary line for this split, followed by a blank line
    print(f'{phase.capitalize()} set: Epoch: {epoch+1}, Average loss:{eval_loss:.4f}, '
          f'Top-1 Accuracy: {accuracy_top1:.4f}%, Top-5 Accuracy: {accuracy_top5:.4f}%, Time consumed:{eval_time:.2f}s')
    print()

    return eval_loss, accuracy_top1, accuracy_top5

# 메인 학습 루프
def main_training_loop(model, trainloader, testloader, criterion, optimizer, device, num_epochs, patience, max_epochs_wait, warmup_scheduler=None, main_scheduler=None, warmup_epochs=5):
    """
    Run the full train/evaluate loop with accuracy-based early stopping.

    Each epoch calls ``train`` and ``evaluate``, steps ``main_scheduler`` with
    the test top-1 accuracy once warm-up is over, logs metrics to W&B, saves
    the weights whenever test top-1 accuracy improves, and consults
    ``AccuracyEarlyStopping``. After the loop the best saved weights (if any)
    are reloaded, evaluated once more, and the results are written to the W&B
    run summary.

    Args:
        model: Network to train.
        trainloader: Loader passed to ``train``.
        testloader: Loader passed to ``evaluate``.
        criterion: Loss passed to both ``train`` and ``evaluate``.
        optimizer: Optimizer passed to ``train``; its first param group's LR is logged.
        device: Device for batches and for mapping the reloaded checkpoint.
        num_epochs: Maximum number of epochs.
        patience: Forwarded to ``AccuracyEarlyStopping(patience=...)``.
        max_epochs_wait: Forwarded to ``AccuracyEarlyStopping(max_epochs=...)``.
        warmup_scheduler: Optional per-batch warm-up scheduler, forwarded to ``train``.
        main_scheduler: Optional scheduler stepped with test top-1 accuracy
            for epochs ``>= warmup_epochs``.
        warmup_epochs: Number of warm-up epochs.

    Returns:
        None.
    """
    # Early stopping driven by test top-1 accuracy
    early_stopping = AccuracyEarlyStopping(patience=patience, verbose=True, path='checkpoint.pt', max_epochs=max_epochs_wait)

    best_test_acc_top1 = 0.0
    best_test_acc_top5 = 0.0

    model_path = f'best_model_{wandb.run.name}.pth'
    best_model_saved = False  # becomes True once this run has written model_path
    last_epoch = -1           # last epoch index that actually ran

    # tqdm shows per-epoch progress
    for epoch in tqdm(range(num_epochs)):
        last_epoch = epoch

        # Train for one epoch
        train_loss, train_acc_top1, train_acc_top5 = train(
            model,
            trainloader,
            criterion,
            optimizer,
            device,
            epoch,
            warmup_scheduler,
            warmup_epochs
        )

        # Evaluate on the test loader
        test_loss, test_acc_top1, test_acc_top5 = evaluate(model, testloader, criterion, device, epoch, phase="test")

        # After warm-up, let the main scheduler react to the test accuracy
        if epoch >= warmup_epochs and main_scheduler is not None:
            main_scheduler.step(test_acc_top1)

        # Log this epoch's metrics to W&B
        wandb.log({
            "epoch": epoch + 1,
            "learning_rate": optimizer.param_groups[0]['lr'],
            "train_loss": train_loss,
            "train_accuracy_top1": train_acc_top1,
            "train_accuracy_top5": train_acc_top5,
            "test_loss": test_loss,
            "test_accuracy_top1": test_acc_top1,
            "test_accuracy_top5": test_acc_top5
        })

        # Save the weights whenever top-1 accuracy improves
        if test_acc_top1 > best_test_acc_top1:
            best_test_acc_top1 = test_acc_top1
            best_test_acc_top5_at_best_top1 = test_acc_top5
            print(f'새로운 최고 top-1 정확도: {best_test_acc_top1:.2f}%, top-5 정확도: {best_test_acc_top5_at_best_top1:.2f}%')
            torch.save(model.state_dict(), model_path)
            best_model_saved = True

            # Upload the checkpoint file to W&B
            wandb.save(model_path)

        # Track the best top-5 accuracy independently of top-1
        if test_acc_top5 > best_test_acc_top5:
            best_test_acc_top5 = test_acc_top5
            print(f'새로운 최고 top-5 정확도: {best_test_acc_top5:.2f}%')

        # Early stopping check (based on test top-1 accuracy)
        early_stopping(test_acc_top1, model, epoch)
        if early_stopping.early_stop:
            print(f"에폭 {epoch+1}에서 학습 조기 종료. 최고 성능 에폭: {early_stopping.best_epoch+1}")
            break

    # Reload the best weights saved by this run. If nothing was saved (no epoch
    # ran, or accuracy never rose above 0), keep the current weights rather
    # than failing on a missing or stale checkpoint file.
    if best_model_saved:
        print("테스트 정확도 기준 최고 모델 로드 중...")
        model.load_state_dict(torch.load(model_path, map_location=device))
    else:
        print("저장된 최고 모델이 없어 현재 가중치로 최종 평가를 진행합니다.")

    # Final evaluation, labelled with the last epoch that actually ran
    # (identical to num_epochs-1 when the loop was not stopped early).
    final_test_loss, final_test_acc_top1, final_test_acc_top5 = evaluate(model, testloader, criterion, device, last_epoch, phase="test")

    print(f'완료! 최고 테스트 top-1 정확도: {best_test_acc_top1:.2f}%, 최고 테스트 top-5 정확도: {best_test_acc_top5:.2f}%')
    print(f'최종 테스트 top-1 정확도: {final_test_acc_top1:.2f}%, 최종 테스트 top-5 정확도: {final_test_acc_top5:.2f}%')

    # Record the final results in the W&B run summary
    wandb.run.summary["best_test_accuracy_top1"] = best_test_acc_top1
    wandb.run.summary["best_test_accuracy_top5"] = best_test_acc_top5
    wandb.run.summary["final_test_accuracy_top1"] = final_test_acc_top1
    wandb.run.summary["final_test_accuracy_top5"] = final_test_acc_top5

    # Record whether, and when, early stopping fired
    if early_stopping.early_stop:
        wandb.run.summary["early_stopped"] = True
        wandb.run.summary["early_stopped_epoch"] = last_epoch+1
        wandb.run.summary["best_epoch"] = early_stopping.best_epoch+1
    else:
        wandb.run.summary["early_stopped"] = False


# Device selection: use CUDA when available, otherwise CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# Model, loss and optimizer
# SAM (from tools.tool) wraps Adam as its base optimizer.
model = resnet18().to(device)
criterion = nn.CrossEntropyLoss()  # plain CrossEntropyLoss (no label smoothing)
base_optimizer = optim.Adam
optimizer = SAM(model.parameters(), base_optimizer, lr=config["learning_rate"])

# WarmUpLR scheduler
# Total number of warm-up iterations = warm-up epochs x batches per epoch
warmup_steps = config["warmup_epochs"] * len(trainloader)
warmup_scheduler = WarmUpLR(optimizer, total_iters=warmup_steps)

# Scheduler used after warm-up
main_scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
    optimizer, 
    mode='max',           # the monitored metric is accuracy, so higher is better
    factor=0.5,           # multiply the learning rate by 0.5 (halve it) on a plateau
    patience=5,           # epochs without improvement before the LR is reduced
    verbose=True,         # print a message when the LR changes
    threshold=0.01,        # minimum change that counts as an improvement
                           # NOTE(review): threshold_mode is left at its default,
                           # which the PyTorch docs give as 'rel' (relative) - confirm
    min_lr=1e-6
)

# Register the model with W&B (log="all")
wandb.watch(model, log="all")

# Multi-GPU: wrap the model in DataParallel when more than one GPU is visible.
# The optimizer and wandb.watch above were created from the unwrapped model.
if torch.cuda.device_count() > 1:
    print(f"{torch.cuda.device_count()}개의 GPU를 사용합니다.")
    model = nn.DataParallel(model)

# Record the training start time
start_time = time.time()

# Run the main training loop
main_training_loop(
    model=model,
    trainloader=trainloader,
    testloader=testloader,
    criterion=criterion,
    optimizer=optimizer,
    device=device,
    num_epochs=config["num_epochs"],
    patience=config["patience"],
    max_epochs_wait=config["max_epochs_wait"],
    warmup_scheduler=warmup_scheduler,
    main_scheduler=main_scheduler,
    warmup_epochs=config["warmup_epochs"]
)

# Record the end time and report the total wall-clock training time
end_time = time.time()
total_time = end_time - start_time
wandb.log({"total_training_time": total_time})

print(f"전체 학습 시간: {total_time:.2f} 초")

# Finish the W&B run
wandb.finish()



Files already downloaded and verified
Files already downloaded and verified
Train set size: 50000
Test set size: 10000
Using device: cuda
2개의 GPU를 사용합니다.








Epoch [1], Batch [50/391], Loss: 4.5961, LR: 0.000256
Epoch [1], Batch [100/391], Loss: 4.5056, LR: 0.000512
Epoch [1], Batch [150/391], Loss: 4.0830, LR: 0.000767
Epoch [1], Batch [200/391], Loss: 4.3724, LR: 0.001023
Epoch [1], Batch [250/391], Loss: 3.8148, LR: 0.001279
Epoch [1], Batch [300/391], Loss: 3.7220, LR: 0.001535
Epoch [1], Batch [350/391], Loss: 4.3313, LR: 0.001790
Train set: Epoch: 1, Average loss:4.2361, LR: 0.002000 Top-1 Accuracy: 6.3880%, Top-5 Accuracy: 22.4320%, Time consumed:91.87s
Test set: Epoch: 1, Average loss:3.7305, Top-1 Accuracy: 12.6700%, Top-5 Accuracy: 37.2200%, Time consumed:8.16s

새로운 최고 top-1 정확도: 12.67%, top-5 정확도: 37.22%
새로운 최고 top-5 정확도: 37.22%
Accuracy improved (-inf% --> 12.67%). Saving model ...


  0%|▎                                                                                           | 1/300 [01:40<8:19:36, 100.25s/it]

Epoch [2], Batch [50/391], Loss: 3.7938, LR: 0.002256
Epoch [2], Batch [100/391], Loss: 3.4379, LR: 0.002512
Epoch [2], Batch [150/391], Loss: 3.6116, LR: 0.002767
Epoch [2], Batch [200/391], Loss: 3.3739, LR: 0.003023
Epoch [2], Batch [250/391], Loss: 3.2603, LR: 0.003279
Epoch [2], Batch [300/391], Loss: 3.3991, LR: 0.003535
Epoch [2], Batch [350/391], Loss: 3.9479, LR: 0.003790
Train set: Epoch: 2, Average loss:3.7179, LR: 0.004000 Top-1 Accuracy: 14.1860%, Top-5 Accuracy: 39.0500%, Time consumed:94.84s
Test set: Epoch: 2, Average loss:3.2554, Top-1 Accuracy: 21.3300%, Top-5 Accuracy: 50.4000%, Time consumed:8.05s

새로운 최고 top-1 정확도: 21.33%, top-5 정확도: 50.40%
새로운 최고 top-5 정확도: 50.40%
Accuracy improved (12.67% --> 21.33%). Saving model ...


  1%|▌                                                                                           | 2/300 [03:23<8:26:26, 101.97s/it]

Epoch [3], Batch [50/391], Loss: 3.0326, LR: 0.004256
Epoch [3], Batch [100/391], Loss: 3.0342, LR: 0.004512
Epoch [3], Batch [150/391], Loss: 4.0415, LR: 0.004767
Epoch [3], Batch [200/391], Loss: 3.8093, LR: 0.005023
Epoch [3], Batch [250/391], Loss: 2.8377, LR: 0.005279
Epoch [3], Batch [300/391], Loss: 2.9366, LR: 0.005535
Epoch [3], Batch [350/391], Loss: 2.7980, LR: 0.005790
Train set: Epoch: 3, Average loss:3.3317, LR: 0.006000 Top-1 Accuracy: 21.8020%, Top-5 Accuracy: 50.9640%, Time consumed:90.53s
Test set: Epoch: 3, Average loss:2.6647, Top-1 Accuracy: 31.7800%, Top-5 Accuracy: 63.8200%, Time consumed:8.10s

새로운 최고 top-1 정확도: 31.78%, top-5 정확도: 63.82%
새로운 최고 top-5 정확도: 63.82%
Accuracy improved (21.33% --> 31.78%). Saving model ...


  1%|▉                                                                                           | 3/300 [05:02<8:17:54, 100.59s/it]

Epoch [4], Batch [50/391], Loss: 3.1691, LR: 0.006256
Epoch [4], Batch [100/391], Loss: 2.8669, LR: 0.006512
Epoch [4], Batch [150/391], Loss: 2.6220, LR: 0.006767
Epoch [4], Batch [200/391], Loss: 2.6368, LR: 0.007023
Epoch [4], Batch [250/391], Loss: 3.8379, LR: 0.007279
Epoch [4], Batch [300/391], Loss: 3.5700, LR: 0.007535
Epoch [4], Batch [350/391], Loss: 2.4219, LR: 0.007790
Train set: Epoch: 4, Average loss:2.9721, LR: 0.008000 Top-1 Accuracy: 29.2860%, Top-5 Accuracy: 60.6960%, Time consumed:93.83s
Test set: Epoch: 4, Average loss:2.3399, Top-1 Accuracy: 38.1600%, Top-5 Accuracy: 71.0900%, Time consumed:8.36s

새로운 최고 top-1 정확도: 38.16%, top-5 정확도: 71.09%
새로운 최고 top-5 정확도: 71.09%
Accuracy improved (31.78% --> 38.16%). Saving model ...


  1%|█▏                                                                                          | 4/300 [06:44<8:19:49, 101.32s/it]

Epoch [5], Batch [50/391], Loss: 3.8390, LR: 0.008256
Epoch [5], Batch [100/391], Loss: 3.3550, LR: 0.008512
Epoch [5], Batch [150/391], Loss: 2.1009, LR: 0.008767
Epoch [5], Batch [200/391], Loss: 3.2485, LR: 0.009023
Epoch [5], Batch [250/391], Loss: 2.1393, LR: 0.009279
Epoch [5], Batch [300/391], Loss: 2.1440, LR: 0.009535
Epoch [5], Batch [350/391], Loss: 2.0353, LR: 0.009790
Train set: Epoch: 5, Average loss:2.7229, LR: 0.010000 Top-1 Accuracy: 35.2300%, Top-5 Accuracy: 66.9600%, Time consumed:94.16s
Test set: Epoch: 5, Average loss:2.0010, Top-1 Accuracy: 46.0400%, Top-5 Accuracy: 77.8200%, Time consumed:9.04s

새로운 최고 top-1 정확도: 46.04%, top-5 정확도: 77.82%
새로운 최고 top-5 정확도: 77.82%
Accuracy improved (38.16% --> 46.04%). Saving model ...


  2%|█▌                                                                                          | 5/300 [08:28<8:21:56, 102.09s/it]

Epoch [6], Batch [50/391], Loss: 2.5787, LR: 0.010000
Epoch [6], Batch [100/391], Loss: 1.7364, LR: 0.010000
Epoch [6], Batch [150/391], Loss: 3.6242, LR: 0.010000
Epoch [6], Batch [200/391], Loss: 3.0872, LR: 0.010000
Epoch [6], Batch [250/391], Loss: 2.1317, LR: 0.010000
Epoch [6], Batch [300/391], Loss: 1.8930, LR: 0.010000
Epoch [6], Batch [350/391], Loss: 2.8879, LR: 0.010000
Train set: Epoch: 6, Average loss:2.6331, LR: 0.010000 Top-1 Accuracy: 38.7280%, Top-5 Accuracy: 69.2240%, Time consumed:96.24s
Test set: Epoch: 6, Average loss:1.8909, Top-1 Accuracy: 48.8300%, Top-5 Accuracy: 79.4000%, Time consumed:9.13s

새로운 최고 top-1 정확도: 48.83%, top-5 정확도: 79.40%
새로운 최고 top-5 정확도: 79.40%
Accuracy improved (46.04% --> 48.83%). Saving model ...


  2%|█▊                                                                                          | 6/300 [10:13<8:26:07, 103.29s/it]

Epoch [7], Batch [50/391], Loss: 3.5995, LR: 0.010000
Epoch [7], Batch [100/391], Loss: 3.5471, LR: 0.010000
Epoch [7], Batch [150/391], Loss: 1.5324, LR: 0.010000
Epoch [7], Batch [200/391], Loss: 1.5266, LR: 0.010000
Epoch [7], Batch [250/391], Loss: 3.6060, LR: 0.010000
Epoch [7], Batch [300/391], Loss: 3.4678, LR: 0.010000
Epoch [7], Batch [350/391], Loss: 2.1975, LR: 0.010000
Train set: Epoch: 7, Average loss:2.4396, LR: 0.010000 Top-1 Accuracy: 43.1080%, Top-5 Accuracy: 73.2060%, Time consumed:93.45s
Test set: Epoch: 7, Average loss:1.6691, Top-1 Accuracy: 54.1100%, Top-5 Accuracy: 83.2900%, Time consumed:8.50s

새로운 최고 top-1 정확도: 54.11%, top-5 정확도: 83.29%
새로운 최고 top-5 정확도: 83.29%
Accuracy improved (48.83% --> 54.11%). Saving model ...


  2%|██▏                                                                                         | 7/300 [11:56<8:22:40, 102.94s/it]

Epoch [8], Batch [50/391], Loss: 1.6941, LR: 0.010000
Epoch [8], Batch [100/391], Loss: 3.5938, LR: 0.010000
Epoch [8], Batch [150/391], Loss: 1.4686, LR: 0.010000
Epoch [8], Batch [200/391], Loss: 1.6023, LR: 0.010000
Epoch [8], Batch [250/391], Loss: 1.5122, LR: 0.010000
Epoch [8], Batch [300/391], Loss: 1.4355, LR: 0.010000
Epoch [8], Batch [350/391], Loss: 2.4312, LR: 0.010000
Train set: Epoch: 8, Average loss:2.3629, LR: 0.010000 Top-1 Accuracy: 45.7120%, Top-5 Accuracy: 75.1420%, Time consumed:90.92s


  3%|██▍                                                                                         | 8/300 [13:35<8:15:00, 101.71s/it]

Test set: Epoch: 8, Average loss:1.7279, Top-1 Accuracy: 53.6700%, Top-5 Accuracy: 82.2000%, Time consumed:8.17s

EarlyStopping 카운터: 1 / 30
Epoch [9], Batch [50/391], Loss: 1.4759, LR: 0.010000
Epoch [9], Batch [100/391], Loss: 3.3286, LR: 0.010000
Epoch [9], Batch [150/391], Loss: 1.5119, LR: 0.010000
Epoch [9], Batch [200/391], Loss: 1.6081, LR: 0.010000
Epoch [9], Batch [250/391], Loss: 3.5524, LR: 0.010000
Epoch [9], Batch [300/391], Loss: 1.6328, LR: 0.010000
Epoch [9], Batch [350/391], Loss: 3.2784, LR: 0.010000
Train set: Epoch: 9, Average loss:2.2001, LR: 0.010000 Top-1 Accuracy: 49.3040%, Top-5 Accuracy: 78.0700%, Time consumed:92.98s
Test set: Epoch: 9, Average loss:1.6357, Top-1 Accuracy: 55.7000%, Top-5 Accuracy: 83.7000%, Time consumed:8.19s

새로운 최고 top-1 정확도: 55.70%, top-5 정확도: 83.70%
새로운 최고 top-5 정확도: 83.70%
Accuracy improved (54.11% --> 55.70%). Saving model ...


  3%|██▊                                                                                         | 9/300 [15:16<8:12:51, 101.62s/it]

Epoch [10], Batch [50/391], Loss: 1.3945, LR: 0.010000
Epoch [10], Batch [100/391], Loss: 1.2465, LR: 0.010000
Epoch [10], Batch [150/391], Loss: 3.3033, LR: 0.010000
Epoch [10], Batch [200/391], Loss: 1.2888, LR: 0.010000
Epoch [10], Batch [250/391], Loss: 1.5038, LR: 0.010000
Epoch [10], Batch [300/391], Loss: 1.4028, LR: 0.010000
Epoch [10], Batch [350/391], Loss: 1.2001, LR: 0.010000
Train set: Epoch: 10, Average loss:2.0976, LR: 0.010000 Top-1 Accuracy: 51.9860%, Top-5 Accuracy: 80.2120%, Time consumed:90.05s
Test set: Epoch: 10, Average loss:1.6199, Top-1 Accuracy: 56.4500%, Top-5 Accuracy: 84.2300%, Time consumed:8.09s

새로운 최고 top-1 정확도: 56.45%, top-5 정확도: 84.23%
새로운 최고 top-5 정확도: 84.23%
Accuracy improved (55.70% --> 56.45%). Saving model ...


  3%|███                                                                                        | 10/300 [16:55<8:06:22, 100.63s/it]

Epoch [11], Batch [50/391], Loss: 1.2256, LR: 0.010000
Epoch [11], Batch [100/391], Loss: 1.2094, LR: 0.010000
Epoch [11], Batch [150/391], Loss: 1.5612, LR: 0.010000
Epoch [11], Batch [200/391], Loss: 1.3575, LR: 0.010000
Epoch [11], Batch [250/391], Loss: 1.2561, LR: 0.010000
Epoch [11], Batch [300/391], Loss: 2.8355, LR: 0.010000
Epoch [11], Batch [350/391], Loss: 1.3629, LR: 0.010000
Train set: Epoch: 11, Average loss:2.0274, LR: 0.010000 Top-1 Accuracy: 53.9100%, Top-5 Accuracy: 81.4060%, Time consumed:89.04s
Test set: Epoch: 11, Average loss:1.4695, Top-1 Accuracy: 59.3900%, Top-5 Accuracy: 85.7000%, Time consumed:8.38s

새로운 최고 top-1 정확도: 59.39%, top-5 정확도: 85.70%
새로운 최고 top-5 정확도: 85.70%
Accuracy improved (56.45% --> 59.39%). Saving model ...


  4%|███▎                                                                                        | 11/300 [18:32<8:00:19, 99.72s/it]

Epoch [12], Batch [50/391], Loss: 3.0174, LR: 0.010000
Epoch [12], Batch [100/391], Loss: 1.3715, LR: 0.010000
Epoch [12], Batch [150/391], Loss: 0.9964, LR: 0.010000
Epoch [12], Batch [200/391], Loss: 3.0314, LR: 0.010000
Epoch [12], Batch [250/391], Loss: 1.3879, LR: 0.010000
Epoch [12], Batch [300/391], Loss: 1.0072, LR: 0.010000
Epoch [12], Batch [350/391], Loss: 1.1371, LR: 0.010000
Train set: Epoch: 12, Average loss:1.9590, LR: 0.010000 Top-1 Accuracy: 55.5480%, Top-5 Accuracy: 82.4800%, Time consumed:89.18s
Test set: Epoch: 12, Average loss:1.3505, Top-1 Accuracy: 62.1400%, Top-5 Accuracy: 87.9800%, Time consumed:8.99s

새로운 최고 top-1 정확도: 62.14%, top-5 정확도: 87.98%
새로운 최고 top-5 정확도: 87.98%
Accuracy improved (59.39% --> 62.14%). Saving model ...


  4%|███▋                                                                                        | 12/300 [20:11<7:56:44, 99.32s/it]

Epoch [13], Batch [50/391], Loss: 3.3230, LR: 0.010000
Epoch [13], Batch [100/391], Loss: 3.3444, LR: 0.010000
Epoch [13], Batch [150/391], Loss: 1.3384, LR: 0.010000
Epoch [13], Batch [200/391], Loss: 3.0272, LR: 0.010000
Epoch [13], Batch [250/391], Loss: 3.3493, LR: 0.010000
Epoch [13], Batch [300/391], Loss: 1.9738, LR: 0.010000
Epoch [13], Batch [350/391], Loss: 3.1221, LR: 0.010000
Train set: Epoch: 13, Average loss:2.0172, LR: 0.010000 Top-1 Accuracy: 56.0540%, Top-5 Accuracy: 82.3700%, Time consumed:89.19s


  4%|███▉                                                                                        | 13/300 [21:49<7:53:05, 98.90s/it]

Test set: Epoch: 13, Average loss:1.4139, Top-1 Accuracy: 61.8000%, Top-5 Accuracy: 87.5000%, Time consumed:8.74s

EarlyStopping 카운터: 1 / 30
Epoch [14], Batch [50/391], Loss: 2.6200, LR: 0.010000
Epoch [14], Batch [100/391], Loss: 0.7389, LR: 0.010000
Epoch [14], Batch [150/391], Loss: 1.1539, LR: 0.010000
Epoch [14], Batch [200/391], Loss: 2.2253, LR: 0.010000
Epoch [14], Batch [250/391], Loss: 2.7622, LR: 0.010000
Epoch [14], Batch [300/391], Loss: 1.0358, LR: 0.010000
Epoch [14], Batch [350/391], Loss: 3.1959, LR: 0.010000
Train set: Epoch: 14, Average loss:1.9088, LR: 0.010000 Top-1 Accuracy: 58.3120%, Top-5 Accuracy: 84.1640%, Time consumed:88.32s
Test set: Epoch: 14, Average loss:1.3160, Top-1 Accuracy: 63.7900%, Top-5 Accuracy: 88.6300%, Time consumed:8.22s

새로운 최고 top-1 정확도: 63.79%, top-5 정확도: 88.63%
새로운 최고 top-5 정확도: 88.63%
Accuracy improved (62.14% --> 63.79%). Saving model ...


  5%|████▎                                                                                       | 14/300 [23:25<7:48:23, 98.26s/it]

Epoch [15], Batch [50/391], Loss: 1.0252, LR: 0.010000
Epoch [15], Batch [100/391], Loss: 2.9919, LR: 0.010000
Epoch [15], Batch [150/391], Loss: 1.1158, LR: 0.010000
Epoch [15], Batch [200/391], Loss: 2.9001, LR: 0.010000
Epoch [15], Batch [250/391], Loss: 2.6305, LR: 0.010000
Epoch [15], Batch [300/391], Loss: 0.7260, LR: 0.010000
Epoch [15], Batch [350/391], Loss: 1.0409, LR: 0.010000
Train set: Epoch: 15, Average loss:1.8724, LR: 0.010000 Top-1 Accuracy: 58.6980%, Top-5 Accuracy: 83.7240%, Time consumed:90.70s


  5%|████▌                                                                                       | 15/300 [25:04<7:47:44, 98.47s/it]

Test set: Epoch: 15, Average loss:1.3459, Top-1 Accuracy: 63.4300%, Top-5 Accuracy: 88.0500%, Time consumed:8.25s

EarlyStopping 카운터: 1 / 30
Epoch [16], Batch [50/391], Loss: 0.9209, LR: 0.010000
Epoch [16], Batch [100/391], Loss: 1.0218, LR: 0.010000
Epoch [16], Batch [150/391], Loss: 0.9059, LR: 0.010000
Epoch [16], Batch [200/391], Loss: 3.3158, LR: 0.010000
Epoch [16], Batch [250/391], Loss: 3.1980, LR: 0.010000
Epoch [16], Batch [300/391], Loss: 1.0154, LR: 0.010000
Epoch [16], Batch [350/391], Loss: 1.0829, LR: 0.010000
Train set: Epoch: 16, Average loss:1.8286, LR: 0.010000 Top-1 Accuracy: 59.9980%, Top-5 Accuracy: 84.6520%, Time consumed:87.96s
Test set: Epoch: 16, Average loss:1.2242, Top-1 Accuracy: 65.4000%, Top-5 Accuracy: 89.8900%, Time consumed:8.42s

새로운 최고 top-1 정확도: 65.40%, top-5 정확도: 89.89%
새로운 최고 top-5 정확도: 89.89%
Accuracy improved (63.79% --> 65.40%). Saving model ...


  5%|████▉                                                                                       | 16/300 [26:41<7:43:33, 97.94s/it]

Epoch [17], Batch [50/391], Loss: 1.6794, LR: 0.010000
Epoch [17], Batch [100/391], Loss: 2.0241, LR: 0.010000
Epoch [17], Batch [150/391], Loss: 2.6312, LR: 0.010000
Epoch [17], Batch [200/391], Loss: 0.8840, LR: 0.010000
Epoch [17], Batch [250/391], Loss: 2.7965, LR: 0.010000
Epoch [17], Batch [300/391], Loss: 2.7870, LR: 0.010000
Epoch [17], Batch [350/391], Loss: 1.4999, LR: 0.010000
Train set: Epoch: 17, Average loss:1.7685, LR: 0.010000 Top-1 Accuracy: 60.7520%, Top-5 Accuracy: 84.9500%, Time consumed:88.71s


  6%|█████▏                                                                                      | 17/300 [28:18<7:40:19, 97.60s/it]

Test set: Epoch: 17, Average loss:1.2480, Top-1 Accuracy: 65.3900%, Top-5 Accuracy: 89.1400%, Time consumed:8.08s

EarlyStopping 카운터: 1 / 30
Epoch [18], Batch [50/391], Loss: 0.6875, LR: 0.010000
Epoch [18], Batch [100/391], Loss: 0.8873, LR: 0.010000
Epoch [18], Batch [150/391], Loss: 2.0307, LR: 0.010000
Epoch [18], Batch [200/391], Loss: 0.7984, LR: 0.010000
Epoch [18], Batch [250/391], Loss: 3.2986, LR: 0.010000
Epoch [18], Batch [300/391], Loss: 2.1950, LR: 0.010000
Epoch [18], Batch [350/391], Loss: 2.8187, LR: 0.010000
Train set: Epoch: 18, Average loss:1.7113, LR: 0.010000 Top-1 Accuracy: 63.0800%, Top-5 Accuracy: 86.6840%, Time consumed:90.86s
Test set: Epoch: 18, Average loss:1.2431, Top-1 Accuracy: 65.6300%, Top-5 Accuracy: 89.6000%, Time consumed:8.03s

새로운 최고 top-1 정확도: 65.63%, top-5 정확도: 89.60%
Accuracy improved (65.40% --> 65.63%). Saving model ...


  6%|█████▌                                                                                      | 18/300 [29:57<7:40:55, 98.07s/it]

Epoch [19], Batch [50/391], Loss: 2.2731, LR: 0.010000
Epoch [19], Batch [100/391], Loss: 1.0484, LR: 0.010000
Epoch [19], Batch [150/391], Loss: 2.6294, LR: 0.010000
Epoch [19], Batch [200/391], Loss: 1.0181, LR: 0.010000
Epoch [19], Batch [250/391], Loss: 0.8232, LR: 0.010000
Epoch [19], Batch [300/391], Loss: 2.4790, LR: 0.010000
Epoch [19], Batch [350/391], Loss: 2.9758, LR: 0.010000
Train set: Epoch: 19, Average loss:1.7287, LR: 0.010000 Top-1 Accuracy: 62.4540%, Top-5 Accuracy: 85.9420%, Time consumed:94.42s
Test set: Epoch: 19, Average loss:1.2058, Top-1 Accuracy: 66.3000%, Top-5 Accuracy: 90.0400%, Time consumed:8.21s

새로운 최고 top-1 정확도: 66.30%, top-5 정확도: 90.04%
새로운 최고 top-5 정확도: 90.04%
Accuracy improved (65.63% --> 66.30%). Saving model ...


  6%|█████▊                                                                                      | 19/300 [31:40<7:46:05, 99.52s/it]

Epoch [20], Batch [50/391], Loss: 0.8161, LR: 0.010000
Epoch [20], Batch [100/391], Loss: 0.8804, LR: 0.010000
Epoch [20], Batch [150/391], Loss: 1.2676, LR: 0.010000
Epoch [20], Batch [200/391], Loss: 0.6969, LR: 0.010000
Epoch [20], Batch [250/391], Loss: 0.8443, LR: 0.010000
Epoch [20], Batch [300/391], Loss: 1.5636, LR: 0.010000
Epoch [20], Batch [350/391], Loss: 2.9584, LR: 0.010000
Train set: Epoch: 20, Average loss:1.6785, LR: 0.010000 Top-1 Accuracy: 64.8960%, Top-5 Accuracy: 87.5360%, Time consumed:89.76s


  7%|██████▏                                                                                     | 20/300 [33:18<7:42:35, 99.13s/it]

Test set: Epoch: 20, Average loss:1.2607, Top-1 Accuracy: 66.1500%, Top-5 Accuracy: 89.4500%, Time consumed:8.45s

EarlyStopping 카운터: 1 / 30
Epoch [21], Batch [50/391], Loss: 2.4809, LR: 0.010000
Epoch [21], Batch [100/391], Loss: 3.2015, LR: 0.010000
Epoch [21], Batch [150/391], Loss: 0.6789, LR: 0.010000
Epoch [21], Batch [200/391], Loss: 0.7783, LR: 0.010000
Epoch [21], Batch [250/391], Loss: 0.5351, LR: 0.010000
Epoch [21], Batch [300/391], Loss: 3.1097, LR: 0.010000
Epoch [21], Batch [350/391], Loss: 3.0556, LR: 0.010000
Train set: Epoch: 21, Average loss:1.7365, LR: 0.010000 Top-1 Accuracy: 63.4520%, Top-5 Accuracy: 86.3180%, Time consumed:90.19s
Test set: Epoch: 21, Average loss:1.1681, Top-1 Accuracy: 67.1400%, Top-5 Accuracy: 90.4600%, Time consumed:8.07s

새로운 최고 top-1 정확도: 67.14%, top-5 정확도: 90.46%
새로운 최고 top-5 정확도: 90.46%
Accuracy improved (66.30% --> 67.14%). Saving model ...


  7%|██████▍                                                                                     | 21/300 [34:57<7:40:08, 98.95s/it]

Epoch [22], Batch [50/391], Loss: 2.2588, LR: 0.010000
Epoch [22], Batch [100/391], Loss: 0.4932, LR: 0.010000
Epoch [22], Batch [150/391], Loss: 2.2418, LR: 0.010000
Epoch [22], Batch [200/391], Loss: 0.5837, LR: 0.010000
Epoch [22], Batch [250/391], Loss: 0.5170, LR: 0.010000
Epoch [22], Batch [300/391], Loss: 2.2442, LR: 0.010000
Epoch [22], Batch [350/391], Loss: 2.8448, LR: 0.010000
Train set: Epoch: 22, Average loss:1.5761, LR: 0.010000 Top-1 Accuracy: 66.6740%, Top-5 Accuracy: 88.1980%, Time consumed:88.98s
Test set: Epoch: 22, Average loss:1.1731, Top-1 Accuracy: 67.5000%, Top-5 Accuracy: 90.5400%, Time consumed:8.16s

새로운 최고 top-1 정확도: 67.50%, top-5 정확도: 90.54%
새로운 최고 top-5 정확도: 90.54%
Accuracy improved (67.14% --> 67.50%). Saving model ...


  7%|██████▋                                                                                     | 22/300 [36:34<7:36:18, 98.49s/it]

Epoch [23], Batch [50/391], Loss: 1.0986, LR: 0.010000
Epoch [23], Batch [100/391], Loss: 3.4793, LR: 0.010000
Epoch [23], Batch [150/391], Loss: 2.5732, LR: 0.010000
Epoch [23], Batch [200/391], Loss: 0.6118, LR: 0.010000
Epoch [23], Batch [250/391], Loss: 0.5637, LR: 0.010000
Epoch [23], Batch [300/391], Loss: 0.5179, LR: 0.010000
Epoch [23], Batch [350/391], Loss: 3.1281, LR: 0.010000
Train set: Epoch: 23, Average loss:1.5945, LR: 0.010000 Top-1 Accuracy: 67.1180%, Top-5 Accuracy: 88.2780%, Time consumed:94.20s
Test set: Epoch: 23, Average loss:1.1823, Top-1 Accuracy: 67.5100%, Top-5 Accuracy: 89.9800%, Time consumed:8.24s

새로운 최고 top-1 정확도: 67.51%, top-5 정확도: 89.98%
Accuracy improved (67.50% --> 67.51%). Saving model ...


  8%|███████                                                                                     | 23/300 [38:17<7:40:29, 99.74s/it]

Epoch [24], Batch [50/391], Loss: 0.4199, LR: 0.010000
Epoch [24], Batch [100/391], Loss: 2.4438, LR: 0.010000
Epoch [24], Batch [150/391], Loss: 0.6514, LR: 0.010000
Epoch [24], Batch [200/391], Loss: 0.5871, LR: 0.010000
Epoch [24], Batch [250/391], Loss: 0.5344, LR: 0.010000
Epoch [24], Batch [300/391], Loss: 0.6076, LR: 0.010000
Epoch [24], Batch [350/391], Loss: 0.7009, LR: 0.010000
Train set: Epoch: 24, Average loss:1.4884, LR: 0.010000 Top-1 Accuracy: 67.7540%, Top-5 Accuracy: 88.2160%, Time consumed:91.67s
Test set: Epoch: 24, Average loss:1.1726, Top-1 Accuracy: 67.8800%, Top-5 Accuracy: 90.0300%, Time consumed:8.36s

새로운 최고 top-1 정확도: 67.88%, top-5 정확도: 90.03%
Accuracy improved (67.51% --> 67.88%). Saving model ...


  8%|███████▎                                                                                    | 24/300 [39:57<7:39:33, 99.91s/it]

Epoch [25], Batch [50/391], Loss: 0.5745, LR: 0.010000
Epoch [25], Batch [100/391], Loss: 0.5118, LR: 0.010000
Epoch [25], Batch [150/391], Loss: 0.5782, LR: 0.010000
Epoch [25], Batch [200/391], Loss: 0.4586, LR: 0.010000
Epoch [25], Batch [250/391], Loss: 0.5844, LR: 0.010000
Epoch [25], Batch [300/391], Loss: 2.6163, LR: 0.010000
Epoch [25], Batch [350/391], Loss: 0.6092, LR: 0.010000
Train set: Epoch: 25, Average loss:1.5804, LR: 0.010000 Top-1 Accuracy: 67.3440%, Top-5 Accuracy: 88.2660%, Time consumed:94.32s
Test set: Epoch: 25, Average loss:1.1893, Top-1 Accuracy: 67.9000%, Top-5 Accuracy: 90.0100%, Time consumed:8.74s

새로운 최고 top-1 정확도: 67.90%, top-5 정확도: 90.01%
Accuracy improved (67.88% --> 67.90%). Saving model ...


  8%|███████▌                                                                                   | 25/300 [41:40<7:42:35, 100.93s/it]

Epoch [26], Batch [50/391], Loss: 0.4140, LR: 0.010000
Epoch [26], Batch [100/391], Loss: 0.5415, LR: 0.010000
Train set: Epoch: 26, Average loss:1.3923, LR: 0.010000 Top-1 Accuracy: 70.9580%, Top-5 Accuracy: 90.4360%, Time consumed:91.79s
Test set: Epoch: 26, Average loss:1.1603, Top-1 Accuracy: 68.3500%, Top-5 Accuracy: 90.7400%, Time consumed:8.17s

새로운 최고 top-1 정확도: 68.35%, top-5 정확도: 90.74%
새로운 최고 top-5 정확도: 90.74%
Accuracy improved (67.90% --> 68.35%). Saving model ...


  9%|███████▉                                                                                   | 26/300 [43:20<7:39:55, 100.71s/it]

Epoch [27], Batch [50/391], Loss: 0.9763, LR: 0.010000
Epoch [27], Batch [100/391], Loss: 0.3900, LR: 0.010000
