In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
import torchvision
import torchvision.transforms as transforms
import torchvision.transforms.v2 as transforms_v2  # CutMix를 위한 v2 transforms 추가
import sys
import os
import time
import random
import numpy as np
import wandb
from tqdm import tqdm
from tools.tool import AccuracyEarlyStopping, WarmUpLR  # 수정된 AccuracyEarlyStopping 클래스 임포트
from models.shake_resnet import shake_resnet18

wandb.login(key="ef091b9abcea3186341ddf8995d62bde62d7469e")
wandb.init(project="PBL-2", name="shake_resnet18_cfc,warmup,reduce_standard")  

# WandB 설정
config = {
    "model": "shake_resnet18",
    "batch_size": 128,
    "num_epochs": 100,
    "learning_rate": 0.001,
    "optimizer": "Adam",
    "seed": 2025,
    "deterministic": False,
    "patience": 10,  # early stopping patience
    "max_epochs_wait": 30,  # 최대 30 에폭까지만 기다림
    "cutmix_alpha": 1.0,  # CutMix 알파 파라미터 추가
    "cutmix_prob": 0.5,   # CutMix 적용 확률 추가
    "crop_padding": 4,    # RandomCrop 패딩 크기
    "crop_size": 32,      # RandomCrop 크기 (CIFAR-100 이미지 크기는 32x32)
    "warmup_epochs": 5,   # 웜업할 에폭 수 추가
}
wandb.config.update(config)

# CIFAR-100 데이터셋 로드 - 기본 train/test 분할 사용
transform_train = transforms.Compose([
    transforms.RandomCrop(config["crop_size"], padding=config["crop_padding"]),  # 패딩 후 랜덤 크롭
    transforms.RandomHorizontalFlip(),  # 수평 뒤집기
    transforms.ToTensor(),
    transforms.Normalize((0.5071, 0.4867, 0.4408), (0.2675, 0.2565, 0.2761)),
])

transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5071, 0.4867, 0.4408), (0.2675, 0.2565, 0.2761)),
])

trainset = torchvision.datasets.CIFAR100(
    root='./data', train=True, download=True, transform=transform_train)

testset = torchvision.datasets.CIFAR100(
    root='./data', train=False, download=True, transform=transform_test)

# DataLoader 생성
trainloader = DataLoader(trainset, batch_size=config["batch_size"], shuffle=True, num_workers=16)
testloader = DataLoader(testset, batch_size=config["batch_size"], shuffle=False, num_workers=16)

print(f"Train set size: {len(trainset)}")
print(f"Test set size: {len(testset)}")

# CutMix 변환 정의
cutmix = transforms_v2.CutMix(alpha=config["cutmix_alpha"], num_classes=100)  # CIFAR-100은 100개 클래스

def train(model, trainloader, criterion, optimizer, device, epoch, warmup_scheduler=None, warmup_epochs=5):
    """
    학습 함수 (CutMix 적용)
    """
    model.train()   # 모델을 학습 모드로 설정
    start_time = time.time()  # 시간 측정 시작
    running_loss = 0.0
    correct_top1 = 0
    correct_top5 = 0
    total = 0
    
    for i, (inputs, labels) in enumerate(trainloader):
        inputs, labels = inputs.to(device), labels.to(device)
        
        # CutMix 확률적 적용
        if random.random() < config["cutmix_prob"]:
            inputs, labels = cutmix(inputs, labels)
            # 이 경우 labels은 원-핫 인코딩 형태로 변환됨
            use_cutmix = True
        else:
            use_cutmix = False
        
        optimizer.zero_grad()
        
        outputs = model(inputs)
        
        # CutMix 적용 여부에 따라 손실 함수 선택
        if use_cutmix:
            # CutMix가 적용된 경우 (원-핫 인코딩된 레이블)
            loss = torch.nn.functional.cross_entropy(outputs, labels)
        else:
            # 일반적인 경우 (정수 인덱스 레이블)
            loss = criterion(outputs, labels)
            
        loss.backward()
        optimizer.step()

        # 학습률 스케줄러 업데이트 - warmup 스케줄러만 여기서 업데이트
        if epoch < warmup_epochs and warmup_scheduler is not None:
            warmup_scheduler.step()
        
        running_loss += loss.item()
        
        # 정확도 계산 - CutMix 적용 여부에 따라 다르게 처리
        if use_cutmix:
            # 원-핫 인코딩된 레이블에서 argmax를 사용해 가장 큰 값의 인덱스 추출
            _, label_idx = labels.max(1)
        else:
            # 정수 인덱스 레이블 그대로 사용
            label_idx = labels
            
        # top-1 정확도 계산
        _, predicted = outputs.max(1)
        total += inputs.size(0)
        correct_top1 += predicted.eq(label_idx).sum().item()
        
        # top-5 정확도 계산
        _, top5_idx = outputs.topk(5, 1, largest=True, sorted=True)
        correct_top5 += sum([1 for i in range(len(label_idx)) if label_idx[i] in top5_idx[i]])
        
        if (i + 1) % 50 == 0:  # 50 배치마다 출력
            print(f'Epoch [{epoch+1}], Batch [{i+1}/{len(trainloader)}], Loss: {loss.item():.4f}, LR: {optimizer.param_groups[0]["lr"]:.6f}')
    
    epoch_loss = running_loss / len(trainloader)
    accuracy_top1 = 100.0 * correct_top1 / total
    accuracy_top5 = 100.0 * correct_top5 / total
    
    train_time = time.time() - start_time
    
    # 학습 세트에 대한 성능 출력
    print(f'Train set: Epoch: {epoch+1}, Average loss:{epoch_loss:.4f}, LR: {optimizer.param_groups[0]["lr"]:.6f} '
          f'Top-1 Accuracy: {accuracy_top1:.4f}%, Top-5 Accuracy: {accuracy_top5:.4f}%, Time consumed:{train_time:.2f}s')
    
    return epoch_loss, accuracy_top1, accuracy_top5

def evaluate(model, dataloader, criterion, device, epoch, phase="test"):
    """
    평가 함수
    """
    model.eval()  # 모델을 평가 모드로 설정
    start_time = time.time()  # 시간 측정 시작
    
    eval_loss = 0.0
    correct_top1 = 0
    correct_top5 = 0
    total = 0
    
    # 그래디언트 계산 비활성화
    with torch.no_grad():
        for inputs, labels in dataloader:
            inputs, labels = inputs.to(device), labels.to(device)
            
            # 순전파
            outputs = model(inputs)
            
            # 손실 계산
            loss = criterion(outputs, labels)
            eval_loss += loss.item()
            
            # top-1 정확도 계산
            _, predicted = outputs.max(1)
            total += labels.size(0)
            correct_top1 += (predicted == labels).sum().item()
            
            # top-5 정확도 계산
            _, top5_idx = outputs.topk(5, 1, largest=True, sorted=True)
            correct_top5 += top5_idx.eq(labels.view(-1, 1).expand_as(top5_idx)).sum().item()
    
    # 평균 손실 및 정확도 계산
    eval_loss = eval_loss / len(dataloader)
    accuracy_top1 = 100.0 * correct_top1 / total
    accuracy_top5 = 100.0 * correct_top5 / total
    
    # 평가 시간 계산
    eval_time = time.time() - start_time
    
    # 테스트 세트에 대한 성능 출력
    print(f'{phase.capitalize()} set: Epoch: {epoch+1}, Average loss:{eval_loss:.4f}, '
          f'Top-1 Accuracy: {accuracy_top1:.4f}%, Top-5 Accuracy: {accuracy_top5:.4f}%, Time consumed:{eval_time:.2f}s')
    print()
    
    return eval_loss, accuracy_top1, accuracy_top5

# 메인 학습 루프
def main_training_loop(model, trainloader, testloader, criterion, optimizer, device, num_epochs, patience, max_epochs_wait, warmup_scheduler=None, main_scheduler=None, warmup_epochs=5):
    """
    메인 학습 루프 (accuracy 기준 early stopping)
    """
    # 정확도 기반 얼리 스토핑 사용
    early_stopping = AccuracyEarlyStopping(patience=patience, verbose=True, path='checkpoint.pt', max_epochs=max_epochs_wait)
    
    best_test_acc_top1 = 0.0
    best_test_acc_top5 = 0.0
    
    # 테스트 정확도 기록을 위한 리스트
    test_acc_top1_history = []
    
    # tqdm을 사용한 진행 상황 표시
    for epoch in tqdm(range(num_epochs)):
        # 학습
        train_loss, train_acc_top1, train_acc_top5 = train(
            model, 
            trainloader, 
            criterion, 
            optimizer, 
            device, 
            epoch, 
            warmup_scheduler, 
            warmup_epochs
        )
        
        # 테스트 데이터로 평가
        test_loss, test_acc_top1, test_acc_top5 = evaluate(model, testloader, criterion, device, epoch, phase="test")

        # 웜업 이후 ReduceLROnPlateau 스케줄러 업데이트 
        if epoch >= warmup_epochs and main_scheduler is not None:
            main_scheduler.step(test_acc_top1)  # 테스트 정확도에 따라 학습률 업데이트       
            
        # 테스트 정확도 기록
        test_acc_top1_history.append(test_acc_top1)
        
        # WandB에 로깅
        wandb.log({
            "epoch": epoch + 1,
            "learning_rate": optimizer.param_groups[0]['lr'],
            "train_loss": train_loss,
            "train_accuracy_top1": train_acc_top1,
            "train_accuracy_top5": train_acc_top5,
            "test_loss": test_loss,
            "test_accuracy_top1": test_acc_top1,
            "test_accuracy_top5": test_acc_top5
        })
            
        # 최고 정확도 모델 저장 (top-1 기준)
        if test_acc_top1 > best_test_acc_top1:
            best_test_acc_top1 = test_acc_top1
            best_test_acc_top5_at_best_top1 = test_acc_top5
            print(f'새로운 최고 top-1 정확도: {best_test_acc_top1:.2f}%, top-5 정확도: {best_test_acc_top5_at_best_top1:.2f}%')
            # 모델 저장
            model_path = f'best_model_{wandb.run.name}.pth'
            torch.save(model.state_dict(), model_path)
            
            # WandB에 모델 아티팩트 저장
            wandb.save(model_path)
        
        # top-5 accuracy 기록 업데이트
        if test_acc_top5 > best_test_acc_top5:
            best_test_acc_top5 = test_acc_top5
            print(f'새로운 최고 top-5 정확도: {best_test_acc_top5:.2f}%')

        # Early stopping 체크 (test_acc_top1 기준)
        early_stopping(test_acc_top1, model, epoch)
        if early_stopping.early_stop:
            print(f"에폭 {epoch+1}에서 학습 조기 종료. 최고 성능 에폭: {early_stopping.best_epoch+1}")
            break
    
    # 훈련 완료 후 최고 모델 로드
    print("테스트 정확도 기준 최고 모델 로드 중...")
    model_path = f'best_model_{wandb.run.name}.pth'
    model.load_state_dict(torch.load(model_path))

    # 최종 테스트 세트 평가
    final_test_loss, final_test_acc_top1, final_test_acc_top5 = evaluate(model, testloader, criterion, device, num_epochs-1, phase="test")
    
    print(f'완료! 최고 테스트 top-1 정확도: {best_test_acc_top1:.2f}%, 최고 테스트 top-5 정확도: {best_test_acc_top5:.2f}%')
    print(f'최종 테스트 top-1 정확도: {final_test_acc_top1:.2f}%, 최종 테스트 top-5 정확도: {final_test_acc_top5:.2f}%')
    
    # WandB에 최종 결과 기록
    wandb.run.summary["best_test_accuracy_top1"] = best_test_acc_top1
    wandb.run.summary["best_test_accuracy_top5"] = best_test_acc_top5
    wandb.run.summary["final_test_accuracy_top1"] = final_test_acc_top1
    wandb.run.summary["final_test_accuracy_top5"] = final_test_acc_top5

    # Early stopping 정보 저장
    if early_stopping.early_stop:
        wandb.run.summary["early_stopped"] = True
        wandb.run.summary["early_stopped_epoch"] = epoch+1
        wandb.run.summary["best_epoch"] = early_stopping.best_epoch+1
    else:
        wandb.run.summary["early_stopped"] = False


# 디바이스 설정
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# 모델 초기화
# 또는 매개변수 커스터마이징
model = shake_resnet18(num_classes=100, p_drop=0.5, alpha_range=[0, 0]).to(device)
criterion = nn.CrossEntropyLoss()  # 기본 CrossEntropyLoss 사용 (라벨 스무딩 없음)
optimizer = optim.Adam(model.parameters(), lr=config["learning_rate"])  # 옵티마이저 정의

# WarmUpLR 스케줄러 초기화
# 웜업할 총 iteration 수 계산 (웜업 에폭 × 배치 수)
warmup_steps = config["warmup_epochs"] * len(trainloader)
warmup_scheduler = WarmUpLR(optimizer, total_iters=warmup_steps)

# 웜업 이후 사용할 스케줄러 설정 
main_scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
    optimizer, 
    mode='max',           # 정확도를 모니터링하므로 'max' 모드 사용
    factor=0.1,           # 학습률 감소 비율 (10배 감소)
    patience=3,           # 몇 에폭 동안 개선이 없을 때 감소시킬지
    verbose=True,         # 학습률 변경 시 출력
    threshold=0.01        # 개선으로 간주할 최소 변화량
)

# WandB에 모델 구조 기록
wandb.watch(model, log="all")

# GPU 가속
if torch.cuda.device_count() > 1:
    print(f"{torch.cuda.device_count()}개의 GPU를 사용합니다.")
    model = nn.DataParallel(model)

# 훈련 시작 시간 기록
start_time = time.time()

# 메인 학습 루프 호출
main_training_loop(
    model=model,
    trainloader=trainloader,
    testloader=testloader,
    criterion=criterion,
    optimizer=optimizer,
    device=device,
    num_epochs=config["num_epochs"],
    patience=config["patience"],
    max_epochs_wait=config["max_epochs_wait"],
    warmup_scheduler=warmup_scheduler,
    main_scheduler=main_scheduler,
    warmup_epochs=config["warmup_epochs"]
)

# 훈련 종료 시간 기록 및 출력
end_time = time.time()
total_time = end_time - start_time
wandb.log({"total_training_time": total_time})

print(f"전체 학습 시간: {total_time:.2f} 초")

# WandB 실행 종료
wandb.finish()

[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /home/guswls/.netrc
[34m[1mwandb[0m: Currently logged in as: [33msokjh1310[0m ([33msokjh1310-hanyang-university[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


Files already downloaded and verified
Files already downloaded and verified
Train set size: 50000
Test set size: 10000
Using device: cuda




2개의 GPU를 사용합니다.


  gate = torch.cuda.FloatTensor([0]).bernoulli_(1 - p_drop)


Epoch [1], Batch [50/391], Loss: 4.6380, LR: 0.000026
Epoch [1], Batch [100/391], Loss: 4.3825, LR: 0.000051
Epoch [1], Batch [150/391], Loss: 4.4524, LR: 0.000077
Epoch [1], Batch [200/391], Loss: 4.4531, LR: 0.000102
Epoch [1], Batch [250/391], Loss: 4.4495, LR: 0.000128
Epoch [1], Batch [300/391], Loss: 4.4122, LR: 0.000153
Epoch [1], Batch [350/391], Loss: 4.5384, LR: 0.000179
Train set: Epoch: 1, Average loss:4.3672, LR: 0.000200 Top-1 Accuracy: 5.1000%, Top-5 Accuracy: 17.8700%, Time consumed:51.41s
Test set: Epoch: 1, Average loss:3.8997, Top-1 Accuracy: 9.8500%, Top-5 Accuracy: 30.6800%, Time consumed:7.93s

새로운 최고 top-1 정확도: 9.85%, top-5 정확도: 30.68%
새로운 최고 top-5 정확도: 30.68%
Accuracy improved (-inf% --> 9.85%). Saving model ...


  1%|▉                                                                                            | 1/100 [00:59<1:38:21, 59.61s/it]

Epoch [2], Batch [50/391], Loss: 4.0792, LR: 0.000226
Epoch [2], Batch [100/391], Loss: 4.0552, LR: 0.000251
Epoch [2], Batch [150/391], Loss: 4.0377, LR: 0.000277
Epoch [2], Batch [200/391], Loss: 4.0305, LR: 0.000302
Epoch [2], Batch [250/391], Loss: 3.9174, LR: 0.000328
Epoch [2], Batch [300/391], Loss: 4.4218, LR: 0.000353
Epoch [2], Batch [350/391], Loss: 3.9992, LR: 0.000379
Train set: Epoch: 2, Average loss:4.1287, LR: 0.000400 Top-1 Accuracy: 8.5280%, Top-5 Accuracy: 26.9660%, Time consumed:52.36s
Test set: Epoch: 2, Average loss:3.8637, Top-1 Accuracy: 11.5600%, Top-5 Accuracy: 34.3200%, Time consumed:8.41s

새로운 최고 top-1 정확도: 11.56%, top-5 정확도: 34.32%
새로운 최고 top-5 정확도: 34.32%
Accuracy improved (9.85% --> 11.56%). Saving model ...


  2%|█▊                                                                                           | 2/100 [02:00<1:38:43, 60.45s/it]

Epoch [3], Batch [50/391], Loss: 4.0234, LR: 0.000426
Epoch [3], Batch [100/391], Loss: 4.2621, LR: 0.000451
Epoch [3], Batch [150/391], Loss: 4.4022, LR: 0.000477
Epoch [3], Batch [200/391], Loss: 4.4229, LR: 0.000502
Epoch [3], Batch [250/391], Loss: 3.5897, LR: 0.000528
Epoch [3], Batch [300/391], Loss: 3.9071, LR: 0.000553
Epoch [3], Batch [350/391], Loss: 4.0210, LR: 0.000579
Train set: Epoch: 3, Average loss:4.0555, LR: 0.000600 Top-1 Accuracy: 9.9920%, Top-5 Accuracy: 29.9100%, Time consumed:55.14s
Test set: Epoch: 3, Average loss:3.7741, Top-1 Accuracy: 13.2200%, Top-5 Accuracy: 35.3200%, Time consumed:8.15s

새로운 최고 top-1 정확도: 13.22%, top-5 정확도: 35.32%
새로운 최고 top-5 정확도: 35.32%
Accuracy improved (11.56% --> 13.22%). Saving model ...


  3%|██▊                                                                                          | 3/100 [03:04<1:40:00, 61.86s/it]

Epoch [4], Batch [50/391], Loss: 4.1712, LR: 0.000626
Epoch [4], Batch [100/391], Loss: 4.1509, LR: 0.000651
Epoch [4], Batch [150/391], Loss: 3.8164, LR: 0.000677
Epoch [4], Batch [200/391], Loss: 4.0641, LR: 0.000702
Epoch [4], Batch [250/391], Loss: 4.1217, LR: 0.000728
Epoch [4], Batch [300/391], Loss: 3.6913, LR: 0.000753
Epoch [4], Batch [350/391], Loss: 4.1850, LR: 0.000779
Train set: Epoch: 4, Average loss:3.9936, LR: 0.000800 Top-1 Accuracy: 10.8800%, Top-5 Accuracy: 31.8100%, Time consumed:52.74s
Test set: Epoch: 4, Average loss:3.5230, Top-1 Accuracy: 16.3500%, Top-5 Accuracy: 42.0800%, Time consumed:8.31s

새로운 최고 top-1 정확도: 16.35%, top-5 정확도: 42.08%
새로운 최고 top-5 정확도: 42.08%
Accuracy improved (13.22% --> 16.35%). Saving model ...


  4%|███▋                                                                                         | 4/100 [04:05<1:38:38, 61.65s/it]

Epoch [5], Batch [50/391], Loss: 4.0862, LR: 0.000826
Epoch [5], Batch [100/391], Loss: 4.0172, LR: 0.000851
Epoch [5], Batch [150/391], Loss: 3.8763, LR: 0.000877
Epoch [5], Batch [200/391], Loss: 4.3836, LR: 0.000902
Epoch [5], Batch [250/391], Loss: 3.4747, LR: 0.000928
Epoch [5], Batch [300/391], Loss: 4.4083, LR: 0.000953
Epoch [5], Batch [350/391], Loss: 4.3984, LR: 0.000979
Train set: Epoch: 5, Average loss:3.9236, LR: 0.001000 Top-1 Accuracy: 11.9300%, Top-5 Accuracy: 33.7840%, Time consumed:51.12s


  5%|████▋                                                                                        | 5/100 [05:05<1:36:24, 60.89s/it]

Test set: Epoch: 5, Average loss:3.6796, Top-1 Accuracy: 14.3200%, Top-5 Accuracy: 37.3200%, Time consumed:8.43s

EarlyStopping 카운터: 1 / 10
Epoch [6], Batch [50/391], Loss: 3.6534, LR: 0.001000
Epoch [6], Batch [100/391], Loss: 3.7935, LR: 0.001000
Epoch [6], Batch [150/391], Loss: 4.0699, LR: 0.001000
Epoch [6], Batch [200/391], Loss: 3.2789, LR: 0.001000
Epoch [6], Batch [250/391], Loss: 3.9950, LR: 0.001000
Epoch [6], Batch [300/391], Loss: 4.1114, LR: 0.001000
Epoch [6], Batch [350/391], Loss: 4.2594, LR: 0.001000
Train set: Epoch: 6, Average loss:3.9371, LR: 0.001000 Top-1 Accuracy: 12.0380%, Top-5 Accuracy: 33.4640%, Time consumed:51.07s
Test set: Epoch: 6, Average loss:3.4170, Top-1 Accuracy: 19.3100%, Top-5 Accuracy: 45.2100%, Time consumed:8.14s

새로운 최고 top-1 정확도: 19.31%, top-5 정확도: 45.21%
새로운 최고 top-5 정확도: 45.21%
Accuracy improved (16.35% --> 19.31%). Saving model ...


  6%|█████▌                                                                                       | 6/100 [06:04<1:34:37, 60.40s/it]

Epoch [7], Batch [50/391], Loss: 4.6205, LR: 0.001000
Epoch [7], Batch [100/391], Loss: 3.6430, LR: 0.001000
Epoch [7], Batch [150/391], Loss: 3.4554, LR: 0.001000
Epoch [7], Batch [200/391], Loss: 3.9821, LR: 0.001000
Epoch [7], Batch [250/391], Loss: 3.7295, LR: 0.001000
Epoch [7], Batch [300/391], Loss: 3.6430, LR: 0.001000
Epoch [7], Batch [350/391], Loss: 4.4457, LR: 0.001000
Train set: Epoch: 7, Average loss:3.9130, LR: 0.001000 Top-1 Accuracy: 12.5140%, Top-5 Accuracy: 34.1700%, Time consumed:50.97s
Test set: Epoch: 7, Average loss:3.3492, Top-1 Accuracy: 20.6500%, Top-5 Accuracy: 47.7300%, Time consumed:7.96s

새로운 최고 top-1 정확도: 20.65%, top-5 정확도: 47.73%
새로운 최고 top-5 정확도: 47.73%
Accuracy improved (19.31% --> 20.65%). Saving model ...


  7%|██████▌                                                                                      | 7/100 [07:03<1:32:59, 59.99s/it]

Epoch [8], Batch [50/391], Loss: 3.8744, LR: 0.001000
Epoch [8], Batch [100/391], Loss: 3.9770, LR: 0.001000
Epoch [8], Batch [150/391], Loss: 4.4167, LR: 0.001000
Epoch [8], Batch [200/391], Loss: 3.8905, LR: 0.001000
Epoch [8], Batch [250/391], Loss: 3.8063, LR: 0.001000
Epoch [8], Batch [300/391], Loss: 4.2357, LR: 0.001000
Epoch [8], Batch [350/391], Loss: 3.5927, LR: 0.001000
Train set: Epoch: 8, Average loss:3.9134, LR: 0.001000 Top-1 Accuracy: 12.1620%, Top-5 Accuracy: 34.2700%, Time consumed:50.58s


  8%|███████▍                                                                                     | 8/100 [08:02<1:31:31, 59.69s/it]

Test set: Epoch: 8, Average loss:3.3235, Top-1 Accuracy: 20.2500%, Top-5 Accuracy: 48.3400%, Time consumed:8.44s

새로운 최고 top-5 정확도: 48.34%
EarlyStopping 카운터: 1 / 10
Epoch [9], Batch [50/391], Loss: 3.7858, LR: 0.001000
Epoch [9], Batch [100/391], Loss: 4.3891, LR: 0.001000
Epoch [9], Batch [150/391], Loss: 3.8628, LR: 0.001000
Epoch [9], Batch [200/391], Loss: 3.8162, LR: 0.001000
Epoch [9], Batch [250/391], Loss: 4.2204, LR: 0.001000
Epoch [9], Batch [300/391], Loss: 3.4191, LR: 0.001000
Epoch [9], Batch [350/391], Loss: 3.6804, LR: 0.001000
Train set: Epoch: 9, Average loss:3.9440, LR: 0.001000 Top-1 Accuracy: 12.2920%, Top-5 Accuracy: 34.0640%, Time consumed:54.60s


  9%|████████▎                                                                                    | 9/100 [09:05<1:32:11, 60.79s/it]

Test set: Epoch: 9, Average loss:3.3320, Top-1 Accuracy: 19.2300%, Top-5 Accuracy: 48.2200%, Time consumed:8.61s

EarlyStopping 카운터: 2 / 10
Epoch [10], Batch [50/391], Loss: 3.7683, LR: 0.001000
Epoch [10], Batch [100/391], Loss: 4.1273, LR: 0.001000
Epoch [10], Batch [150/391], Loss: 4.2528, LR: 0.001000
Epoch [10], Batch [200/391], Loss: 4.5668, LR: 0.001000
Epoch [10], Batch [250/391], Loss: 4.2294, LR: 0.001000
Epoch [10], Batch [300/391], Loss: 3.2238, LR: 0.001000
Epoch [10], Batch [350/391], Loss: 4.0006, LR: 0.001000
Train set: Epoch: 10, Average loss:3.9082, LR: 0.001000 Top-1 Accuracy: 13.1760%, Top-5 Accuracy: 35.0780%, Time consumed:52.39s


 10%|█████████▏                                                                                  | 10/100 [10:06<1:31:14, 60.83s/it]

Test set: Epoch: 10, Average loss:3.6978, Top-1 Accuracy: 14.3300%, Top-5 Accuracy: 36.7400%, Time consumed:8.51s

EarlyStopping 카운터: 3 / 10
Epoch [11], Batch [50/391], Loss: 3.9109, LR: 0.001000
Epoch [11], Batch [100/391], Loss: 4.1782, LR: 0.001000
Epoch [11], Batch [150/391], Loss: 3.2167, LR: 0.001000
Epoch [11], Batch [200/391], Loss: 3.3141, LR: 0.001000
Epoch [11], Batch [250/391], Loss: 4.1397, LR: 0.001000
Epoch [11], Batch [300/391], Loss: 4.2575, LR: 0.001000
Epoch [11], Batch [350/391], Loss: 3.3731, LR: 0.001000
Train set: Epoch: 11, Average loss:3.9065, LR: 0.001000 Top-1 Accuracy: 12.4000%, Top-5 Accuracy: 34.4160%, Time consumed:51.02s


 11%|██████████                                                                                  | 11/100 [11:06<1:29:37, 60.42s/it]

Test set: Epoch: 11, Average loss:3.6587, Top-1 Accuracy: 15.4700%, Top-5 Accuracy: 40.3000%, Time consumed:8.46s

EarlyStopping 카운터: 4 / 10
Epoch [12], Batch [50/391], Loss: 3.0928, LR: 0.000100
Epoch [12], Batch [100/391], Loss: 4.0565, LR: 0.000100
Epoch [12], Batch [150/391], Loss: 4.0585, LR: 0.000100
Epoch [12], Batch [200/391], Loss: 4.3236, LR: 0.000100
Epoch [12], Batch [250/391], Loss: 3.3742, LR: 0.000100
Epoch [12], Batch [300/391], Loss: 3.8845, LR: 0.000100
Epoch [12], Batch [350/391], Loss: 3.2511, LR: 0.000100
Train set: Epoch: 12, Average loss:3.7911, LR: 0.000100 Top-1 Accuracy: 14.9160%, Top-5 Accuracy: 38.0760%, Time consumed:51.49s


 12%|███████████                                                                                 | 12/100 [12:06<1:28:18, 60.22s/it]

Test set: Epoch: 12, Average loss:3.3227, Top-1 Accuracy: 20.5800%, Top-5 Accuracy: 48.4300%, Time consumed:8.26s

새로운 최고 top-5 정확도: 48.43%
EarlyStopping 카운터: 5 / 10
Epoch [13], Batch [50/391], Loss: 4.2935, LR: 0.000100
Epoch [13], Batch [100/391], Loss: 3.8307, LR: 0.000100
Epoch [13], Batch [150/391], Loss: 4.2085, LR: 0.000100
Epoch [13], Batch [200/391], Loss: 3.4971, LR: 0.000100
Epoch [13], Batch [250/391], Loss: 3.7767, LR: 0.000100
Epoch [13], Batch [300/391], Loss: 4.4504, LR: 0.000100
Epoch [13], Batch [350/391], Loss: 3.0681, LR: 0.000100
Train set: Epoch: 13, Average loss:3.6729, LR: 0.000100 Top-1 Accuracy: 16.9060%, Top-5 Accuracy: 41.5500%, Time consumed:50.93s
Test set: Epoch: 13, Average loss:3.1790, Top-1 Accuracy: 23.5700%, Top-5 Accuracy: 52.2100%, Time consumed:8.67s

새로운 최고 top-1 정확도: 23.57%, top-5 정확도: 52.21%
새로운 최고 top-5 정확도: 52.21%
Accuracy improved (20.65% --> 23.57%). Saving model ...


 13%|███████████▉                                                                                | 13/100 [13:05<1:27:09, 60.11s/it]

Epoch [14], Batch [50/391], Loss: 3.8744, LR: 0.000100
Epoch [14], Batch [100/391], Loss: 4.0096, LR: 0.000100
Epoch [14], Batch [150/391], Loss: 4.2390, LR: 0.000100
Epoch [14], Batch [200/391], Loss: 4.1153, LR: 0.000100
Epoch [14], Batch [250/391], Loss: 3.1430, LR: 0.000100
Epoch [14], Batch [300/391], Loss: 3.0462, LR: 0.000100
Epoch [14], Batch [350/391], Loss: 3.0680, LR: 0.000100
Train set: Epoch: 14, Average loss:3.6877, LR: 0.000100 Top-1 Accuracy: 16.8260%, Top-5 Accuracy: 41.2060%, Time consumed:50.86s


 14%|████████████▉                                                                               | 14/100 [14:05<1:25:46, 59.84s/it]

Test set: Epoch: 14, Average loss:3.1558, Top-1 Accuracy: 22.9700%, Top-5 Accuracy: 51.6400%, Time consumed:8.35s

EarlyStopping 카운터: 1 / 10
Epoch [15], Batch [50/391], Loss: 3.5931, LR: 0.000100
Epoch [15], Batch [100/391], Loss: 3.9532, LR: 0.000100
Epoch [15], Batch [150/391], Loss: 4.0522, LR: 0.000100
Epoch [15], Batch [200/391], Loss: 3.4146, LR: 0.000100
Epoch [15], Batch [250/391], Loss: 3.5329, LR: 0.000100
Epoch [15], Batch [300/391], Loss: 3.0561, LR: 0.000100
Epoch [15], Batch [350/391], Loss: 3.4370, LR: 0.000100
Train set: Epoch: 15, Average loss:3.6840, LR: 0.000100 Top-1 Accuracy: 17.2680%, Top-5 Accuracy: 41.8640%, Time consumed:52.04s


 15%|█████████████▊                                                                              | 15/100 [15:05<1:25:12, 60.15s/it]

Test set: Epoch: 15, Average loss:3.5290, Top-1 Accuracy: 20.2900%, Top-5 Accuracy: 48.1300%, Time consumed:8.81s

EarlyStopping 카운터: 2 / 10
Epoch [16], Batch [50/391], Loss: 4.1062, LR: 0.000100
Epoch [16], Batch [100/391], Loss: 3.7666, LR: 0.000100
Epoch [16], Batch [150/391], Loss: 3.0958, LR: 0.000100
Epoch [16], Batch [200/391], Loss: 3.8594, LR: 0.000100
Epoch [16], Batch [250/391], Loss: 3.9595, LR: 0.000100
Epoch [16], Batch [300/391], Loss: 3.2769, LR: 0.000100
Epoch [16], Batch [350/391], Loss: 4.3092, LR: 0.000100
Train set: Epoch: 16, Average loss:3.6972, LR: 0.000100 Top-1 Accuracy: 16.8940%, Top-5 Accuracy: 41.4720%, Time consumed:50.63s


 16%|██████████████▋                                                                             | 16/100 [16:05<1:23:45, 59.82s/it]

Test set: Epoch: 16, Average loss:3.7813, Top-1 Accuracy: 20.4500%, Top-5 Accuracy: 46.2700%, Time consumed:8.45s

EarlyStopping 카운터: 3 / 10
Epoch [17], Batch [50/391], Loss: 4.0306, LR: 0.000100
Epoch [17], Batch [100/391], Loss: 3.5458, LR: 0.000100
Epoch [17], Batch [150/391], Loss: 3.0810, LR: 0.000100
Epoch [17], Batch [200/391], Loss: 3.3442, LR: 0.000100
Epoch [17], Batch [250/391], Loss: 4.1726, LR: 0.000100
Epoch [17], Batch [300/391], Loss: 3.4311, LR: 0.000100
Epoch [17], Batch [350/391], Loss: 3.4073, LR: 0.000100
Train set: Epoch: 17, Average loss:3.6632, LR: 0.000100 Top-1 Accuracy: 17.6960%, Top-5 Accuracy: 42.2740%, Time consumed:50.95s


 17%|███████████████▋                                                                            | 17/100 [17:04<1:22:37, 59.73s/it]

Test set: Epoch: 17, Average loss:3.3551, Top-1 Accuracy: 19.5400%, Top-5 Accuracy: 46.5700%, Time consumed:8.56s

EarlyStopping 카운터: 4 / 10
Epoch [18], Batch [50/391], Loss: 2.9802, LR: 0.000010
Epoch [18], Batch [100/391], Loss: 4.1888, LR: 0.000010
Epoch [18], Batch [150/391], Loss: 3.4759, LR: 0.000010
Epoch [18], Batch [200/391], Loss: 3.3561, LR: 0.000010
Epoch [18], Batch [250/391], Loss: 3.5937, LR: 0.000010
Epoch [18], Batch [300/391], Loss: 3.1988, LR: 0.000010
Epoch [18], Batch [350/391], Loss: 4.0977, LR: 0.000010
Train set: Epoch: 18, Average loss:3.6512, LR: 0.000010 Top-1 Accuracy: 17.8820%, Top-5 Accuracy: 42.9160%, Time consumed:50.93s


 18%|████████████████▌                                                                           | 18/100 [18:04<1:21:38, 59.74s/it]

Test set: Epoch: 18, Average loss:3.2897, Top-1 Accuracy: 21.7400%, Top-5 Accuracy: 48.8000%, Time consumed:8.82s

EarlyStopping 카운터: 5 / 10
Epoch [19], Batch [50/391], Loss: 3.5942, LR: 0.000010
Epoch [19], Batch [100/391], Loss: 3.8435, LR: 0.000010
Epoch [19], Batch [150/391], Loss: 3.8024, LR: 0.000010
Epoch [19], Batch [200/391], Loss: 4.1556, LR: 0.000010
Epoch [19], Batch [250/391], Loss: 2.8763, LR: 0.000010
Epoch [19], Batch [300/391], Loss: 4.1120, LR: 0.000010
Epoch [19], Batch [350/391], Loss: 3.0302, LR: 0.000010
Train set: Epoch: 19, Average loss:3.6559, LR: 0.000010 Top-1 Accuracy: 17.8580%, Top-5 Accuracy: 42.8060%, Time consumed:51.67s


 19%|█████████████████▍                                                                          | 19/100 [19:04<1:20:54, 59.93s/it]

Test set: Epoch: 19, Average loss:4.0634, Top-1 Accuracy: 18.8900%, Top-5 Accuracy: 45.1800%, Time consumed:8.70s

EarlyStopping 카운터: 6 / 10
Epoch [20], Batch [50/391], Loss: 4.1833, LR: 0.000010
Epoch [20], Batch [100/391], Loss: 3.5897, LR: 0.000010
Epoch [20], Batch [150/391], Loss: 4.1394, LR: 0.000010
Epoch [20], Batch [200/391], Loss: 4.1080, LR: 0.000010
Epoch [20], Batch [250/391], Loss: 4.1617, LR: 0.000010
Epoch [20], Batch [300/391], Loss: 3.3723, LR: 0.000010
Epoch [20], Batch [350/391], Loss: 4.1026, LR: 0.000010
Train set: Epoch: 20, Average loss:3.6272, LR: 0.000010 Top-1 Accuracy: 18.2780%, Top-5 Accuracy: 43.7760%, Time consumed:50.37s


 20%|██████████████████▍                                                                         | 20/100 [20:03<1:19:25, 59.57s/it]

Test set: Epoch: 20, Average loss:3.2730, Top-1 Accuracy: 21.6000%, Top-5 Accuracy: 48.9100%, Time consumed:8.37s

EarlyStopping 카운터: 7 / 10
Epoch [21], Batch [50/391], Loss: 2.8493, LR: 0.000010
Epoch [21], Batch [100/391], Loss: 3.1595, LR: 0.000010
Epoch [21], Batch [150/391], Loss: 4.2727, LR: 0.000010
Epoch [21], Batch [200/391], Loss: 3.5153, LR: 0.000010
Epoch [21], Batch [250/391], Loss: 4.3419, LR: 0.000010
Epoch [21], Batch [300/391], Loss: 4.2252, LR: 0.000010
Epoch [21], Batch [350/391], Loss: 4.1676, LR: 0.000010
Train set: Epoch: 21, Average loss:3.6646, LR: 0.000010 Top-1 Accuracy: 18.0740%, Top-5 Accuracy: 42.9560%, Time consumed:51.30s


 21%|███████████████████▎                                                                        | 21/100 [21:03<1:18:28, 59.60s/it]

Test set: Epoch: 21, Average loss:4.1292, Top-1 Accuracy: 19.6600%, Top-5 Accuracy: 46.1600%, Time consumed:8.36s

EarlyStopping 카운터: 8 / 10
Epoch [22], Batch [50/391], Loss: 3.0645, LR: 0.000001
Epoch [22], Batch [100/391], Loss: 3.6660, LR: 0.000001
Epoch [22], Batch [150/391], Loss: 3.5153, LR: 0.000001
Epoch [22], Batch [200/391], Loss: 3.8972, LR: 0.000001
Epoch [22], Batch [250/391], Loss: 4.0173, LR: 0.000001
Epoch [22], Batch [300/391], Loss: 4.1590, LR: 0.000001
Epoch [22], Batch [350/391], Loss: 4.3197, LR: 0.000001
Train set: Epoch: 22, Average loss:3.6661, LR: 0.000001 Top-1 Accuracy: 17.7880%, Top-5 Accuracy: 42.7220%, Time consumed:50.48s


 22%|████████████████████▏                                                                       | 22/100 [22:02<1:17:13, 59.40s/it]

Test set: Epoch: 22, Average loss:3.3325, Top-1 Accuracy: 20.9400%, Top-5 Accuracy: 48.4300%, Time consumed:8.46s

EarlyStopping 카운터: 9 / 10
Epoch [23], Batch [50/391], Loss: 3.1043, LR: 0.000001
Epoch [23], Batch [100/391], Loss: 4.3722, LR: 0.000001
Epoch [23], Batch [150/391], Loss: 3.4011, LR: 0.000001
Epoch [23], Batch [200/391], Loss: 3.3554, LR: 0.000001
Epoch [23], Batch [250/391], Loss: 3.4909, LR: 0.000001
Epoch [23], Batch [300/391], Loss: 2.7837, LR: 0.000001
Epoch [23], Batch [350/391], Loss: 3.3395, LR: 0.000001
Train set: Epoch: 23, Average loss:3.6635, LR: 0.000001 Top-1 Accuracy: 17.6980%, Top-5 Accuracy: 42.4540%, Time consumed:51.12s


 22%|████████████████████▏                                                                       | 22/100 [23:01<1:21:38, 62.80s/it]

Test set: Epoch: 23, Average loss:3.4015, Top-1 Accuracy: 19.6100%, Top-5 Accuracy: 45.4400%, Time consumed:8.39s

EarlyStopping 카운터: 10 / 10
에폭 23에서 학습 조기 종료. 최고 성능 에폭: 13
테스트 정확도 기준 최고 모델 로드 중...





Test set: Epoch: 100, Average loss:3.1790, Top-1 Accuracy: 23.5700%, Top-5 Accuracy: 52.2100%, Time consumed:8.46s

완료! 최고 테스트 top-1 정확도: 23.57%, 최고 테스트 top-5 정확도: 52.21%
최종 테스트 top-1 정확도: 23.57%, 최종 테스트 top-5 정확도: 52.21%
전체 학습 시간: 1390.12 초


0,1
epoch,▁▁▂▂▂▃▃▃▄▄▄▅▅▅▅▆▆▆▇▇▇██
learning_rate,▂▄▅▇██████▂▂▂▂▂▂▁▁▁▁▁▁▁
test_accuracy_top1,▁▂▃▄▃▆▇▆▆▃▄▆██▆▆▆▇▆▇▆▇▆
test_accuracy_top5,▁▂▃▅▃▆▇▇▇▃▄▇██▇▆▆▇▆▇▆▇▆
test_loss,▆▆▅▄▅▃▂▂▂▅▅▂▁▁▄▅▂▂█▂█▂▃
total_training_time,▁
train_accuracy_top1,▁▃▄▄▅▅▅▅▅▅▅▆▇▇▇▇███████
train_accuracy_top5,▁▃▄▅▅▅▅▅▅▆▅▆▇▇▇▇███████
train_loss,█▆▅▄▄▄▄▄▄▄▄▃▁▂▂▂▁▁▁▁▁▁▁

0,1
best_epoch,13
best_test_accuracy_top1,23.57
best_test_accuracy_top5,52.21
early_stopped,True
early_stopped_epoch,23
epoch,23
final_test_accuracy_top1,23.57
final_test_accuracy_top5,52.21
learning_rate,0.0
test_accuracy_top1,19.61
