In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
import torchvision
import torchvision.transforms as transforms
import torchvision.transforms.v2 as transforms_v2  # CutMix를 위한 v2 transforms 추가
import sys
import os
import time
import random
import numpy as np
import wandb
from tqdm import tqdm
from tools.tool import AccuracyEarlyStopping  # 수정된 AccuracyEarlyStopping 클래스 임포트
from models.shake_resnet import shake_resnet18

# Authenticate with Weights & Biases.
# The API key is read from the WANDB_API_KEY environment variable rather than
# being committed to source. When the variable is unset, ``key`` is None and
# wandb falls back to its own credential lookup.
# NOTE(review): the key that was previously hard-coded here is exposed in the
# file history and should be revoked.
wandb.login(key=os.environ.get("WANDB_API_KEY"))
wandb.init(project="PBL-2", name="shake_resnet18_cutmix,flip,crop_standard")

# Experiment configuration (also uploaded to the W&B run config).
config = {
    "model": "shake_resnet18",
    "batch_size": 128,
    "num_epochs": 100,
    "learning_rate": 0.001,
    "optimizer": "Adam",
    "seed": 2025,
    "deterministic": False,
    "patience": 10,  # early stopping patience
    "max_epochs_wait": 30,  # passed to AccuracyEarlyStopping as max_epochs
    "cutmix_alpha": 1.0,  # CutMix alpha parameter
    "cutmix_prob": 0.5,   # probability of applying CutMix to a batch
    "crop_padding": 4,    # RandomCrop padding size
    "crop_size": 32,      # RandomCrop output size (CIFAR-100 images are 32x32)
}
wandb.config.update(config)

# Apply the configured seed so the logged "seed" value actually describes the
# run (it was previously recorded but never used).
random.seed(config["seed"])
np.random.seed(config["seed"])
torch.manual_seed(config["seed"])
if config["deterministic"]:
    # Trade cuDNN autotuning for run-to-run reproducibility.
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

# CIFAR-100 input pipeline, using the dataset's built-in train/test split.
# Tensor conversion and normalization are common to both pipelines; only the
# training pipeline adds random crop (after padding) and horizontal flip.
_tensor_and_normalize = [
    transforms.ToTensor(),
    transforms.Normalize((0.5071, 0.4867, 0.4408), (0.2675, 0.2565, 0.2761)),
]

transform_train = transforms.Compose(
    [
        transforms.RandomCrop(config["crop_size"], padding=config["crop_padding"]),
        transforms.RandomHorizontalFlip(),
        *_tensor_and_normalize,
    ]
)
transform_test = transforms.Compose(list(_tensor_and_normalize))

# Datasets (downloaded into ./data on first use).
_dataset_kwargs = dict(root='./data', download=True)
trainset = torchvision.datasets.CIFAR100(train=True, transform=transform_train, **_dataset_kwargs)
testset = torchvision.datasets.CIFAR100(train=False, transform=transform_test, **_dataset_kwargs)

# Data loaders: only the training loader shuffles.
_loader_kwargs = dict(batch_size=config["batch_size"], num_workers=16)
trainloader = DataLoader(trainset, shuffle=True, **_loader_kwargs)
testloader = DataLoader(testset, shuffle=False, **_loader_kwargs)

print(f"Train set size: {len(trainset)}")
print(f"Test set size: {len(testset)}")

# Batch-level CutMix transform (CIFAR-100 has 100 classes).
cutmix = transforms_v2.CutMix(num_classes=100, alpha=config["cutmix_alpha"])

def train(model, trainloader, criterion, optimizer, device, epoch,
          cutmix_prob=None, cutmix_fn=None):
    """Run one training epoch, applying CutMix to a random subset of batches.

    Args:
        model: Network to optimize; it is switched to training mode.
        trainloader: Iterable of ``(inputs, labels)`` batches that also
            supports ``len()`` (used for progress output).
        criterion: Loss applied to batches that are NOT mixed (integer class
            labels). Mixed batches use ``torch.nn.functional.cross_entropy``
            directly on the 2-D targets returned by the CutMix callable.
        optimizer: Optimizer stepped once per batch.
        device: Device every batch is moved to.
        epoch: Zero-based epoch index, used only in log output.
        cutmix_prob: Probability of mixing a batch. ``None`` (default) reads
            the module-level ``config["cutmix_prob"]``.
        cutmix_fn: Callable ``(inputs, labels) -> (inputs, targets)`` where
            ``targets`` has one score per class along dim 1. ``None``
            (default) uses the module-level ``cutmix`` transform.

    Returns:
        ``(epoch_loss, accuracy_top1, accuracy_top5)``: the mean of the
        per-batch losses and the top-1 / top-5 accuracies in percent. All
        three are 0.0 when the loader yields no batches.

    Note:
        For mixed batches the accuracy is measured against the arg-max of the
        mixed targets, so the training accuracy is only an approximation.
    """
    if cutmix_prob is None:
        cutmix_prob = config["cutmix_prob"]

    model.train()
    start_time = time.time()
    running_loss = 0.0
    correct_top1 = 0
    correct_top5 = 0
    total = 0
    num_batches = 0

    for i, (inputs, labels) in enumerate(trainloader):
        inputs, labels = inputs.to(device), labels.to(device)

        # Decide per batch whether to mix; one RNG draw per batch either way.
        use_cutmix = random.random() < cutmix_prob
        if use_cutmix:
            # Resolve the global lazily so callers that never mix (or that
            # pass their own callable) do not need ``cutmix`` to exist.
            mix = cutmix if cutmix_fn is None else cutmix_fn
            inputs, labels = mix(inputs, labels)

        optimizer.zero_grad()
        outputs = model(inputs)

        if use_cutmix:
            # Targets are per-class scores after mixing.
            loss = torch.nn.functional.cross_entropy(outputs, labels)
        else:
            # Targets are integer class indices.
            loss = criterion(outputs, labels)

        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        num_batches += 1

        # Reduce mixed targets to a single class index for accuracy counting.
        label_idx = labels.max(1)[1] if use_cutmix else labels

        # Top-1 accuracy.
        predicted = outputs.max(1)[1]
        total += inputs.size(0)
        correct_top1 += predicted.eq(label_idx).sum().item()

        # Top-5 accuracy, vectorized: a sample counts when its label appears
        # anywhere in its top-k row. k is capped for models with < 5 classes.
        k = min(5, outputs.size(1))
        top5_idx = outputs.topk(k, 1, largest=True, sorted=True)[1]
        correct_top5 += top5_idx.eq(label_idx.view(-1, 1)).any(dim=1).sum().item()

        if (i + 1) % 50 == 0:  # report every 50 batches
            print(f'Epoch [{epoch+1}], Batch [{i+1}/{len(trainloader)}], Loss: {loss.item():.4f}')

    # Guard the averages so an empty loader yields zeros instead of raising.
    epoch_loss = running_loss / num_batches if num_batches else 0.0
    accuracy_top1 = 100.0 * correct_top1 / total if total else 0.0
    accuracy_top5 = 100.0 * correct_top5 / total if total else 0.0

    train_time = time.time() - start_time

    # Epoch summary for the training set.
    print(f'Train set: Epoch: {epoch+1}, Average loss:{epoch_loss:.4f}, LR: {optimizer.param_groups[0]["lr"]:.6f} '
          f'Top-1 Accuracy: {accuracy_top1:.4f}%, Top-5 Accuracy: {accuracy_top5:.4f}%, Time consumed:{train_time:.2f}s')

    return epoch_loss, accuracy_top1, accuracy_top5

def evaluate(model, dataloader, criterion, device, epoch, phase="test"):
    """Score ``model`` on ``dataloader`` without updating any weights.

    Args:
        model: Network to evaluate; it is switched to eval mode.
        dataloader: Iterable of ``(inputs, labels)`` batches supporting ``len()``.
        criterion: Loss evaluated on every batch.
        device: Device every batch is moved to.
        epoch: Zero-based epoch index, used only in the printed summary.
        phase: Label for the printed summary (capitalized when printed).

    Returns:
        ``(eval_loss, accuracy_top1, accuracy_top5)``: the mean of the
        per-batch losses and the top-1 / top-5 accuracies in percent.
    """
    model.eval()
    tic = time.time()

    loss_sum = 0.0
    hits_top1 = 0
    hits_top5 = 0
    n_samples = 0

    # No gradients are needed for evaluation.
    with torch.no_grad():
        for batch_inputs, batch_labels in dataloader:
            batch_inputs = batch_inputs.to(device)
            batch_labels = batch_labels.to(device)

            logits = model(batch_inputs)
            loss_sum += criterion(logits, batch_labels).item()

            n_samples += batch_labels.size(0)

            # Top-1: index of the highest score per row.
            top1_idx = logits.max(1)[1]
            hits_top1 += (top1_idx == batch_labels).sum().item()

            # Top-5: a label can match at most one of the five distinct
            # indices in its row, so summing the matches counts the samples.
            top5_idx = logits.topk(5, 1, largest=True, sorted=True)[1]
            hits_top5 += (top5_idx == batch_labels.view(-1, 1)).sum().item()

    mean_loss = loss_sum / len(dataloader)
    top1_pct = 100.0 * hits_top1 / n_samples
    top5_pct = 100.0 * hits_top5 / n_samples

    elapsed = time.time() - tic

    # Summary line for this evaluation pass, followed by a blank line.
    print(f'{phase.capitalize()} set: Epoch: {epoch+1}, Average loss:{mean_loss:.4f}, '
          f'Top-1 Accuracy: {top1_pct:.4f}%, Top-5 Accuracy: {top5_pct:.4f}%, Time consumed:{elapsed:.2f}s')
    print()

    return mean_loss, top1_pct, top5_pct

# 메인 학습 루프
def main_training_loop(model, trainloader, testloader, criterion, optimizer, device, num_epochs, patience, max_epochs_wait):
    """Train for up to ``num_epochs`` epochs with accuracy-based early stopping.

    Each epoch calls ``train`` and then ``evaluate`` on ``testloader``, logs
    the metrics to W&B, saves the model weights whenever the test top-1
    accuracy improves, and consults ``AccuracyEarlyStopping`` to decide
    whether to stop. After the loop the best saved weights (if any) are
    reloaded, evaluated once more, and the best/final accuracies are written
    to the W&B run summary.

    NOTE(review): model selection and early stopping both use ``testloader``,
    so the reported test accuracy is optimistically biased; a held-out
    validation split would avoid this.

    Args:
        model: Network to train.
        trainloader: Loader passed to ``train``.
        testloader: Loader passed to ``evaluate``.
        criterion: Loss function forwarded to ``train`` and ``evaluate``.
        optimizer: Optimizer forwarded to ``train``.
        device: Device forwarded to ``train`` and ``evaluate``.
        num_epochs: Maximum number of epochs to run.
        patience: Forwarded to ``AccuracyEarlyStopping(patience=...)``.
        max_epochs_wait: Forwarded to ``AccuracyEarlyStopping(max_epochs=...)``
            (exact semantics are defined by that class — confirm there).

    Returns:
        None. Results are printed and recorded in ``wandb.run.summary``.
    """
    # Accuracy-based early stopping helper.
    early_stopping = AccuracyEarlyStopping(patience=patience, verbose=True, path='checkpoint.pt', max_epochs=max_epochs_wait)

    best_test_acc_top1 = 0.0
    best_test_acc_top5 = 0.0

    # Build the checkpoint path once so saving and loading always agree.
    model_path = f'best_model_{wandb.run.name}.pth'
    # Tracks whether THIS run wrote model_path, so a missing or stale file
    # from an earlier run is never loaded.
    best_model_saved = False
    # Index of the last epoch that actually ran (differs from num_epochs - 1
    # after early stopping).
    last_epoch = 0

    for epoch in tqdm(range(num_epochs)):
        last_epoch = epoch

        # Train for one epoch.
        train_loss, train_acc_top1, train_acc_top5 = train(model, trainloader, criterion, optimizer, device, epoch)

        # Evaluate on the test data.
        test_loss, test_acc_top1, test_acc_top5 = evaluate(model, testloader, criterion, device, epoch, phase="test")

        # Log this epoch's metrics to W&B.
        wandb.log({
            "epoch": epoch + 1,
            "learning_rate": optimizer.param_groups[0]['lr'],
            "train_loss": train_loss,
            "train_accuracy_top1": train_acc_top1,
            "train_accuracy_top5": train_acc_top5,
            "test_loss": test_loss,
            "test_accuracy_top1": test_acc_top1,
            "test_accuracy_top5": test_acc_top5
        })

        # Save the weights whenever top-1 accuracy reaches a new best.
        if test_acc_top1 > best_test_acc_top1:
            best_test_acc_top1 = test_acc_top1
            best_test_acc_top5_at_best_top1 = test_acc_top5
            print(f'새로운 최고 top-1 정확도: {best_test_acc_top1:.2f}%, top-5 정확도: {best_test_acc_top5_at_best_top1:.2f}%')
            torch.save(model.state_dict(), model_path)
            best_model_saved = True

            # Upload the saved weights file to W&B.
            wandb.save(model_path)

        # Track the best top-5 accuracy independently of top-1.
        if test_acc_top5 > best_test_acc_top5:
            best_test_acc_top5 = test_acc_top5
            print(f'새로운 최고 top-5 정확도: {best_test_acc_top5:.2f}%')

        # Early-stopping check, driven by test top-1 accuracy.
        early_stopping(test_acc_top1, model, epoch)
        if early_stopping.early_stop:
            print(f"에폭 {epoch+1}에서 학습 조기 종료. 최고 성능 에폭: {early_stopping.best_epoch+1}")
            break

    # Reload the best weights, but only if this run actually saved them.
    if best_model_saved:
        print("테스트 정확도 기준 최고 모델 로드 중...")
        model.load_state_dict(torch.load(model_path))
    else:
        print("저장된 최고 모델이 없어 현재 모델 가중치로 최종 평가를 진행합니다.")

    # Final test-set evaluation, labelled with the last epoch that ran.
    final_test_loss, final_test_acc_top1, final_test_acc_top5 = evaluate(model, testloader, criterion, device, last_epoch, phase="test")

    print(f'완료! 최고 테스트 top-1 정확도: {best_test_acc_top1:.2f}%, 최고 테스트 top-5 정확도: {best_test_acc_top5:.2f}%')
    print(f'최종 테스트 top-1 정확도: {final_test_acc_top1:.2f}%, 최종 테스트 top-5 정확도: {final_test_acc_top5:.2f}%')

    # Record the final results in the W&B run summary.
    wandb.run.summary["best_test_accuracy_top1"] = best_test_acc_top1
    wandb.run.summary["best_test_accuracy_top5"] = best_test_acc_top5
    wandb.run.summary["final_test_accuracy_top1"] = final_test_acc_top1
    wandb.run.summary["final_test_accuracy_top5"] = final_test_acc_top5

    # Record whether and when early stopping triggered.
    if early_stopping.early_stop:
        wandb.run.summary["early_stopped"] = True
        wandb.run.summary["early_stopped_epoch"] = last_epoch + 1
        wandb.run.summary["best_epoch"] = early_stopping.best_epoch+1
    else:
        wandb.run.summary["early_stopped"] = False


# Select the compute device: CUDA when available, otherwise CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# Build the model for 100 classes and move it to the selected device.
# (The constructor arguments can be customized here.)
model = shake_resnet18(num_classes=100, p_drop=0.5, alpha_range=[0, 0]).to(device)
criterion = nn.CrossEntropyLoss()  # default CrossEntropyLoss (no label smoothing)
optimizer = optim.Adam(model.parameters(), lr=config["learning_rate"])  # define the optimizer

# Register the model with W&B (log="all"; presumably gradients and
# parameters — confirm against the wandb.watch documentation).
wandb.watch(model, log="all")

# Multi-GPU: wrap the model in DataParallel when more than one GPU is visible.
# The optimizer above was created from the unwrapped model's parameters.
if torch.cuda.device_count() > 1:
    print(f"{torch.cuda.device_count()}개의 GPU를 사용합니다.")
    model = nn.DataParallel(model)

# Record the wall-clock start time of training.
start_time = time.time()

# Run the main training loop.
main_training_loop(
    model=model,
    trainloader=trainloader,
    testloader=testloader,
    criterion=criterion,
    optimizer=optimizer,
    device=device,
    num_epochs=config["num_epochs"],
    patience=config["patience"],
    max_epochs_wait=config["max_epochs_wait"]
)

# Compute the total elapsed time (includes the final evaluation done inside
# main_training_loop), log it to W&B, and print it.
end_time = time.time()
total_time = end_time - start_time
wandb.log({"total_training_time": total_time})

print(f"전체 학습 시간: {total_time:.2f} 초")

# Close the W&B run.
# NOTE(review): not reached if training raises; consider try/finally.
wandb.finish()



Files already downloaded and verified
Files already downloaded and verified
Train set size: 50000
Test set size: 10000
Using device: cuda
2개의 GPU를 사용합니다.


  0%|                                                                                                       | 0/100 [00:00<?, ?it/s]

Epoch [1], Batch [50/391], Loss: 4.4131
Epoch [1], Batch [100/391], Loss: 4.5366
Epoch [1], Batch [150/391], Loss: 4.5755
Epoch [1], Batch [200/391], Loss: 3.8333
Epoch [1], Batch [250/391], Loss: 4.3270
Epoch [1], Batch [300/391], Loss: 3.9326
Epoch [1], Batch [350/391], Loss: 4.1994
Train set: Epoch: 1, Average loss:4.2195, LR: 0.001000 Top-1 Accuracy: 6.8840%, Top-5 Accuracy: 23.1880%, Time consumed:52.02s
Test set: Epoch: 1, Average loss:3.7470, Top-1 Accuracy: 11.1100%, Top-5 Accuracy: 34.5800%, Time consumed:8.36s

새로운 최고 top-1 정확도: 11.11%, top-5 정확도: 34.58%
새로운 최고 top-5 정확도: 34.58%
Accuracy improved (-inf% --> 11.11%). Saving model ...


  1%|▉                                                                                            | 1/100 [01:00<1:40:03, 60.64s/it]

Epoch [2], Batch [50/391], Loss: 4.4572
Epoch [2], Batch [100/391], Loss: 4.3913
Epoch [2], Batch [150/391], Loss: 3.8893
Epoch [2], Batch [200/391], Loss: 3.6757
Epoch [2], Batch [250/391], Loss: 4.4478
Epoch [2], Batch [300/391], Loss: 3.7413
Epoch [2], Batch [350/391], Loss: 3.3925
Train set: Epoch: 2, Average loss:4.0110, LR: 0.001000 Top-1 Accuracy: 9.9680%, Top-5 Accuracy: 30.1520%, Time consumed:54.14s
Test set: Epoch: 2, Average loss:3.5607, Top-1 Accuracy: 15.5200%, Top-5 Accuracy: 41.0700%, Time consumed:8.84s

새로운 최고 top-1 정확도: 15.52%, top-5 정확도: 41.07%
새로운 최고 top-5 정확도: 41.07%
Accuracy improved (11.11% --> 15.52%). Saving model ...


  2%|█▊                                                                                           | 2/100 [02:03<1:41:31, 62.16s/it]

Epoch [3], Batch [50/391], Loss: 4.1063
Epoch [3], Batch [100/391], Loss: 3.8103
Epoch [3], Batch [150/391], Loss: 3.4233
Epoch [3], Batch [200/391], Loss: 4.2381
Epoch [3], Batch [250/391], Loss: 3.5546
Epoch [3], Batch [300/391], Loss: 3.5591
Epoch [3], Batch [350/391], Loss: 3.7933
Train set: Epoch: 3, Average loss:3.9209, LR: 0.001000 Top-1 Accuracy: 11.9220%, Top-5 Accuracy: 33.5760%, Time consumed:55.14s
Test set: Epoch: 3, Average loss:3.3134, Top-1 Accuracy: 20.0000%, Top-5 Accuracy: 47.5200%, Time consumed:8.46s

새로운 최고 top-1 정확도: 20.00%, top-5 정확도: 47.52%
새로운 최고 top-5 정확도: 47.52%
Accuracy improved (15.52% --> 20.00%). Saving model ...


  3%|██▊                                                                                          | 3/100 [03:07<1:41:45, 62.94s/it]

Epoch [4], Batch [50/391], Loss: 4.2191
Epoch [4], Batch [100/391], Loss: 3.4495
Epoch [4], Batch [150/391], Loss: 4.1670
Epoch [4], Batch [200/391], Loss: 4.3852
Epoch [4], Batch [250/391], Loss: 3.5038
Epoch [4], Batch [300/391], Loss: 3.5400
Epoch [4], Batch [350/391], Loss: 3.8865
Train set: Epoch: 4, Average loss:3.8661, LR: 0.001000 Top-1 Accuracy: 12.7320%, Top-5 Accuracy: 35.1920%, Time consumed:51.96s


  4%|███▋                                                                                         | 4/100 [04:08<1:39:06, 61.94s/it]

Test set: Epoch: 4, Average loss:3.4536, Top-1 Accuracy: 16.9900%, Top-5 Accuracy: 43.7600%, Time consumed:8.44s

EarlyStopping 카운터: 1 / 10
Epoch [5], Batch [50/391], Loss: 3.5085
Epoch [5], Batch [100/391], Loss: 3.6466
Epoch [5], Batch [150/391], Loss: 3.6194
Epoch [5], Batch [200/391], Loss: 3.5197
Epoch [5], Batch [250/391], Loss: 3.9333
Epoch [5], Batch [300/391], Loss: 3.7695
Epoch [5], Batch [350/391], Loss: 3.8150
Train set: Epoch: 5, Average loss:3.8874, LR: 0.001000 Top-1 Accuracy: 12.6920%, Top-5 Accuracy: 34.6820%, Time consumed:52.96s


  5%|████▋                                                                                        | 5/100 [05:09<1:37:57, 61.87s/it]

Test set: Epoch: 5, Average loss:3.4885, Top-1 Accuracy: 17.2900%, Top-5 Accuracy: 42.1200%, Time consumed:8.76s

EarlyStopping 카운터: 2 / 10
Epoch [6], Batch [50/391], Loss: 3.5151
Epoch [6], Batch [100/391], Loss: 4.2297
Epoch [6], Batch [150/391], Loss: 3.4122
Epoch [6], Batch [200/391], Loss: 4.1976
Epoch [6], Batch [250/391], Loss: 4.3244
Epoch [6], Batch [300/391], Loss: 3.6104
Epoch [6], Batch [350/391], Loss: 3.8544
Train set: Epoch: 6, Average loss:3.8685, LR: 0.001000 Top-1 Accuracy: 12.9940%, Top-5 Accuracy: 35.6140%, Time consumed:51.55s


  6%|█████▌                                                                                       | 6/100 [06:09<1:35:55, 61.23s/it]

Test set: Epoch: 6, Average loss:3.5067, Top-1 Accuracy: 17.6000%, Top-5 Accuracy: 43.4500%, Time consumed:8.44s

EarlyStopping 카운터: 3 / 10
Epoch [7], Batch [50/391], Loss: 4.9350
Epoch [7], Batch [100/391], Loss: 4.4900
Epoch [7], Batch [150/391], Loss: 3.4795
Epoch [7], Batch [200/391], Loss: 4.0150
Epoch [7], Batch [250/391], Loss: 3.6783
Epoch [7], Batch [300/391], Loss: 3.4016
Epoch [7], Batch [350/391], Loss: 4.0405
Train set: Epoch: 7, Average loss:3.8559, LR: 0.001000 Top-1 Accuracy: 13.3380%, Top-5 Accuracy: 35.8160%, Time consumed:50.88s


  7%|██████▌                                                                                      | 7/100 [07:09<1:33:54, 60.58s/it]

Test set: Epoch: 7, Average loss:4.6872, Top-1 Accuracy: 14.2900%, Top-5 Accuracy: 36.9500%, Time consumed:8.37s

EarlyStopping 카운터: 4 / 10
Epoch [8], Batch [50/391], Loss: 3.9439
Epoch [8], Batch [100/391], Loss: 4.1097
Epoch [8], Batch [150/391], Loss: 3.8097
Epoch [8], Batch [200/391], Loss: 3.5675
Epoch [8], Batch [250/391], Loss: 3.3096
Epoch [8], Batch [300/391], Loss: 3.3319
Epoch [8], Batch [350/391], Loss: 3.2245
Train set: Epoch: 8, Average loss:3.8490, LR: 0.001000 Top-1 Accuracy: 13.8180%, Top-5 Accuracy: 36.2320%, Time consumed:51.36s


  8%|███████▍                                                                                     | 8/100 [08:09<1:32:34, 60.38s/it]

Test set: Epoch: 8, Average loss:3.5070, Top-1 Accuracy: 18.4400%, Top-5 Accuracy: 43.8600%, Time consumed:8.58s

EarlyStopping 카운터: 5 / 10
Epoch [9], Batch [50/391], Loss: 3.9111
Epoch [9], Batch [100/391], Loss: 3.9518
Epoch [9], Batch [150/391], Loss: 4.2723
Epoch [9], Batch [200/391], Loss: 3.7393
Epoch [9], Batch [250/391], Loss: 4.5621
Epoch [9], Batch [300/391], Loss: 3.4549
Epoch [9], Batch [350/391], Loss: 3.8943
Train set: Epoch: 9, Average loss:3.8543, LR: 0.001000 Top-1 Accuracy: 13.9160%, Top-5 Accuracy: 36.5020%, Time consumed:53.09s


  9%|████████▎                                                                                    | 9/100 [09:11<1:32:26, 60.95s/it]

Test set: Epoch: 9, Average loss:5.3648, Top-1 Accuracy: 11.5200%, Top-5 Accuracy: 29.3000%, Time consumed:9.09s

EarlyStopping 카운터: 6 / 10
Epoch [10], Batch [50/391], Loss: 3.5169
Epoch [10], Batch [100/391], Loss: 3.3720
Epoch [10], Batch [150/391], Loss: 3.7825
Epoch [10], Batch [200/391], Loss: 3.7036
Epoch [10], Batch [250/391], Loss: 3.5044
Epoch [10], Batch [300/391], Loss: 3.7416
Epoch [10], Batch [350/391], Loss: 4.2313
Train set: Epoch: 10, Average loss:3.8603, LR: 0.001000 Top-1 Accuracy: 13.4980%, Top-5 Accuracy: 36.3980%, Time consumed:54.87s
Test set: Epoch: 10, Average loss:3.2438, Top-1 Accuracy: 21.4000%, Top-5 Accuracy: 49.8700%, Time consumed:8.37s

새로운 최고 top-1 정확도: 21.40%, top-5 정확도: 49.87%
새로운 최고 top-5 정확도: 49.87%
Accuracy improved (20.00% --> 21.40%). Saving model ...


 10%|█████████▏                                                                                  | 10/100 [10:14<1:32:36, 61.74s/it]

Epoch [11], Batch [50/391], Loss: 3.9301
Epoch [11], Batch [100/391], Loss: 3.2516
Epoch [11], Batch [150/391], Loss: 3.0322
Epoch [11], Batch [200/391], Loss: 3.2619
Epoch [11], Batch [250/391], Loss: 4.1737
Epoch [11], Batch [300/391], Loss: 4.1872
Epoch [11], Batch [350/391], Loss: 3.4522
Train set: Epoch: 11, Average loss:3.8435, LR: 0.001000 Top-1 Accuracy: 14.1260%, Top-5 Accuracy: 36.7840%, Time consumed:51.15s


 11%|██████████                                                                                  | 11/100 [11:14<1:30:38, 61.10s/it]

Test set: Epoch: 11, Average loss:3.5350, Top-1 Accuracy: 15.3500%, Top-5 Accuracy: 42.5200%, Time consumed:8.51s

EarlyStopping 카운터: 1 / 10
Epoch [12], Batch [50/391], Loss: 3.3350
Epoch [12], Batch [100/391], Loss: 4.2647
Epoch [12], Batch [150/391], Loss: 3.5254
Epoch [12], Batch [200/391], Loss: 3.4261
Epoch [12], Batch [250/391], Loss: 3.7153
Epoch [12], Batch [300/391], Loss: 4.5201
Epoch [12], Batch [350/391], Loss: 3.8699
Train set: Epoch: 12, Average loss:3.8308, LR: 0.001000 Top-1 Accuracy: 14.0860%, Top-5 Accuracy: 36.8600%, Time consumed:50.41s


 12%|███████████                                                                                 | 12/100 [12:13<1:28:34, 60.39s/it]

Test set: Epoch: 12, Average loss:3.5465, Top-1 Accuracy: 16.4200%, Top-5 Accuracy: 42.4300%, Time consumed:8.35s

EarlyStopping 카운터: 2 / 10
Epoch [13], Batch [50/391], Loss: 4.3709
Epoch [13], Batch [100/391], Loss: 3.8555
Epoch [13], Batch [150/391], Loss: 3.4770
Epoch [13], Batch [200/391], Loss: 4.0522
Epoch [13], Batch [250/391], Loss: 4.0228
Epoch [13], Batch [300/391], Loss: 3.6113
Epoch [13], Batch [350/391], Loss: 3.4090
Train set: Epoch: 13, Average loss:3.8075, LR: 0.001000 Top-1 Accuracy: 14.4780%, Top-5 Accuracy: 38.2540%, Time consumed:51.34s


 13%|███████████▉                                                                                | 13/100 [13:13<1:27:27, 60.32s/it]

Test set: Epoch: 13, Average loss:3.6782, Top-1 Accuracy: 13.0700%, Top-5 Accuracy: 36.1500%, Time consumed:8.81s

EarlyStopping 카운터: 3 / 10
Epoch [14], Batch [50/391], Loss: 3.9945
Epoch [14], Batch [100/391], Loss: 4.2817
Epoch [14], Batch [150/391], Loss: 4.2218
Epoch [14], Batch [200/391], Loss: 3.8911
Epoch [14], Batch [250/391], Loss: 2.9854
Epoch [14], Batch [300/391], Loss: 4.4269
Epoch [14], Batch [350/391], Loss: 4.2211
Train set: Epoch: 14, Average loss:3.8678, LR: 0.001000 Top-1 Accuracy: 13.9940%, Top-5 Accuracy: 36.4960%, Time consumed:51.98s


 14%|████████████▉                                                                               | 14/100 [14:13<1:26:27, 60.32s/it]

Test set: Epoch: 14, Average loss:3.5143, Top-1 Accuracy: 16.0900%, Top-5 Accuracy: 42.7000%, Time consumed:8.32s

EarlyStopping 카운터: 4 / 10
Epoch [15], Batch [50/391], Loss: 2.9616
Epoch [15], Batch [100/391], Loss: 3.2401
Epoch [15], Batch [150/391], Loss: 3.3114
Epoch [15], Batch [200/391], Loss: 3.2607
Epoch [15], Batch [250/391], Loss: 4.2906
Epoch [15], Batch [300/391], Loss: 3.9390
Epoch [15], Batch [350/391], Loss: 4.2664
Train set: Epoch: 15, Average loss:3.8537, LR: 0.001000 Top-1 Accuracy: 13.9340%, Top-5 Accuracy: 36.7580%, Time consumed:52.13s


 15%|█████████████▊                                                                              | 15/100 [15:14<1:25:30, 60.35s/it]

Test set: Epoch: 15, Average loss:3.5952, Top-1 Accuracy: 14.2800%, Top-5 Accuracy: 39.2900%, Time consumed:8.29s

EarlyStopping 카운터: 5 / 10
Epoch [16], Batch [50/391], Loss: 4.1775
Epoch [16], Batch [100/391], Loss: 3.5318
Epoch [16], Batch [150/391], Loss: 4.2389
Epoch [16], Batch [200/391], Loss: 3.7378
Epoch [16], Batch [250/391], Loss: 4.5162
Epoch [16], Batch [300/391], Loss: 4.2633
Epoch [16], Batch [350/391], Loss: 3.5979
Train set: Epoch: 16, Average loss:3.9185, LR: 0.001000 Top-1 Accuracy: 13.2320%, Top-5 Accuracy: 35.2680%, Time consumed:50.45s


 16%|██████████████▋                                                                             | 16/100 [16:13<1:23:59, 60.00s/it]

Test set: Epoch: 16, Average loss:3.4795, Top-1 Accuracy: 16.8900%, Top-5 Accuracy: 42.9300%, Time consumed:8.71s

EarlyStopping 카운터: 6 / 10
Epoch [17], Batch [50/391], Loss: 3.5065
Epoch [17], Batch [100/391], Loss: 4.3721
Epoch [17], Batch [150/391], Loss: 3.8895
Epoch [17], Batch [200/391], Loss: 3.4335
Epoch [17], Batch [250/391], Loss: 3.5134
Epoch [17], Batch [300/391], Loss: 4.4761
Epoch [17], Batch [350/391], Loss: 4.1057
Train set: Epoch: 17, Average loss:3.8944, LR: 0.001000 Top-1 Accuracy: 13.6180%, Top-5 Accuracy: 35.5880%, Time consumed:50.51s


 17%|███████████████▋                                                                            | 17/100 [17:12<1:22:32, 59.66s/it]

Test set: Epoch: 17, Average loss:3.8188, Top-1 Accuracy: 12.8100%, Top-5 Accuracy: 34.1700%, Time consumed:8.37s

EarlyStopping 카운터: 7 / 10
Epoch [18], Batch [50/391], Loss: 3.6548
Epoch [18], Batch [100/391], Loss: 4.2710
Epoch [18], Batch [150/391], Loss: 4.0048
Epoch [18], Batch [200/391], Loss: 4.3955
Epoch [18], Batch [250/391], Loss: 3.5036
Epoch [18], Batch [300/391], Loss: 4.4595
Epoch [18], Batch [350/391], Loss: 3.5250
Train set: Epoch: 18, Average loss:3.8949, LR: 0.001000 Top-1 Accuracy: 13.4860%, Top-5 Accuracy: 35.7660%, Time consumed:50.72s


 18%|████████████████▌                                                                           | 18/100 [18:11<1:21:21, 59.53s/it]

Test set: Epoch: 18, Average loss:4.0211, Top-1 Accuracy: 8.0100%, Top-5 Accuracy: 26.2200%, Time consumed:8.50s

EarlyStopping 카운터: 8 / 10
Epoch [19], Batch [50/391], Loss: 3.4713
Epoch [19], Batch [100/391], Loss: 4.2098
Epoch [19], Batch [150/391], Loss: 3.7448
Epoch [19], Batch [200/391], Loss: 4.0576
Epoch [19], Batch [250/391], Loss: 4.2825
Epoch [19], Batch [300/391], Loss: 3.3730
Epoch [19], Batch [350/391], Loss: 4.2926
Train set: Epoch: 19, Average loss:3.8974, LR: 0.001000 Top-1 Accuracy: 13.6760%, Top-5 Accuracy: 35.8380%, Time consumed:52.61s


 19%|█████████████████▍                                                                          | 19/100 [19:13<1:21:13, 60.16s/it]

Test set: Epoch: 19, Average loss:3.9990, Top-1 Accuracy: 9.5900%, Top-5 Accuracy: 27.2800%, Time consumed:9.02s

EarlyStopping 카운터: 9 / 10
Epoch [20], Batch [50/391], Loss: 4.2931
Epoch [20], Batch [100/391], Loss: 3.7948
Epoch [20], Batch [150/391], Loss: 4.2035
Epoch [20], Batch [200/391], Loss: 4.5825
Epoch [20], Batch [250/391], Loss: 3.8772
Epoch [20], Batch [300/391], Loss: 4.1929
Epoch [20], Batch [350/391], Loss: 3.9609
