In [10]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
import torchvision
import torchvision.transforms as transforms
import sys
import os
import torch
import time
import random
import numpy as np
import wandb
from tqdm import tqdm

from models.resnet import resnet18, resnet34, resnet50

# Authenticate with Weights & Biases without embedding a secret in the source.
# The key is taken from the WANDB_API_KEY environment variable; when it is
# unset, key=None lets wandb fall back to its own credential lookup.
# NOTE(security): the API key that used to be hard-coded on this line has been
# exposed in the source and should be revoked and regenerated.
wandb.login(key=os.environ.get("WANDB_API_KEY"))
wandb.init(project="PBL-2", name="resnet50-cutmix")


"""
# 학습 재현성 고정
def fix_seed(seed, deterministic=False):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    if deterministic:
        torch.backends.cudnn.deterministic = True
    else:
        torch.backends.cudnn.benchmark = True  # 성능 향상을 위해 True로 변경

deterministic=True와 benchmark=False는 확실히 학습 속도를 저하시킬 수 있습니다.
특히 torch.backends.cudnn.benchmark=False는 CUDA가 최적의 알고리즘을 찾기 위한 
벤치마킹을 수행하지 않게 만들어 성능이 떨어질 수 있습니다.
# 속도 우선 설정 -> 완벽한 재현성은 보장되지 않음 

fix_seed(2025, deterministic=False)
"""

# Run hyperparameters; the same mapping is mirrored into the W&B run config.
config = dict(
    model="resnet50",
    batch_size=128,
    num_epochs=100,
    learning_rate=0.001,
    optimizer="Adam",
    seed=2025,
    deterministic=False,
)
wandb.config.update(config)

# CIFAR-100 datasets and loaders.
# Per-channel mean / std handed to Normalize for both splits.
CIFAR100_MEAN = (0.5071, 0.4867, 0.4408)
CIFAR100_STD = (0.2675, 0.2565, 0.2761)

# Training pipeline: random horizontal flip, then tensor conversion and
# normalisation. (The original list was missing the comma after the flip
# transform, which made the whole file a SyntaxError.)
transform_train = transforms.Compose([
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(CIFAR100_MEAN, CIFAR100_STD),
])

# Test pipeline: no augmentation, same normalisation as training.
transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(CIFAR100_MEAN, CIFAR100_STD),
])

trainset = torchvision.datasets.CIFAR100(
    root='./data', train=True, download=True, transform=transform_train)
trainloader = DataLoader(trainset, batch_size=config["batch_size"], shuffle=True, num_workers=16)

testset = torchvision.datasets.CIFAR100(
    root='./data', train=False, download=True, transform=transform_test)
testloader = DataLoader(testset, batch_size=config["batch_size"], shuffle=False, num_workers=16)


def cutmix(batch, alpha):
    """Apply CutMix to one collated batch.

    A rectangular patch is copied into every image from another image of the
    same batch (chosen by a random permutation). The same patch location is
    used for the whole batch.

    Args:
        batch: ``(data, targets)`` pair. ``data`` is indexed as
            ``data[:, :, y, x]`` (so it must be 4-D) and ``targets`` is
            indexed along its first dimension.
        alpha: Both parameters of the Beta distribution from which the
            target mixing ratio is drawn.

    Returns:
        ``(data, (targets, shuffled_targets, lam))`` where ``lam`` is the
        fraction of each image that still comes from the original sample.
        ``data`` is the input tensor itself, modified in place.
    """
    data, targets = batch

    indices = torch.randperm(data.size(0))
    shuffled_data = data[indices]  # advanced indexing -> independent copy
    shuffled_targets = targets[indices]

    lam = np.random.beta(alpha, alpha)

    image_h, image_w = data.shape[2:]
    cx = np.random.uniform(0, image_w)
    cy = np.random.uniform(0, image_h)
    w = image_w * np.sqrt(1 - lam)
    h = image_h * np.sqrt(1 - lam)
    x0 = int(np.round(max(cx - w / 2, 0)))
    x1 = int(np.round(min(cx + w / 2, image_w)))
    y0 = int(np.round(max(cy - h / 2, 0)))
    y1 = int(np.round(min(cy + h / 2, image_h)))

    data[:, :, y0:y1, x0:x1] = shuffled_data[:, :, y0:y1, x0:x1]

    # The patch is rounded to whole pixels and clipped at the image border,
    # so the sampled ratio no longer matches the pasted area. Report the
    # ratio that was actually applied so the label weights match the pixels.
    lam = 1 - (y1 - y0) * (x1 - x0) / (image_h * image_w)
    targets = (targets, shuffled_targets, lam)

    return data, targets


class CutMixCollator:
    """Collate function object that applies CutMix to every collated batch."""

    def __init__(self, alpha):
        # Beta-distribution parameter forwarded to cutmix() on each call.
        self.alpha = alpha

    def __call__(self, batch):
        """Collate the samples with the default collate, then mix the result."""
        collated = torch.utils.data.dataloader.default_collate(batch)
        return cutmix(collated, self.alpha)


class CutMixCriterion:
    """Cross-entropy loss for CutMix targets.

    Targets are the triple ``(targets1, targets2, lam)``; the loss is the
    ``lam``-weighted blend of the cross-entropy against each label set.
    """

    def __init__(self, reduction):
        self.criterion = nn.CrossEntropyLoss(reduction=reduction)

    def __call__(self, preds, targets):
        first_targets, second_targets, mix_ratio = targets
        loss_first = self.criterion(preds, first_targets)
        loss_second = self.criterion(preds, second_targets)
        return mix_ratio * loss_first + (1 - mix_ratio) * loss_second




def _cutmix_box(image_h, image_w, lam):
    """Sample a CutMix patch for mixing ratio ``lam``, clipped to the image.

    Returns ``(x0, y0, x1, y1)`` as integer pixel bounds (end-exclusive).
    """
    cx = np.random.uniform(0, image_w)
    cy = np.random.uniform(0, image_h)
    w = image_w * np.sqrt(1 - lam)
    h = image_h * np.sqrt(1 - lam)
    x0 = int(np.round(max(cx - w / 2, 0)))
    x1 = int(np.round(min(cx + w / 2, image_w)))
    y0 = int(np.round(max(cy - h / 2, 0)))
    y1 = int(np.round(min(cy + h / 2, image_h)))
    return x0, y0, x1, y1


def train(model, trainloader, criterion, optimizer, device, epoch):
    """Train the model for one epoch, applying CutMix to about half the batches.

    Args:
        model: Network to optimise; it is switched to training mode.
        trainloader: Sized iterable yielding ``(inputs, labels)`` batches.
            ``inputs`` must be 4-D (it is indexed as ``[:, :, y, x]``).
        criterion: Loss callable taking ``(outputs, labels)``.
        optimizer: Optimizer stepped once per batch.
        device: Device the batches are moved to.
        epoch: Zero-based epoch index, used only for log messages.

    Returns:
        ``(epoch_loss, accuracy)``: the mean of the per-batch losses and the
        top-1 accuracy in percent. Accuracy is always measured against the
        original labels, including on CutMix batches. Both values are 0.0
        when the loader yields no samples.
    """
    model.train()
    start_time = time.time()
    running_loss = 0.0
    correct = 0
    total = 0
    num_batches = len(trainloader)

    for i, (inputs, labels) in enumerate(trainloader):
        inputs, labels = inputs.to(device), labels.to(device)

        # Coin flip: apply CutMix to roughly 50% of the batches.
        if np.random.rand() < 0.5:
            indices = torch.randperm(inputs.size(0)).to(device)
            shuffled_labels = labels[indices]

            # Beta(1, 1) is the uniform distribution on [0, 1].
            lam = np.random.beta(1.0, 1.0)

            image_h, image_w = inputs.shape[2:]
            x0, y0, x1, y1 = _cutmix_box(image_h, image_w, lam)

            # Paste the patch from the permuted batch. The right-hand side is
            # an advanced-indexing copy, so the in-place write is safe.
            inputs[:, :, y0:y1, x0:x1] = inputs[indices, :, y0:y1, x0:x1]

            # Use the ratio actually applied: the box is rounded and clipped.
            lam = 1 - ((y1 - y0) * (x1 - x0) / (image_h * image_w))

            outputs = model(inputs)
            loss = lam * criterion(outputs, labels) + (1 - lam) * criterion(outputs, shuffled_labels)
        else:
            # Plain forward pass and loss.
            outputs = model(inputs)
            loss = criterion(outputs, labels)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

        # Accuracy against the original labels (also for CutMix batches).
        _, predicted = outputs.max(1)
        total += labels.size(0)
        correct += predicted.eq(labels).sum().item()

        if (i + 1) % 50 == 0:  # progress line every 50 batches
            print(f'Epoch [{epoch+1}], Batch [{i+1}/{num_batches}], Loss: {loss.item():.4f}')

    # Guard the averages so an empty loader reports zeros instead of raising
    # ZeroDivisionError.
    epoch_loss = running_loss / num_batches if num_batches else 0.0
    accuracy = 100.0 * correct / total if total else 0.0

    train_time = time.time() - start_time

    print(f'Train set: Epoch: {epoch+1}, Average loss:{epoch_loss:.4f}, LR: {optimizer.param_groups[0]["lr"]:.6f} '
          f'Top-1 Accuracy: {accuracy:.4f}%, Time consumed:{train_time:.2f}s')

    return epoch_loss, accuracy

def evaluate(model, testloader, criterion, device, epoch):
    """Evaluate the model on a loader without computing gradients.

    Args:
        model: Network to evaluate; it is switched to evaluation mode.
        testloader: Sized iterable yielding ``(inputs, labels)`` batches.
        criterion: Loss callable taking ``(outputs, labels)``.
        device: Device the batches are moved to.
        epoch: Zero-based epoch index, used only for log messages.

    Returns:
        ``(test_loss, accuracy)``: the mean of the per-batch losses and the
        top-1 accuracy in percent. Both are 0.0 when the loader yields no
        samples.
    """
    model.eval()
    start_time = time.time()

    test_loss = 0.0
    correct = 0
    total = 0

    with torch.no_grad():
        for inputs, labels in testloader:
            inputs, labels = inputs.to(device), labels.to(device)

            outputs = model(inputs)

            loss = criterion(outputs, labels)
            test_loss += loss.item()

            # Top-1 prediction is the arg-max over the class dimension.
            _, predicted = outputs.max(1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    # Guard the averages so an empty loader reports zeros instead of raising
    # ZeroDivisionError.
    num_batches = len(testloader)
    test_loss = test_loss / num_batches if num_batches else 0.0
    accuracy = 100.0 * correct / total if total else 0.0

    eval_time = time.time() - start_time

    print(f'Test set: Epoch: {epoch+1}, Average loss:{test_loss:.4f}, '
          f'Top-1 Accuracy: {accuracy:.4f}%, Time consumed:{eval_time:.2f}s')
    print()

    return test_loss, accuracy

# Main training loop
def main_training_loop(model, trainloader, testloader, criterion, optimizer, device, num_epochs=10):
    """Run train/evaluate for ``num_epochs`` epochs and keep the best checkpoint.

    After every epoch the metrics are logged to W&B. Whenever the test
    accuracy improves, the weights are written to
    ``best_model_<run name>.pth`` and handed to ``wandb.save``. The best
    accuracy is stored in the run summary at the end.

    Args:
        model: Network to train (optionally wrapped in ``nn.DataParallel``).
        trainloader: Loader passed to ``train``.
        testloader: Loader passed to ``evaluate``.
        criterion: Loss callable shared by training and evaluation.
        optimizer: Optimizer passed to ``train``.
        device: Device the batches are moved to.
        num_epochs: Number of epochs to run.
    """
    best_acc = 0.0

    # tqdm shows epoch-level progress.
    for epoch in tqdm(range(num_epochs)):
        train_loss, train_acc = train(model, trainloader, criterion, optimizer, device, epoch)
        test_loss, test_acc = evaluate(model, testloader, criterion, device, epoch)

        wandb.log({
            "epoch": epoch + 1,
            "learning_rate": optimizer.param_groups[0]['lr'],
            "train_loss": train_loss,
            "train_accuracy": train_acc,
            "test_loss": test_loss,
            "test_accuracy": test_acc
        })

        # Checkpoint whenever the test accuracy improves.
        if test_acc > best_acc:
            best_acc = test_acc
            print(f'new best accuracy: {best_acc:.2f}%')

            model_path = f'best_model_{wandb.run.name}.pth'
            # A DataParallel wrapper prefixes every state-dict key with
            # "module.", which breaks loading into an unwrapped model; save
            # the underlying module's weights instead.
            unwrapped = model.module if isinstance(model, nn.DataParallel) else model
            torch.save(unwrapped.state_dict(), model_path)

            # Hand the checkpoint file to the W&B run.
            wandb.save(model_path)

    print(f'finish! best accuracy: {best_acc:.2f}%')
    wandb.run.summary["best_accuracy"] = best_acc

# Select the compute device: CUDA when available, otherwise CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# Build the model, loss function and optimizer
model = resnet50().to(device)  
criterion = nn.CrossEntropyLoss()  # loss function
optimizer = optim.Adam(model.parameters(), lr=config["learning_rate"])  # optimizer

# Register the model with W&B (log="all"); done before any DataParallel wrap
wandb.watch(model, log="all")

# Wrap the model in DataParallel when more than one GPU is visible
if torch.cuda.device_count() > 1:
    print(f"{torch.cuda.device_count()}개의 GPU를 사용합니다.")
    model = nn.DataParallel(model)

# Record the wall-clock start of training
start_time = time.time()

# Run the main training loop
main_training_loop(
    model=model,
    trainloader=trainloader,
    testloader=testloader,
    criterion=criterion,
    optimizer=optimizer,
    device=device,
    num_epochs=config["num_epochs"]
)

# Record the end time, then log and print the total training duration
end_time = time.time()
total_time = end_time - start_time
wandb.log({"total_training_time": total_time})

print(f"Total training time: {total_time:.2f} seconds")

# Close the W&B run
wandb.finish()



Files already downloaded and verified
Files already downloaded and verified
Using device: cuda
2개의 GPU를 사용합니다.


  0%|                                                                                                        | 0/20 [00:00<?, ?it/s]

Epoch [1], Batch [50/391], Loss: 4.0298
Epoch [1], Batch [100/391], Loss: 3.9639
Epoch [1], Batch [150/391], Loss: 3.7676
Epoch [1], Batch [200/391], Loss: 3.7366
Epoch [1], Batch [250/391], Loss: 3.1741
Epoch [1], Batch [300/391], Loss: 3.3485
Epoch [1], Batch [350/391], Loss: 3.0976
Train set: Epoch: 1, Average loss:3.6294, LR: 0.001000 Top-1 Accuracy: 14.2620%, Time consumed:136.77s


  5%|████▊                                                                                          | 1/20 [02:40<50:56, 160.89s/it]

Test set: Epoch: 1, Average loss:3.3422, Top-1 Accuracy: 19.4200%, Time consumed:24.01s

new best accuracy: 19.42%
Epoch [2], Batch [50/391], Loss: 3.0490
Epoch [2], Batch [100/391], Loss: 2.8896
Epoch [2], Batch [150/391], Loss: 2.7912
Epoch [2], Batch [200/391], Loss: 2.6927
Epoch [2], Batch [250/391], Loss: 2.5435
Epoch [2], Batch [300/391], Loss: 2.3125
Epoch [2], Batch [350/391], Loss: 2.2837
Train set: Epoch: 2, Average loss:2.6827, LR: 0.001000 Top-1 Accuracy: 30.8280%, Time consumed:138.39s


 10%|█████████▌                                                                                     | 2/20 [05:25<48:51, 162.89s/it]

Test set: Epoch: 2, Average loss:2.4428, Top-1 Accuracy: 35.9100%, Time consumed:25.73s

new best accuracy: 35.91%
Epoch [3], Batch [50/391], Loss: 2.3539
Epoch [3], Batch [100/391], Loss: 2.0003
Epoch [3], Batch [150/391], Loss: 1.9958
Epoch [3], Batch [200/391], Loss: 1.9782
Epoch [3], Batch [250/391], Loss: 1.8713
Epoch [3], Batch [300/391], Loss: 1.9979
Epoch [3], Batch [350/391], Loss: 2.2639
Train set: Epoch: 3, Average loss:2.1181, LR: 0.001000 Top-1 Accuracy: 42.7500%, Time consumed:138.80s


 15%|██████████████▎                                                                                | 3/20 [08:08<46:10, 162.95s/it]

Test set: Epoch: 3, Average loss:2.0910, Top-1 Accuracy: 43.4100%, Time consumed:24.09s

new best accuracy: 43.41%
Epoch [4], Batch [50/391], Loss: 1.5676
Epoch [4], Batch [100/391], Loss: 1.6410
Epoch [4], Batch [150/391], Loss: 1.8668
Epoch [4], Batch [200/391], Loss: 1.7898
Epoch [4], Batch [250/391], Loss: 1.8302
Epoch [4], Batch [300/391], Loss: 1.6693
Epoch [4], Batch [350/391], Loss: 1.7630
Train set: Epoch: 4, Average loss:1.7287, LR: 0.001000 Top-1 Accuracy: 51.8680%, Time consumed:130.59s


 20%|███████████████████                                                                            | 4/20 [10:42<42:34, 159.66s/it]

Test set: Epoch: 4, Average loss:1.8637, Top-1 Accuracy: 48.9400%, Time consumed:23.89s

new best accuracy: 48.94%
Epoch [5], Batch [50/391], Loss: 1.2933
Epoch [5], Batch [100/391], Loss: 1.1365
Epoch [5], Batch [150/391], Loss: 1.5346
Epoch [5], Batch [200/391], Loss: 1.4484
Epoch [5], Batch [250/391], Loss: 1.4076
Epoch [5], Batch [300/391], Loss: 1.5290
Epoch [5], Batch [350/391], Loss: 1.2781
Train set: Epoch: 5, Average loss:1.4041, LR: 0.001000 Top-1 Accuracy: 59.7280%, Time consumed:128.94s


 25%|███████████████████████▊                                                                       | 5/20 [13:16<39:21, 157.43s/it]

Test set: Epoch: 5, Average loss:1.7678, Top-1 Accuracy: 51.7100%, Time consumed:24.37s

new best accuracy: 51.71%
Epoch [6], Batch [50/391], Loss: 1.0433
Epoch [6], Batch [100/391], Loss: 0.9669
Epoch [6], Batch [150/391], Loss: 1.0686
Epoch [6], Batch [200/391], Loss: 1.2374
Epoch [6], Batch [250/391], Loss: 1.1422
Epoch [6], Batch [300/391], Loss: 1.2711
Epoch [6], Batch [350/391], Loss: 1.1228
Train set: Epoch: 6, Average loss:1.1068, LR: 0.001000 Top-1 Accuracy: 67.3720%, Time consumed:130.13s


 30%|████████████████████████████▌                                                                  | 6/20 [15:50<36:28, 156.30s/it]

Test set: Epoch: 6, Average loss:1.7758, Top-1 Accuracy: 52.3200%, Time consumed:23.85s

new best accuracy: 52.32%
Epoch [7], Batch [50/391], Loss: 0.7637
Epoch [7], Batch [100/391], Loss: 0.7555
Epoch [7], Batch [150/391], Loss: 0.9019
Epoch [7], Batch [200/391], Loss: 0.7866
Epoch [7], Batch [250/391], Loss: 0.7748
Epoch [7], Batch [300/391], Loss: 0.6968
Epoch [7], Batch [350/391], Loss: 0.7676
Train set: Epoch: 7, Average loss:0.8081, LR: 0.001000 Top-1 Accuracy: 75.5120%, Time consumed:134.29s


 35%|█████████████████████████████████▎                                                             | 7/20 [18:28<34:01, 157.02s/it]

Test set: Epoch: 7, Average loss:1.8129, Top-1 Accuracy: 54.1300%, Time consumed:24.08s

new best accuracy: 54.13%
Epoch [8], Batch [50/391], Loss: 0.3099
Epoch [8], Batch [100/391], Loss: 0.4450
Epoch [8], Batch [150/391], Loss: 0.4690
Epoch [8], Batch [200/391], Loss: 0.5830
Epoch [8], Batch [250/391], Loss: 0.5329
Epoch [8], Batch [300/391], Loss: 0.6182
Epoch [8], Batch [350/391], Loss: 0.4818
Train set: Epoch: 8, Average loss:0.5333, LR: 0.001000 Top-1 Accuracy: 83.5440%, Time consumed:141.02s


 40%|██████████████████████████████████████                                                         | 8/20 [21:13<31:54, 159.53s/it]

Test set: Epoch: 8, Average loss:2.0310, Top-1 Accuracy: 52.7500%, Time consumed:23.90s

Epoch [9], Batch [50/391], Loss: 0.2430
Epoch [9], Batch [100/391], Loss: 0.2604
Epoch [9], Batch [150/391], Loss: 0.3790
Epoch [9], Batch [200/391], Loss: 0.4073
Epoch [9], Batch [250/391], Loss: 0.2384
Epoch [9], Batch [300/391], Loss: 0.3669
Epoch [9], Batch [350/391], Loss: 0.3917
Train set: Epoch: 9, Average loss:0.3063, LR: 0.001000 Top-1 Accuracy: 90.6200%, Time consumed:133.58s


 45%|██████████████████████████████████████████▊                                                    | 9/20 [23:52<29:12, 159.32s/it]

Test set: Epoch: 9, Average loss:2.1112, Top-1 Accuracy: 53.5000%, Time consumed:25.26s

Epoch [10], Batch [50/391], Loss: 0.1703
Epoch [10], Batch [100/391], Loss: 0.1354
Epoch [10], Batch [150/391], Loss: 0.1752
Epoch [10], Batch [200/391], Loss: 0.1629
Epoch [10], Batch [250/391], Loss: 0.1677
Epoch [10], Batch [300/391], Loss: 0.2342
Epoch [10], Batch [350/391], Loss: 0.2288
Train set: Epoch: 10, Average loss:0.2033, LR: 0.001000 Top-1 Accuracy: 93.8860%, Time consumed:129.89s


 50%|███████████████████████████████████████████████                                               | 10/20 [26:26<26:16, 157.61s/it]

Test set: Epoch: 10, Average loss:2.2632, Top-1 Accuracy: 53.2600%, Time consumed:23.90s

Epoch [11], Batch [50/391], Loss: 0.1416
Epoch [11], Batch [100/391], Loss: 0.1603
Epoch [11], Batch [150/391], Loss: 0.1157
Epoch [11], Batch [200/391], Loss: 0.1994
Epoch [11], Batch [250/391], Loss: 0.1662
Epoch [11], Batch [300/391], Loss: 0.1944
Epoch [11], Batch [350/391], Loss: 0.1561
Train set: Epoch: 11, Average loss:0.1446, LR: 0.001000 Top-1 Accuracy: 95.7680%, Time consumed:131.85s


 55%|███████████████████████████████████████████████████▋                                          | 11/20 [29:02<23:33, 157.09s/it]

Test set: Epoch: 11, Average loss:2.4521, Top-1 Accuracy: 52.9200%, Time consumed:24.07s

Epoch [12], Batch [50/391], Loss: 0.0672
Epoch [12], Batch [100/391], Loss: 0.0620
Epoch [12], Batch [150/391], Loss: 0.1073
Epoch [12], Batch [200/391], Loss: 0.0857
Epoch [12], Batch [250/391], Loss: 0.1460
Epoch [12], Batch [300/391], Loss: 0.0940
Epoch [12], Batch [350/391], Loss: 0.1856
Train set: Epoch: 12, Average loss:0.1114, LR: 0.001000 Top-1 Accuracy: 96.7500%, Time consumed:129.64s


 60%|████████████████████████████████████████████████████████▍                                     | 12/20 [31:36<20:48, 156.08s/it]

Test set: Epoch: 12, Average loss:2.6333, Top-1 Accuracy: 51.2500%, Time consumed:24.10s

Epoch [13], Batch [50/391], Loss: 0.0945
Epoch [13], Batch [100/391], Loss: 0.0918
Epoch [13], Batch [150/391], Loss: 0.1282
Epoch [13], Batch [200/391], Loss: 0.0839
Epoch [13], Batch [250/391], Loss: 0.2562
Epoch [13], Batch [300/391], Loss: 0.1048
Epoch [13], Batch [350/391], Loss: 0.2421
Train set: Epoch: 13, Average loss:0.1264, LR: 0.001000 Top-1 Accuracy: 96.1020%, Time consumed:132.21s


 65%|█████████████████████████████████████████████████████████████                                 | 13/20 [34:12<18:13, 156.24s/it]

Test set: Epoch: 13, Average loss:2.5630, Top-1 Accuracy: 52.7700%, Time consumed:24.40s

Epoch [14], Batch [50/391], Loss: 0.1036
Epoch [14], Batch [100/391], Loss: 0.0856
Epoch [14], Batch [150/391], Loss: 0.0892
Epoch [14], Batch [200/391], Loss: 0.1525
Epoch [14], Batch [250/391], Loss: 0.0806
Epoch [14], Batch [300/391], Loss: 0.1718
Epoch [14], Batch [350/391], Loss: 0.0603
Train set: Epoch: 14, Average loss:0.1229, LR: 0.001000 Top-1 Accuracy: 96.1060%, Time consumed:137.35s


 70%|█████████████████████████████████████████████████████████████████▊                            | 14/20 [36:54<15:47, 157.93s/it]

Test set: Epoch: 14, Average loss:2.6861, Top-1 Accuracy: 51.9200%, Time consumed:24.47s

Epoch [15], Batch [50/391], Loss: 0.0926
Epoch [15], Batch [100/391], Loss: 0.0552
Epoch [15], Batch [150/391], Loss: 0.1412
Epoch [15], Batch [200/391], Loss: 0.1498
Epoch [15], Batch [250/391], Loss: 0.1355
Epoch [15], Batch [300/391], Loss: 0.1508
Epoch [15], Batch [350/391], Loss: 0.0868
Train set: Epoch: 15, Average loss:0.1069, LR: 0.001000 Top-1 Accuracy: 96.6020%, Time consumed:137.69s


 75%|██████████████████████████████████████████████████████████████████████▌                       | 15/20 [39:37<13:16, 159.32s/it]

Test set: Epoch: 15, Average loss:2.7173, Top-1 Accuracy: 52.4200%, Time consumed:24.87s

Epoch [16], Batch [50/391], Loss: 0.0767
Epoch [16], Batch [100/391], Loss: 0.0825
Epoch [16], Batch [150/391], Loss: 0.0949
Epoch [16], Batch [200/391], Loss: 0.1012
Epoch [16], Batch [250/391], Loss: 0.0909
Epoch [16], Batch [300/391], Loss: 0.1295
Epoch [16], Batch [350/391], Loss: 0.0936
Train set: Epoch: 16, Average loss:0.0909, LR: 0.001000 Top-1 Accuracy: 97.1180%, Time consumed:137.84s


 80%|███████████████████████████████████████████████████████████████████████████▏                  | 16/20 [42:20<10:42, 160.61s/it]

Test set: Epoch: 16, Average loss:2.7615, Top-1 Accuracy: 53.2500%, Time consumed:25.77s

Epoch [17], Batch [50/391], Loss: 0.0256
Epoch [17], Batch [100/391], Loss: 0.1063
Epoch [17], Batch [150/391], Loss: 0.0269
Epoch [17], Batch [200/391], Loss: 0.1085
Epoch [17], Batch [250/391], Loss: 0.0704
Epoch [17], Batch [300/391], Loss: 0.0692
Epoch [17], Batch [350/391], Loss: 0.0857
Train set: Epoch: 17, Average loss:0.0713, LR: 0.001000 Top-1 Accuracy: 97.7360%, Time consumed:133.44s


 85%|███████████████████████████████████████████████████████████████████████████████▉              | 17/20 [45:00<08:00, 160.24s/it]

Test set: Epoch: 17, Average loss:2.9736, Top-1 Accuracy: 52.3800%, Time consumed:25.91s

Epoch [18], Batch [50/391], Loss: 0.0330
Epoch [18], Batch [100/391], Loss: 0.0255
Epoch [18], Batch [150/391], Loss: 0.0658
Epoch [18], Batch [200/391], Loss: 0.0337
Epoch [18], Batch [250/391], Loss: 0.0456
Epoch [18], Batch [300/391], Loss: 0.1444
Epoch [18], Batch [350/391], Loss: 0.0642
Train set: Epoch: 18, Average loss:0.0711, LR: 0.001000 Top-1 Accuracy: 97.7980%, Time consumed:137.47s


 90%|████████████████████████████████████████████████████████████████████████████████████▌         | 18/20 [47:43<05:22, 161.14s/it]

Test set: Epoch: 18, Average loss:2.9213, Top-1 Accuracy: 52.3500%, Time consumed:25.78s

Epoch [19], Batch [50/391], Loss: 0.0498
Epoch [19], Batch [100/391], Loss: 0.0939
Epoch [19], Batch [150/391], Loss: 0.0565
Epoch [19], Batch [200/391], Loss: 0.1902
Epoch [19], Batch [250/391], Loss: 0.0587
Epoch [19], Batch [300/391], Loss: 0.0331
Epoch [19], Batch [350/391], Loss: 0.0870
Train set: Epoch: 19, Average loss:0.0896, LR: 0.001000 Top-1 Accuracy: 97.1520%, Time consumed:139.54s


 95%|█████████████████████████████████████████████████████████████████████████████████████████▎    | 19/20 [50:26<02:41, 161.63s/it]

Test set: Epoch: 19, Average loss:3.3095, Top-1 Accuracy: 50.7000%, Time consumed:23.21s

Epoch [20], Batch [50/391], Loss: 0.0545
Epoch [20], Batch [100/391], Loss: 0.0499
Epoch [20], Batch [150/391], Loss: 0.0306
Epoch [20], Batch [200/391], Loss: 0.0350
Epoch [20], Batch [250/391], Loss: 0.0907
Epoch [20], Batch [300/391], Loss: 0.0412
Epoch [20], Batch [350/391], Loss: 0.1017
Train set: Epoch: 20, Average loss:0.0736, LR: 0.001000 Top-1 Accuracy: 97.6180%, Time consumed:131.23s


100%|██████████████████████████████████████████████████████████████████████████████████████████████| 20/20 [53:09<00:00, 159.49s/it]

Test set: Epoch: 20, Average loss:2.9964, Top-1 Accuracy: 53.3200%, Time consumed:32.37s

finish! best accuracy: 54.13%
Total training time: 3189.73 seconds





0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
learning_rate,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
test_accuracy,▁▄▆▇███████▇██████▇█
test_loss,█▄▂▁▁▁▁▂▃▃▄▅▅▅▅▅▆▆█▆
total_training_time,▁
train_accuracy,▁▂▃▄▅▅▆▇▇███████████
train_loss,█▆▅▄▄▃▂▂▁▁▁▁▁▁▁▁▁▁▁▁

0,1
best_accuracy,54.13
epoch,20.0
learning_rate,0.001
test_accuracy,53.32
test_loss,2.99645
total_training_time,3189.73193
train_accuracy,97.618
train_loss,0.07356
