In [1]:
# Mount Google Drive at /content/drive; the dataset and checkpoint paths used
# later in this notebook all live under /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader, Dataset
from torchvision import models
import os
import numpy as np

In [3]:
# 1. Dataset paths: training and validation image folders on the mounted Drive
train_dir = '/content/drive/MyDrive/kfood_health_train'
val_dir = '/content/drive/MyDrive/kfood_health_val'

In [4]:
# 2. Data preprocessing setup
class Cutout(object):
    """Zero out random square patches of an image tensor.

    The transform expects a tensor whose dimension 1 is height and dimension 2
    is width (it reads ``img.size(1)`` and ``img.size(2)``), i.e. it must be
    placed after ``ToTensor`` in a transform pipeline.

    Args:
        n_holes: Number of square patches to erase per image.
        length: Nominal side length of each patch in pixels. Patches are
            clipped at the image border, so they can be smaller.
    """

    def __init__(self, n_holes, length):
        self.n_holes = n_holes
        self.length = length

    def __call__(self, img):
        """Return ``img`` multiplied by a 0/1 mask with ``n_holes`` erased squares.

        Hole centres are drawn from the torch RNG rather than NumPy's global
        RNG, so ``torch.manual_seed`` controls them and the draw uses the RNG
        that DataLoader seeds per worker.
        """
        h = img.size(1)
        w = img.size(2)
        half = self.length // 2

        # float32 keeps the same dtype promotion as a NumPy float32 mask would;
        # building it on img.device lets the transform work on GPU tensors too.
        mask = torch.ones((h, w), dtype=torch.float32, device=img.device)

        for _ in range(self.n_holes):
            cy = int(torch.randint(h, (1,)).item())
            cx = int(torch.randint(w, (1,)).item())

            # Clip the square to the image bounds.
            y1, y2 = max(cy - half, 0), min(cy + half, h)
            x1, x2 = max(cx - half, 0), min(cx + half, w)

            mask[y1:y2, x1:x2] = 0.

        # The (h, w) mask is broadcast across the leading (channel) dimension.
        return img * mask.expand_as(img)

# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Channel-wise normalisation shared by the training and validation pipelines.
normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])

# Training pipeline: random geometric augmentation, tensor conversion,
# normalisation, then Cutout (which needs a tensor, so it comes last).
train_augmentations = [
    transforms.RandomResizedCrop(224),      # random scale/aspect crop, output 224x224
    transforms.RandomHorizontalFlip(),      # random left-right flip
    transforms.RandomRotation(degrees=15),  # random rotation within +/-15 degrees
    transforms.ToTensor(),                  # PIL image -> tensor
    normalize,
    Cutout(n_holes=1, length=16),           # erase one 16-pixel square patch
]
train_transform = transforms.Compose(train_augmentations)

# Validation pipeline: deterministic resize + centre crop, no augmentation.
val_steps = [
    transforms.Resize(256),      # an int size scales the shorter edge to 256
    transforms.CenterCrop(224),  # central 224x224 crop
    transforms.ToTensor(),
    normalize,
]
val_transform = transforms.Compose(val_steps)

# One sub-folder per class under each root directory.
train_dataset = torchvision.datasets.ImageFolder(root=train_dir, transform=train_transform)
val_dataset = torchvision.datasets.ImageFolder(root=val_dir, transform=val_transform)

# Only the training loader shuffles; both use the same batch size and workers.
loader_kwargs = dict(batch_size=64, num_workers=2)
train_loader = DataLoader(train_dataset, shuffle=True, **loader_kwargs)
val_loader = DataLoader(val_dataset, shuffle=False, **loader_kwargs)

In [5]:
# 3. Build the model and transfer weights from an earlier checkpoint
model = models.resnet50(pretrained=False)

# Load the checkpoint onto the CPU first so this also works on CPU-only runtimes.
checkpoint = torch.load('/content/drive/MyDrive/model_checkpoint_resnet50_fin.pth', map_location='cpu')

# Replace the final fully connected layer with a 13-class head.
num_ftrs = model.fc.in_features
model.fc = nn.Linear(num_ftrs, 13)

# Transfer only tensors whose name AND shape match the new model; anything
# else (e.g. an 'fc' head with a different class count) keeps its fresh
# initialisation.
model_dict = model.state_dict()
source_dict = checkpoint['model_state_dict']
checkpoint_dict = {
    k: v for k, v in source_dict.items()
    if k in model_dict and model_dict[k].shape == v.shape
}

# The filter above is silent: if the key names do not line up at all (for
# example a 'module.' prefix), nothing would be transferred and training would
# quietly start from random weights. Fail loudly in that case and otherwise
# report what was (not) transferred.
if not checkpoint_dict:
    raise RuntimeError(
        "No tensor in checkpoint['model_state_dict'] matches the model by name "
        "and shape; refusing to continue with randomly initialised weights."
    )
skipped_keys = sorted(set(model_dict) - set(checkpoint_dict))
print(f"Transferred {len(checkpoint_dict)}/{len(model_dict)} tensors from the checkpoint; "
      f"left at initial values: {skipped_keys}")

model_dict.update(checkpoint_dict)
model.load_state_dict(model_dict)

# Move the model to the GPU when available.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)



ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 

In [6]:
# Loss function and optimizer setup
from collections import Counter
from torch import optim

# Tally how many training images belong to each class index.
class_counts = Counter(train_dataset.targets)
print(class_counts)

# Flatten the tally into a list ordered by class index, then total it.
num_classes = len(class_counts)
class_counts = [class_counts[idx] for idx in range(num_classes)]
total_size = sum(class_counts)

# Inverse-frequency class weights: classes with fewer images weigh more.
class_weights = torch.tensor(
    [total_size / n_images for n_images in class_counts],
    dtype=torch.float,
).to(device)

criterion = nn.CrossEntropyLoss(weight=class_weights)
optimizer = optim.Adam(model.parameters(), lr=0.001)

Counter({10: 1551, 1: 1267, 3: 1237, 2: 1177, 8: 1160, 12: 1152, 9: 1040, 4: 1032, 7: 962, 6: 951, 5: 891, 11: 864, 0: 831})


In [7]:
!pip install wandb --upgrade

Collecting wandb
  Downloading wandb-0.15.12-py3-none-any.whl (2.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.1/2.1 MB[0m [31m13.8 MB/s[0m eta [36m0:00:00[0m
Collecting GitPython!=3.1.29,>=1.0.0 (from wandb)
  Downloading GitPython-3.1.40-py3-none-any.whl (190 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m190.6/190.6 kB[0m [31m16.4 MB/s[0m eta [36m0:00:00[0m
Collecting sentry-sdk>=1.0.0 (from wandb)
  Downloading sentry_sdk-1.34.0-py2.py3-none-any.whl (243 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m243.9/243.9 kB[0m [31m20.1 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting docker-pycreds>=0.4.0 (from wandb)
  Downloading docker_pycreds-0.4.0-py2.py3-none-any.whl (9.0 kB)
Collecting pathtools (from wandb)
  Downloading pathtools-0.1.2.tar.gz (11 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting setproctitle (from wandb)
  Downloading setproctitle-1.3.3-cp310-cp310-manylinux_2_5_x86_64.m

In [8]:
import wandb

# Interactive W&B login via the CLI; prompts for an API key (see output below).
!wandb login

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
[34m[1mwandb[0m: Paste an API key from your profile and hit enter, or press ctrl+c to quit: 
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


In [9]:
import os
# Set before wandb.init() is called in the next cell.
# NOTE(review): presumably selects wandb's thread-based start method, a common
# workaround for init hangs in notebook environments — confirm against wandb docs.
os.environ["WANDB_START_METHOD"] = "thread"

In [10]:
# Initialise the W&B run. Be sure to change the project and entity (user name)
# to your own!
# The hyperparameters are passed through `config=` so they are stored with the
# run; assigning a dict to `wandb.config` only rebinds the module attribute and
# does not attach the values to the run.
wandb.init(
    project='M3_res50_weight_trans',
    entity='deuldeulkang',
    name='M3_res50_weight_trans',
    config={
        "learning_rate": 0.001,
        "epochs": 30,
        "batch_size": 64,
    },
)
# Hook the model so W&B can track it during training.
wandb.watch(model)

[34m[1mwandb[0m: Currently logged in as: [33mdeuldeulkang[0m. Use [1m`wandb login --relogin`[0m to force relogin


[]

In [11]:
from tqdm import tqdm

# 5. Define the training and validation functions
def train(model, train_loader, criterion, optimizer, device):
    """Run one training epoch and return the mean per-sample loss.

    Args:
        model: Network to optimise; switched to training mode here.
        train_loader: Iterable yielding ``(inputs, labels)`` batches.
        criterion: Loss callable applied as ``criterion(outputs, labels)``.
        optimizer: Optimizer stepped once per batch.
        device: Device the batches are moved to before the forward pass.

    Returns:
        Sum of ``loss * batch_size`` over all batches divided by the number of
        samples actually processed, or ``0.0`` if the loader yielded nothing.
    """
    model.train()
    running_loss = 0.0
    samples_seen = 0
    for inputs, labels in tqdm(train_loader, desc="Training"):
        inputs, labels = inputs.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        batch_size = inputs.size(0)
        running_loss += loss.item() * batch_size
        samples_seen += batch_size

    # Average over the samples that were really iterated, not len(dataset):
    # the two differ when the loader uses drop_last or a sampler.
    if samples_seen == 0:
        return 0.0
    return running_loss / samples_seen

def validate(model, val_loader, criterion, device):
    """Evaluate the model and return ``(mean_loss, accuracy_percent)``.

    Args:
        model: Network to evaluate; switched to eval mode here.
        val_loader: Iterable yielding ``(inputs, labels)`` batches.
        criterion: Loss callable applied as ``criterion(outputs, labels)``.
        device: Device the batches are moved to before the forward pass.

    Returns:
        A tuple ``(average_loss, accuracy)``. Both are computed over the
        samples actually processed; accuracy is a percentage in ``[0, 100]``.
        ``(0.0, 0.0)`` is returned if the loader yielded nothing.
    """
    model.eval()
    running_loss = 0.0
    correct_predictions = 0
    total_predictions = 0
    with torch.no_grad():  # no gradients needed for evaluation
        for inputs, labels in tqdm(val_loader, desc="Validating"):
            inputs, labels = inputs.to(device), labels.to(device)

            outputs = model(inputs)
            loss = criterion(outputs, labels)

            batch_size = inputs.size(0)
            running_loss += loss.item() * batch_size
            # Predicted class = index of the largest output along dim 1.
            preds = outputs.argmax(dim=1)
            correct_predictions += (preds == labels).sum().item()
            total_predictions += labels.size(0)

    # Use the same denominator for loss and accuracy, and avoid dividing by
    # zero when no batch was produced.
    if total_predictions == 0:
        return 0.0, 0.0
    average_loss = running_loss / total_predictions
    accuracy = (correct_predictions / total_predictions) * 100
    return average_loss, accuracy


- TODO: Before relying on the cell below, rework the checkpoint loading so that it restores only the parts that are actually needed.

In [12]:
# Load a previously saved run (if any) and resume training from it
start_epoch = 0  # epoch to start from when no checkpoint exists
best_acc = 0.0  # best validation accuracy seen so far
save_path = "/content/drive/MyDrive/Mission3_res50_weight_trans.pt"

# Try to restore model/optimizer state; fall back to a fresh start if the
# checkpoint file does not exist yet.
try:
    # map_location=device lets a checkpoint saved on a GPU be restored on a
    # CPU-only runtime (and vice versa).
    checkpoint = torch.load(save_path, map_location=device)
    model.load_state_dict(checkpoint['model_state_dict'])
    optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
    start_epoch = checkpoint['epoch']
    best_acc = checkpoint['best_acc']
    print(f"Loaded checkpoint from epoch {start_epoch}, best accuracy was {best_acc:.2f}%")
except FileNotFoundError:
    print("No checkpoint file found, starting training from scratch.")


No checkpoint file found, starting training from scratch.


In [None]:
# 6. Training loop
num_epochs = 30
# NOTE: best_acc is intentionally NOT reset here. It is initialised (and, when
# a checkpoint exists, restored) by the resume cell above; resetting it to 0
# would let the first resumed epoch overwrite a better saved checkpoint.
for epoch in range(start_epoch, num_epochs):
    train_loss = train(model, train_loader, criterion, optimizer, device)
    val_loss, val_acc = validate(model, val_loader, criterion, device)
    print(f"Epoch {epoch+1}/{num_epochs} - Train Loss: {train_loss:.4f} - Val Loss: {val_loss:.4f} - Val Acc: {val_acc:.2f}%")

    # "Epoch" is logged 0-based, whereas the printed progress line is 1-based.
    wandb.log({
         "Epoch": epoch,
         "Train Loss": train_loss,
         "Validation Loss": val_loss,
         "Validation Accuracy": val_acc

    })

    # Keep only the best model (by validation accuracy). 'epoch' stores the
    # number of completed epochs, which is where a resumed run starts.
    if val_acc > best_acc:
        best_acc = val_acc
        torch.save({
            'epoch': epoch + 1,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'best_acc': best_acc,
        }, save_path)  # same file the resume cell reads from

wandb.finish()

print("Training Complete!")

Training: 100%|██████████| 221/221 [43:14<00:00, 11.74s/it]
Validating: 100%|██████████| 28/28 [05:30<00:00, 11.79s/it]


Epoch 1/30 - Train Loss: 0.6892 - Val Loss: 0.2954 - Val Acc: 88.95%


Training: 100%|██████████| 221/221 [02:58<00:00,  1.24it/s]
Validating: 100%|██████████| 28/28 [00:29<00:00,  1.05s/it]


Epoch 2/30 - Train Loss: 0.3904 - Val Loss: 0.1890 - Val Acc: 93.37%


Training: 100%|██████████| 221/221 [02:56<00:00,  1.25it/s]
Validating: 100%|██████████| 28/28 [00:30<00:00,  1.08s/it]


Epoch 3/30 - Train Loss: 0.3123 - Val Loss: 0.1431 - Val Acc: 94.73%


Training: 100%|██████████| 221/221 [02:58<00:00,  1.24it/s]
Validating: 100%|██████████| 28/28 [00:29<00:00,  1.04s/it]


Epoch 4/30 - Train Loss: 0.2791 - Val Loss: 0.1151 - Val Acc: 95.01%


Training: 100%|██████████| 221/221 [02:57<00:00,  1.24it/s]
Validating: 100%|██████████| 28/28 [00:29<00:00,  1.06s/it]


Epoch 5/30 - Train Loss: 0.2460 - Val Loss: 0.0909 - Val Acc: 95.69%


Training: 100%|██████████| 221/221 [02:55<00:00,  1.26it/s]
Validating: 100%|██████████| 28/28 [00:29<00:00,  1.07s/it]


Epoch 6/30 - Train Loss: 0.2422 - Val Loss: 0.0956 - Val Acc: 95.80%


Training: 100%|██████████| 221/221 [03:00<00:00,  1.23it/s]
Validating: 100%|██████████| 28/28 [00:30<00:00,  1.09s/it]


Epoch 7/30 - Train Loss: 0.2103 - Val Loss: 0.0993 - Val Acc: 95.80%


Training: 100%|██████████| 221/221 [03:01<00:00,  1.22it/s]
Validating: 100%|██████████| 28/28 [00:30<00:00,  1.08s/it]


Epoch 8/30 - Train Loss: 0.2204 - Val Loss: 0.0857 - Val Acc: 96.26%


Training: 100%|██████████| 221/221 [03:03<00:00,  1.21it/s]
Validating: 100%|██████████| 28/28 [00:29<00:00,  1.07s/it]


Epoch 9/30 - Train Loss: 0.1997 - Val Loss: 0.0602 - Val Acc: 97.11%


Training: 100%|██████████| 221/221 [02:59<00:00,  1.23it/s]
Validating: 100%|██████████| 28/28 [00:29<00:00,  1.07s/it]


Epoch 10/30 - Train Loss: 0.2050 - Val Loss: 0.0613 - Val Acc: 96.60%


Training: 100%|██████████| 221/221 [03:00<00:00,  1.22it/s]
Validating: 100%|██████████| 28/28 [00:30<00:00,  1.08s/it]


Epoch 11/30 - Train Loss: 0.1788 - Val Loss: 0.0588 - Val Acc: 97.39%


Training: 100%|██████████| 221/221 [03:01<00:00,  1.22it/s]
Validating: 100%|██████████| 28/28 [00:30<00:00,  1.09s/it]


Epoch 12/30 - Train Loss: 0.1767 - Val Loss: 0.0567 - Val Acc: 97.45%


Training: 100%|██████████| 221/221 [03:03<00:00,  1.20it/s]
Validating: 100%|██████████| 28/28 [00:30<00:00,  1.10s/it]


Epoch 13/30 - Train Loss: 0.1684 - Val Loss: 0.0594 - Val Acc: 97.22%


Training: 100%|██████████| 221/221 [03:05<00:00,  1.19it/s]
Validating: 100%|██████████| 28/28 [00:30<00:00,  1.08s/it]


Epoch 14/30 - Train Loss: 0.1627 - Val Loss: 0.0561 - Val Acc: 97.05%


Training: 100%|██████████| 221/221 [03:01<00:00,  1.22it/s]
Validating: 100%|██████████| 28/28 [00:30<00:00,  1.08s/it]


Epoch 15/30 - Train Loss: 0.1679 - Val Loss: 0.0467 - Val Acc: 97.85%


Training: 100%|██████████| 221/221 [02:58<00:00,  1.24it/s]
Validating: 100%|██████████| 28/28 [00:29<00:00,  1.05s/it]


Epoch 16/30 - Train Loss: 0.1670 - Val Loss: 0.0596 - Val Acc: 97.45%


Training: 100%|██████████| 221/221 [02:57<00:00,  1.24it/s]
Validating: 100%|██████████| 28/28 [00:30<00:00,  1.10s/it]


Epoch 17/30 - Train Loss: 0.1485 - Val Loss: 0.0487 - Val Acc: 97.56%


Training: 100%|██████████| 221/221 [02:57<00:00,  1.24it/s]
Validating: 100%|██████████| 28/28 [00:29<00:00,  1.04s/it]


Epoch 18/30 - Train Loss: 0.1588 - Val Loss: 0.0451 - Val Acc: 97.51%


Training: 100%|██████████| 221/221 [02:55<00:00,  1.26it/s]
Validating: 100%|██████████| 28/28 [00:29<00:00,  1.04s/it]


Epoch 19/30 - Train Loss: 0.1481 - Val Loss: 0.0492 - Val Acc: 97.39%


Training: 100%|██████████| 221/221 [02:56<00:00,  1.25it/s]
Validating: 100%|██████████| 28/28 [00:29<00:00,  1.07s/it]


Epoch 20/30 - Train Loss: 0.1455 - Val Loss: 0.0455 - Val Acc: 97.51%


Training: 100%|██████████| 221/221 [02:54<00:00,  1.27it/s]
Validating: 100%|██████████| 28/28 [00:28<00:00,  1.03s/it]


Epoch 21/30 - Train Loss: 0.1439 - Val Loss: 0.0708 - Val Acc: 96.77%


Training: 100%|██████████| 221/221 [02:52<00:00,  1.28it/s]
Validating: 100%|██████████| 28/28 [00:28<00:00,  1.03s/it]


Epoch 22/30 - Train Loss: 0.1412 - Val Loss: 0.0489 - Val Acc: 97.17%


Training:  59%|█████▉    | 130/221 [01:43<01:10,  1.30it/s]