In [1]:
# 필요 라이브러리 import
import os, sys
import random
import time
import gc

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import torch
import torchvision
import torchvision.models as models
from torchvision import datasets, transforms

from torch.utils.data import Dataset, DataLoader, random_split
from torchsummary import summary

import torch.nn as nn
import torch.optim as optim

from PIL import Image
from tqdm import tqdm
from timm import create_model

In [2]:
# 데이터 경로 및 SEED 설정
DATA_DIR = "dataset"
SEED = 0xC0FFEE     # 12648430

# 재현성을 위한 시드 고정
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
torch.cuda.manual_seed(SEED)
torch.cuda.manual_seed_all(SEED)
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

# 데이터 로드

In [3]:
# 이미지 전처리 정의
transform = transforms.Compose([
    transforms.Grayscale(num_output_channels=3),                                    # 흑백 이미지 load (output = 3ch)
    transforms.Resize((224, 224)),                                                  # 224x224 resize
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])     # ImageNet 정규화
    ])

# ImageFolder를 사용하여 데이터셋 만들기
original_dataset = datasets.ImageFolder(root="augmented_trainset")

# train/validation 분리
train_size = len(original_dataset) - 800
val_size = 800
train_dataset, val_dataset = random_split(original_dataset, [train_size, val_size])

# transform 적용
train_dataset.dataset.transform = transform
val_dataset.dataset.transform = transform

In [4]:
# DataLoader 인스턴스 생성
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True, num_workers=8)
validation_loader = DataLoader(val_dataset, batch_size=1, shuffle=False, num_workers=8)

In [5]:
# 데이터 형태 확인
x, y = next(iter(train_loader))
print(x.shape, y.shape)

x, y = next(iter(validation_loader))
print(x.shape, y.shape)

torch.Size([32, 3, 224, 224]) torch.Size([32])
torch.Size([1, 3, 224, 224]) torch.Size([1])


# Label 확인

In [5]:
classes = original_dataset.classes
print(classes)

idx_to_class = {v: k for k, v in original_dataset.class_to_idx.items()}
print(idx_to_class)

['aphids', 'armyworm', 'blisterbeetle', 'cicadellidae', 'cornborer', 'cricket', 'delicatula', 'limacodidae', 'miridae', 'viridis']
{0: 'aphids', 1: 'armyworm', 2: 'blisterbeetle', 3: 'cicadellidae', 4: 'cornborer', 5: 'cricket', 6: 'delicatula', 7: 'limacodidae', 8: 'miridae', 9: 'viridis'}


# 모델 정의

In [6]:
# CUDA 사용 가능 여부 확인
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)

cuda:0


In [7]:
# 모델 정의
class SwinModel(nn.Module):
    def __init__(self, num_classes):
        super(SwinModel, self).__init__()
        self.swin = create_model('swin_large_patch4_window7_224', pretrained=True, num_classes=num_classes)

    def forward(self, x):
        return self.swin(x)
    
model = SwinModel(num_classes=len(classes)).to(device)

model.safetensors:   0%|          | 0.00/788M [00:00<?, ?B/s]

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to see activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


In [8]:
# 모델 확인
summary(model, (3, 224, 224))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1          [-1, 192, 56, 56]           9,408
         LayerNorm-2          [-1, 56, 56, 192]             384
        PatchEmbed-3          [-1, 56, 56, 192]               0
          Identity-4          [-1, 56, 56, 192]               0
         LayerNorm-5          [-1, 56, 56, 192]             384
            Linear-6              [-1, 49, 576]         111,168
           Softmax-7            [-1, 6, 49, 49]               0
           Dropout-8            [-1, 6, 49, 49]               0
            Linear-9              [-1, 49, 192]          37,056
          Dropout-10              [-1, 49, 192]               0
  WindowAttention-11              [-1, 49, 192]               0
         Identity-12          [-1, 56, 56, 192]               0
        LayerNorm-13            [-1, 3136, 192]             384
           Linear-14            [-1, 31

In [9]:
# 손실 함수와 최적화 함수 정의
optimizer = optim.Adam(model.parameters(), lr=1e-6)     # 1e-6 = 0.000001
loss_fn = nn.CrossEntropyLoss()

# 모델 훈련 및 검증

In [10]:
# 학습 함수 정의
def fit(model, data_loader, loss_fn, optimizer, device, phase='train'):
    # phase에 따라 모델의 모드 설정
    if phase == 'train':
        model.train()
    else:
        model.eval()

    running_loss = 0.0
    running_corrects = 0

    # tqdm을 사용하여 반복문 진행 상황 시각화
    prograss_bar = tqdm(data_loader, leave=False)

    # mini-batch 단위 학습 시작
    for img, lbl in prograss_bar:
        img, lbl = img.to(device), lbl.to(device)

        optimizer.zero_grad()           # 누적 Gradient 초기화

        # Gradient 계산을 통한 Forward Propagation
        with torch.set_grad_enabled(phase == 'train'):
            pred = model(img)           # Forward Propagation 수행
            loss = loss_fn(pred, lbl)   # 손실 값 계산

            if phase == 'train':        # 학습 모드인 경우 Backward Propagation 및 가중치 업데이트 수행
                loss.backward()
                optimizer.step()

        pred = pred.argmax(1)           # pred의 확률값을 클래스 레이블로 변환
        running_loss += loss.item()     # 손실 값 누적
        running_corrects += torch.sum(pred == lbl.data)    # 정답 수 누적

    # 손실 값과 정확도 계산
    final_acc = running_corrects / len(data_loader.dataset)
    final_loss = running_loss / len(data_loader.dataset)

    return final_loss, final_acc

In [15]:
# Epoch별 모델 학습
num_epochs = 30

min_loss = np.inf
max_acc = 0.0

record_train_loss, record_train_acc = [], []
record_valid_loss, record_valid_acc = [], []

STATE_DICT_PATH = "augmented_SwinLarge.pth"

for epoch in range(num_epochs):
    start = time.time()

    # 학습 및 검증 단계 진행
    train_loss, train_acc = fit(
        model, train_loader, loss_fn, optimizer, device, phase='train'
    )
    valid_loss, valid_acc = fit(
        model, validation_loader, loss_fn, optimizer, device, phase='valid'
    )

    # 학습 결과 기록
    record_train_loss.append(train_loss)
    record_train_acc.append(train_acc)
    record_valid_loss.append(valid_loss)
    record_valid_acc.append(valid_acc)

    # 성능이 좋아질 경우 모델 저장
    if valid_loss < min_loss:
        print(
            f"[INFO] val_loss has been improved from {min_loss:.5f} to {valid_loss:.5f}. Saving Model!"
        )
        min_loss = valid_loss
        torch.save(model.state_dict(), STATE_DICT_PATH)

    # 학습 시간 및 손실 값, 정확도 출력
    time_elapsed = time.time() - start
    print(
        f"[Epoch{epoch+1:02d}] time: {time_elapsed // 60:.0f}m {time_elapsed % 60:.0f}s \t loss: {train_loss:.5f}, acc: {train_acc:.5f} | val_loss: {valid_loss:.5f}, val_acc: {valid_acc:.5f}"
    )

                                                   

[INFO] val_loss has been improved from inf to 0.04715. Saving Model!
[Epoch01] time: 4m 25s 	 loss: 0.00014, acc: 0.99932 | val_loss: 0.04715, val_acc: 0.98750


                                                   

[INFO] val_loss has been improved from 0.04715 to 0.04395. Saving Model!
[Epoch02] time: 4m 24s 	 loss: 0.00009, acc: 0.99960 | val_loss: 0.04395, val_acc: 0.99000


                                                

KeyboardInterrupt: 

# 검증 정확도 확인

> 저장한 모델의 가중치 load

In [11]:
model.load_state_dict(torch.load("augmented_SwinLarge.pth"))    # 가장 성능이 좋았던 모델 불러오기

<All keys matched successfully>

In [12]:
# validation 데이터에 대한 정답값 확인
y_true = []
y_pred = []

model.eval()
for img, lbl in validation_loader:
    img, lbl = img.to(device), lbl.to(device)

    with torch.no_grad():
        pred = model(img)

    pred = pred.argmax(1)
    y_true.extend(lbl.cpu().numpy())
    y_pred.extend(pred.cpu().numpy())

In [13]:
predictons = []
model = model.to(device)
model.eval()

with torch.no_grad():
    running_loss = 0.0
    running_corrects = 0

    for img, lbl in validation_loader:
        img, lbl = img.to(device), lbl.to(device)

        pred = model(img)
        loss = loss_fn(pred, lbl)

        running_loss += loss.item()
        running_corrects += torch.sum(pred.argmax(1) == lbl.data)

        predictons.extend(pred.argmax(1).cpu().numpy())     # extend: 리스트에 다른 리스트의 요소를 추가할 때 사용

    # 손실 값과 정확도 계산
    final_acc = running_corrects / len(validation_loader.dataset)
    final_loss = running_loss / len(validation_loader.dataset)

In [15]:
# 결과 확인
print(f"<<Final Performances>>  Loss: {final_loss:.5f} | Accuracy: {final_acc:.5f}")
print(f"Prediction Length: {len(predictons)}  |  Prediction Example: {predictons[:15]}")

<<Final Performances>>  Loss: 0.04395 | Accuracy: 0.99000
Prediction Length: 800  |  Prediction Example: [4, 1, 9, 1, 0, 7, 2, 8, 1, 5, 4, 4, 5, 5, 6]


# 최종 예측 수행

In [21]:
problem = pd.read_csv(os.path.join(DATA_DIR, "problem.csv"))
problem.head()

Unnamed: 0,FilePath
0,./dataset/problemset/001.jpg
1,./dataset/problemset/002.jpg
2,./dataset/problemset/003.jpg
3,./dataset/problemset/004.jpg
4,./dataset/problemset/005.jpg


In [22]:
class CustomImageDataset(Dataset):
    def __init__(self, dataframe, transform=None):
        self.dataframe = dataframe
        self.transform = transform

    def __len__(self):
        return len(self.dataframe)

    def __getitem__(self, idx):
        img_path = self.dataframe.iloc[idx, 0]      # 'FilePath' 열에서 이미지 경로 가져오기
        image = Image.open(img_path).convert("L")   # 이미지를 흑백으로 로드

        if self.transform:
            image = self.transform(image)

        return image


# 이미지를 불러올 때 적용할 전처리 정의: resize, to tensor
problem_transform = transforms.Compose(
    [
        transforms.Grayscale(num_output_channels=3),                                    # 흑백 이미지 load (output = 3ch)
        transforms.Resize((224, 224)),                                                  # 224x224 resize
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])     # ImageNet 정규화
    ]
)


# 커스텀 데이터셋 인스턴스 생성
custom_dataset = CustomImageDataset(dataframe=problem, transform=problem_transform)

# DataLoader 인스턴스 생성
problem_loader = DataLoader(custom_dataset, batch_size=1, shuffle=False)

In [23]:
predictions = []

# 검증모드 진입
model.eval()

with torch.no_grad():
    # loss 초기화
    running_loss = 0
    # 정확도 계산
    running_acc = 0
    for img in problem_loader:
        img = img.to(device)

        y_hat = model(img)
        label = y_hat.argmax(dim=1).detach().item()
        predictions.append(label)

# 숫자 라벨을 클래스 이름으로 변환
your_answer = [idx_to_class[l] for l in predictions]

In [24]:
submission = pd.read_csv(os.path.join(DATA_DIR, "submission.csv"))
submission["Label"] = your_answer

In [25]:
# 제출 파일 저장
submission.to_csv("augmented_SwinLarge.csv", index=False)