# DataLoader

압축푼 직후에는 파일적용이 되지 않아 FileNotFoundError 오류가 뜰 수 있습니다.

그러한 경우 약간의 대기 시간 이후 다시 실행하면 됩니다.

In [1]:
# 랜덤성 제어를 위한 seed 고정
import random
import numpy as np
import torch

seed = 0
deterministic = True

random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)
torch.cuda.manual_seed_all(seed)
if deterministic:
	torch.backends.cudnn.deterministic = True
	torch.backends.cudnn.benchmark = False

In [2]:
import torch
import torch.nn as nn
from torch.utils.data import Dataset
import torchvision.transforms as transforms
from torchvision.transforms import v2

from PIL import Image
from tqdm import tqdm
import pandas as pd

In [3]:
# 데이터셋 디렉토리 위치 지정
data_path = "/home/kim/Desktop/AI_project01_data/data"

In [4]:
from glob import glob

# 커스텀 데이터셋 클래스
class MyDataset(Dataset):
    def __init__(self, data_path, transform=None, train=True):
        self.train = train
        train_df = pd.read_csv(f"{data_path}/train_data.csv")

        self.name2label = dict(zip(train_df["name"], train_df["label"]))

        if self.train:
            self.img_path = glob(f"{data_path}/train_data/*.png")
            self.labels =  [self.name2label[d.split("/")[-1]] for d in self.img_path]
        else:
            self.img_path = glob(f"{data_path}/test_data/*.png")

        self.transform = transform

    def __len__(self):
        return len(self.img_path)   

    def __getitem__(self, index):
        img = Image.open(self.img_path[index])
        if img.mode != 'RGB':
            img = img.convert('RGB')

        if self.transform:
            img = self.transform(img)

        if self.train:
            return img, self.labels[index]
        else:
            return img, self.img_path[index].split("/")[-1]

In [5]:
'''
데이터 전처리
    - transform.Compose에 전처리할 순서를 차례로 지정한 후 리스트 형태로 입력하여 데이터 생성시 설정한 전처리를 적용
    - 여러 가지의 데이터 증강 기법이 들어감
'''
transform =  transforms.Compose([
    # To-do: 증강 기법 적용
    v2.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2),
    v2.RandomHorizontalFlip(p=0.5),
    v2.RandomRotation(degrees=15),
    v2.Resize(224),
    v2.ToTensor(),
    v2.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
])

test_transform =  transforms.Compose([
    v2.Resize(224),
    v2.ToTensor(),
    v2.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
])

train_data = MyDataset(data_path, train=True, transform=transform)
test_data = MyDataset(data_path, train=False, transform=test_transform)

# Split train data into train and validation
train_size = int(len(train_data) * 0.9)
train_data, val_data = torch.utils.data.random_split(train_data, [train_size, len(train_data) - train_size])


train_loader = torch.utils.data.DataLoader(train_data, batch_size=128, shuffle=True)
val_loader = torch.utils.data.DataLoader(val_data, batch_size=128, shuffle=False)
test_loader = torch.utils.data.DataLoader(test_data, batch_size=128, shuffle=False)



# Model

In [6]:
from torchvision.models import resnet18

# Torchvision 라이브러리에서 모델 불러오기
model = resnet18(pretrained=False)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = model.to(device)



In [7]:
criterion = nn.CrossEntropyLoss() # 바꿔보기
optimizer = torch.optim.Adam(model.parameters(), lr=0.001) # 바꿔보기

# 학습률 스케줄러: 검증 손실이 개선되지 않으면 학습률 감소
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
    optimizer,
    mode='min',           # 손실을 최소화
    factor=0.5,           # 학습률을 0.5배로 감소
    patience=10,          # 10 에포크 동안 개선 없으면 감소
)

num_epochs = 200 # 바꿔보기
patience = 30
best_val_loss = float('inf')
epochs_no_improve = 0
best_model_state = None

total_step = len(train_loader)
for epoch in range(num_epochs):
    model.train() #
    total_loss = 0

    pbar = tqdm(enumerate(train_loader), total=len(train_loader))

    for i, (images, labels) in pbar:
        images = images.to(device)
        labels = labels.to(device)

        outputs = model(images)
        loss = criterion(outputs, labels)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        total_loss += loss.item()

        pbar.set_description(f'Epoch [{epoch+1}/{num_epochs}], Loss: {round(total_loss / (i+1),4)}')


    model.eval()
    with torch.no_grad():
        correct = 0
        total = 0
        val_loss = 0 #
        for images, labels in val_loader:
            images = images.to(device)
            labels = labels.to(device)
            outputs = model(images)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
            val_loss += criterion(outputs, labels).item() * labels.size(0) #
        print(f'Accuracy of the model on the test images: {100 * correct / total} %')
    
    # ===================================================
    scheduler.step(val_loss / total) #


    # Early Stopping 체크
    if val_loss < best_val_loss:
        best_val_loss = val_loss
        epochs_no_improve = 0
        best_model_state = model.state_dict().copy()
    else:
        epochs_no_improve += 1

    # Early Stopping
    if epochs_no_improve >= patience:
        print("=" * 90)
        print(f"Early Stopping at Epoch {epoch+1}")
        break

if best_model_state is not None:
    model.load_state_dict(best_model_state)
    print(f"최고 성능 모델로 복원 (검증 손실: {best_val_loss:.4f}, 검증 정확도: {100 * correct / total} %)")

Epoch [1/200], Loss: 2.2032: 100%|██████████| 36/36 [00:13<00:00,  2.71it/s]


Accuracy of the model on the test images: 26.0 %


Epoch [2/200], Loss: 1.5632: 100%|██████████| 36/36 [00:10<00:00,  3.31it/s]


Accuracy of the model on the test images: 35.2 %


Epoch [3/200], Loss: 1.4783: 100%|██████████| 36/36 [00:10<00:00,  3.32it/s]


Accuracy of the model on the test images: 34.4 %


Epoch [4/200], Loss: 1.3958: 100%|██████████| 36/36 [00:10<00:00,  3.29it/s]


Accuracy of the model on the test images: 42.0 %


Epoch [5/200], Loss: 1.3099: 100%|██████████| 36/36 [00:11<00:00,  3.26it/s]


Accuracy of the model on the test images: 49.4 %


Epoch [6/200], Loss: 1.2151: 100%|██████████| 36/36 [00:10<00:00,  3.34it/s]


Accuracy of the model on the test images: 43.8 %


Epoch [7/200], Loss: 1.1495: 100%|██████████| 36/36 [00:10<00:00,  3.32it/s]


Accuracy of the model on the test images: 50.4 %


Epoch [8/200], Loss: 1.1375: 100%|██████████| 36/36 [00:10<00:00,  3.34it/s]


Accuracy of the model on the test images: 44.2 %


Epoch [9/200], Loss: 1.0459: 100%|██████████| 36/36 [00:11<00:00,  3.06it/s]


Accuracy of the model on the test images: 52.8 %


Epoch [10/200], Loss: 0.9891: 100%|██████████| 36/36 [00:10<00:00,  3.35it/s]


Accuracy of the model on the test images: 54.8 %


Epoch [11/200], Loss: 0.9365: 100%|██████████| 36/36 [00:10<00:00,  3.38it/s]


Accuracy of the model on the test images: 59.2 %


Epoch [12/200], Loss: 0.9022: 100%|██████████| 36/36 [00:10<00:00,  3.30it/s]


Accuracy of the model on the test images: 57.6 %


Epoch [13/200], Loss: 0.8696: 100%|██████████| 36/36 [00:10<00:00,  3.31it/s]


Accuracy of the model on the test images: 55.0 %


Epoch [14/200], Loss: 0.8388: 100%|██████████| 36/36 [00:11<00:00,  3.22it/s]


Accuracy of the model on the test images: 59.6 %


Epoch [15/200], Loss: 0.7722: 100%|██████████| 36/36 [00:10<00:00,  3.39it/s]


Accuracy of the model on the test images: 58.6 %


Epoch [16/200], Loss: 0.7474: 100%|██████████| 36/36 [00:10<00:00,  3.35it/s]


Accuracy of the model on the test images: 60.8 %


Epoch [17/200], Loss: 0.7008: 100%|██████████| 36/36 [00:10<00:00,  3.34it/s]


Accuracy of the model on the test images: 57.2 %


Epoch [18/200], Loss: 0.683: 100%|██████████| 36/36 [00:10<00:00,  3.37it/s] 


Accuracy of the model on the test images: 58.4 %


Epoch [19/200], Loss: 0.6759: 100%|██████████| 36/36 [00:11<00:00,  3.18it/s]


Accuracy of the model on the test images: 57.8 %


Epoch [20/200], Loss: 0.6379: 100%|██████████| 36/36 [00:11<00:00,  3.21it/s]


Accuracy of the model on the test images: 62.2 %


Epoch [21/200], Loss: 0.6586: 100%|██████████| 36/36 [00:10<00:00,  3.37it/s]


Accuracy of the model on the test images: 62.2 %


Epoch [22/200], Loss: 0.6154: 100%|██████████| 36/36 [00:10<00:00,  3.37it/s]


Accuracy of the model on the test images: 65.0 %


Epoch [23/200], Loss: 0.536: 100%|██████████| 36/36 [00:11<00:00,  3.20it/s] 


Accuracy of the model on the test images: 62.4 %


Epoch [24/200], Loss: 0.5301: 100%|██████████| 36/36 [00:10<00:00,  3.28it/s]


Accuracy of the model on the test images: 64.0 %


Epoch [25/200], Loss: 0.4948: 100%|██████████| 36/36 [00:10<00:00,  3.49it/s]


Accuracy of the model on the test images: 63.0 %


Epoch [26/200], Loss: 0.5146: 100%|██████████| 36/36 [00:10<00:00,  3.32it/s]


Accuracy of the model on the test images: 66.8 %


Epoch [27/200], Loss: 0.46: 100%|██████████| 36/36 [00:10<00:00,  3.29it/s]  


Accuracy of the model on the test images: 66.0 %


Epoch [28/200], Loss: 0.4505: 100%|██████████| 36/36 [00:10<00:00,  3.40it/s]


Accuracy of the model on the test images: 63.8 %


Epoch [29/200], Loss: 0.4228: 100%|██████████| 36/36 [00:10<00:00,  3.29it/s]


Accuracy of the model on the test images: 63.4 %


Epoch [30/200], Loss: 0.3997: 100%|██████████| 36/36 [00:11<00:00,  3.22it/s]


Accuracy of the model on the test images: 66.8 %


Epoch [31/200], Loss: 0.3488: 100%|██████████| 36/36 [00:10<00:00,  3.31it/s]


Accuracy of the model on the test images: 65.2 %


Epoch [32/200], Loss: 0.3525: 100%|██████████| 36/36 [00:10<00:00,  3.27it/s]


Accuracy of the model on the test images: 64.4 %


Epoch [33/200], Loss: 0.3335: 100%|██████████| 36/36 [00:11<00:00,  3.26it/s]


Accuracy of the model on the test images: 65.4 %


Epoch [34/200], Loss: 0.3238: 100%|██████████| 36/36 [00:10<00:00,  3.48it/s]


Accuracy of the model on the test images: 66.4 %


Epoch [35/200], Loss: 0.2817: 100%|██████████| 36/36 [00:10<00:00,  3.34it/s]


Accuracy of the model on the test images: 66.0 %


Epoch [36/200], Loss: 0.2905: 100%|██████████| 36/36 [00:10<00:00,  3.46it/s]


Accuracy of the model on the test images: 68.6 %


Epoch [37/200], Loss: 0.264: 100%|██████████| 36/36 [00:10<00:00,  3.36it/s] 


Accuracy of the model on the test images: 67.0 %


Epoch [38/200], Loss: 0.1735: 100%|██████████| 36/36 [00:10<00:00,  3.38it/s]


Accuracy of the model on the test images: 69.2 %


Epoch [39/200], Loss: 0.1369: 100%|██████████| 36/36 [00:10<00:00,  3.38it/s]


Accuracy of the model on the test images: 69.8 %


Epoch [40/200], Loss: 0.1116: 100%|██████████| 36/36 [00:11<00:00,  3.17it/s]


Accuracy of the model on the test images: 72.0 %


Epoch [41/200], Loss: 0.1091: 100%|██████████| 36/36 [00:10<00:00,  3.27it/s]


Accuracy of the model on the test images: 73.6 %


Epoch [42/200], Loss: 0.092: 100%|██████████| 36/36 [00:10<00:00,  3.30it/s] 


Accuracy of the model on the test images: 70.6 %


Epoch [43/200], Loss: 0.0754: 100%|██████████| 36/36 [00:10<00:00,  3.37it/s]


Accuracy of the model on the test images: 72.8 %


Epoch [44/200], Loss: 0.1048: 100%|██████████| 36/36 [00:10<00:00,  3.34it/s]


Accuracy of the model on the test images: 71.2 %


Epoch [45/200], Loss: 0.111: 100%|██████████| 36/36 [00:10<00:00,  3.35it/s] 


Accuracy of the model on the test images: 70.2 %


Epoch [46/200], Loss: 0.0809: 100%|██████████| 36/36 [00:11<00:00,  3.07it/s]


Accuracy of the model on the test images: 69.0 %


Epoch [47/200], Loss: 0.0682: 100%|██████████| 36/36 [00:10<00:00,  3.33it/s]


Accuracy of the model on the test images: 72.0 %


Epoch [48/200], Loss: 0.0819: 100%|██████████| 36/36 [00:10<00:00,  3.37it/s]


Accuracy of the model on the test images: 72.0 %


Epoch [49/200], Loss: 0.0588: 100%|██████████| 36/36 [00:10<00:00,  3.35it/s]


Accuracy of the model on the test images: 71.8 %


Epoch [50/200], Loss: 0.0478: 100%|██████████| 36/36 [00:10<00:00,  3.32it/s]


Accuracy of the model on the test images: 72.2 %


Epoch [51/200], Loss: 0.0582: 100%|██████████| 36/36 [00:10<00:00,  3.38it/s]


Accuracy of the model on the test images: 70.8 %


Epoch [52/200], Loss: 0.0671: 100%|██████████| 36/36 [00:10<00:00,  3.35it/s]


Accuracy of the model on the test images: 71.8 %


Epoch [53/200], Loss: 0.0451: 100%|██████████| 36/36 [00:10<00:00,  3.31it/s]


Accuracy of the model on the test images: 72.2 %


Epoch [54/200], Loss: 0.0286: 100%|██████████| 36/36 [00:10<00:00,  3.39it/s]


Accuracy of the model on the test images: 74.2 %


Epoch [55/200], Loss: 0.0268: 100%|██████████| 36/36 [00:11<00:00,  3.15it/s]


Accuracy of the model on the test images: 73.4 %


Epoch [56/200], Loss: 0.0333: 100%|██████████| 36/36 [00:10<00:00,  3.37it/s]


Accuracy of the model on the test images: 73.0 %


Epoch [57/200], Loss: 0.024: 100%|██████████| 36/36 [00:10<00:00,  3.48it/s] 


Accuracy of the model on the test images: 73.4 %


Epoch [58/200], Loss: 0.0225: 100%|██████████| 36/36 [00:10<00:00,  3.37it/s]


Accuracy of the model on the test images: 73.6 %


Epoch [59/200], Loss: 0.0234: 100%|██████████| 36/36 [00:11<00:00,  3.23it/s]


Accuracy of the model on the test images: 73.0 %


Epoch [60/200], Loss: 0.0169: 100%|██████████| 36/36 [00:10<00:00,  3.34it/s]


Accuracy of the model on the test images: 72.6 %


Epoch [61/200], Loss: 0.0214: 100%|██████████| 36/36 [00:10<00:00,  3.43it/s]


Accuracy of the model on the test images: 74.2 %


Epoch [62/200], Loss: 0.0275: 100%|██████████| 36/36 [00:11<00:00,  3.12it/s]


Accuracy of the model on the test images: 73.0 %


Epoch [63/200], Loss: 0.0241: 100%|██████████| 36/36 [00:10<00:00,  3.46it/s]


Accuracy of the model on the test images: 75.0 %


Epoch [64/200], Loss: 0.022: 100%|██████████| 36/36 [00:10<00:00,  3.39it/s] 


Accuracy of the model on the test images: 73.2 %


Epoch [65/200], Loss: 0.0138: 100%|██████████| 36/36 [00:10<00:00,  3.27it/s]


Accuracy of the model on the test images: 75.6 %


Epoch [66/200], Loss: 0.0119: 100%|██████████| 36/36 [00:10<00:00,  3.59it/s]


Accuracy of the model on the test images: 73.6 %


Epoch [67/200], Loss: 0.0143: 100%|██████████| 36/36 [00:10<00:00,  3.42it/s]


Accuracy of the model on the test images: 73.0 %


Epoch [68/200], Loss: 0.0115: 100%|██████████| 36/36 [00:10<00:00,  3.43it/s]


Accuracy of the model on the test images: 73.6 %


Epoch [69/200], Loss: 0.0107: 100%|██████████| 36/36 [00:10<00:00,  3.37it/s]


Accuracy of the model on the test images: 75.4 %


Epoch [70/200], Loss: 0.0095: 100%|██████████| 36/36 [00:11<00:00,  3.13it/s]


Accuracy of the model on the test images: 72.6 %


Epoch [71/200], Loss: 0.0114: 100%|██████████| 36/36 [00:11<00:00,  3.26it/s]


Accuracy of the model on the test images: 74.4 %
Early Stopping at Epoch 71
최고 성능 모델로 복원 (검증 손실: 497.7741, 검증 정확도: 74.4 %)


# Evaluation (Test)

In [8]:
len(val_loader.dataset)
len(test_loader.dataset)

8000

In [9]:
correct = 0
total = len(val_loader.dataset)

with torch.no_grad():
    for images, labels in val_loader:
        images = images.to(device)
        labels = labels.to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs, 1)
        correct += torch.sum((predicted == labels)).item()

print(f'Accuracy : {100 * correct / total} %')


Accuracy : 75.2 %


# Make SubmitFile

In [10]:
import pandas as pd

# 제출 파일 submission.csv 생성
outputs = {
    'Id': [],
    'Prediction': []
}

for images, id in tqdm(test_loader):
    model.eval()
    with torch.no_grad():
        output = model(images.to(device))
        _, predicted = torch.max(output, 1)
        outputs['Prediction'] += predicted.tolist()
        outputs['Id'] += id

df = pd.DataFrame(outputs)

df.to_csv('submission.csv', index=False, columns=['Id', 'Prediction'])

100%|██████████| 63/63 [00:12<00:00,  5.14it/s]


In [11]:
# 제출파일 다운로드
# from google.colab import files

# file_path = "submission.csv"
# files.download(file_path)