# Google Drive mount

In [None]:
# Mount the account's Google Drive into the Colab runtime.
# Every requested permission must be selected and granted, otherwise the
# mount fails with an error.
from google.colab import drive
drive.mount('/content/drive')

# 파일 가져오기

In [None]:
# List the contents of the Kaggle02 folder on the mounted Drive.
!ls /content/drive/MyDrive/Kaggle02

In [None]:
# Extract the competition archive from the mounted Drive into ./data.
# NOTE(review): the `***` path segment is expanded by the shell as a wildcard
# and may stand in for a redacted folder name - confirm the real path.
!unzip /content/drive/MyDrive/***/rokey-boot-camp-2nd-competition.zip -d data

# 파일 구조 확인

In [None]:
# List the top level of the extracted dataset folder.
!ls data/Kaggle02

In [None]:
# List the entries of the training folder.
!ls data/Kaggle02/train

In [None]:
# List the entries of the test folder.
!ls data/Kaggle02/test/

In [None]:
import json

# Load class_name.json and display it as the cell output.
# The context manager closes the file handle instead of leaking it.
with open("data/Kaggle02/class_name.json", encoding="utf-8") as f:
    data = json.load(f)
data

In [2]:
# Fix every random seed so that runs are reproducible.
import random

import numpy as np
# Imported here as well: this cell uses torch but appears before the main
# import cell, so a top-to-bottom run would otherwise raise NameError.
import torch

seed = 0
deterministic = True

# Seed the Python, NumPy and PyTorch (CPU and all CUDA devices) generators.
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)
torch.cuda.manual_seed_all(seed)
if deterministic:
    # Request deterministic cuDNN kernels and disable the benchmark autotuner.
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

# DataLoader

압축을 푼 직후에는 파일이 아직 반영되지 않아 FileNotFoundError 오류가 발생할 수 있습니다.

그러한 경우 약간의 대기 시간 이후 다시 실행하면 됩니다.

In [1]:
import torch
import torch.nn as nn
from torch.utils.data import Dataset
import torchvision.transforms as transforms

from PIL import Image
from tqdm import tqdm
import pandas as pd
import json


In [3]:
import os
from glob import glob


class MyDataset(Dataset):
    """Image dataset driven by ``class_name.json`` and folders of JPEG files.

    Training images are discovered with ``{data_path}/train/*/*.JPEG`` and are
    labelled by looking up the name of their parent folder in
    ``class_name.json``. Test images are discovered with
    ``{data_path}/test/*.JPEG`` and carry no label.

    Args:
        data_path: Root folder holding ``class_name.json`` and the ``train``
            and/or ``test`` sub-folders.
        transform: Optional callable applied to every RGB PIL image.
        train: When true, serve the labelled training images; otherwise serve
            the test images.

    Raises:
        FileNotFoundError: If ``class_name.json`` is missing under
            ``data_path``.
        KeyError: If a training sub-folder name is not a key of
            ``class_name.json``.
    """

    def __init__(self, data_path, transform=None, train=True):
        self.train = train
        self.transform = transform

        # Use a context manager so the JSON file handle is closed right away.
        with open(os.path.join(data_path, "class_name.json"), encoding="utf-8") as f:
            self.name2label = json.load(f)

        # glob() returns paths in arbitrary, filesystem-dependent order.
        # Sorting keeps the sample order (and thus any seeded split and the
        # submission row order) identical across machines and runs.
        if self.train:
            self.img_path = sorted(glob(os.path.join(data_path, "train", "*", "*.JPEG")))
            self.labels = [
                self.name2label[os.path.basename(os.path.dirname(path))]
                for path in self.img_path
            ]
        else:
            self.img_path = sorted(glob(os.path.join(data_path, "test", "*.JPEG")))

    def __len__(self):
        """Return the number of discovered image files."""
        return len(self.img_path)

    def __getitem__(self, index):
        """Return ``(image, label)`` in train mode, else ``(image, file_name)``."""
        path = self.img_path[index]

        # convert() loads the pixel data and returns a new image, so the file
        # can be closed as soon as the with-block ends.
        with Image.open(path) as raw:
            img = raw.convert('RGB')

        if self.transform:
            img = self.transform(img)

        if self.train:
            return img, self.labels[index]
        return img, os.path.basename(path)


import os

# Locate the dataset root. The original local checkout is preferred; when it
# does not exist, fall back to the folder the unzip cell extracts into
# (data/Kaggle02). If neither exists the original path is kept, so the
# resulting error still points at it.
_data_path_candidates = ['/home/kim/Desktop/AI_project02/Kaggle02', 'data/Kaggle02']
data_path = next(
    (path for path in _data_path_candidates if os.path.isdir(path)),
    _data_path_candidates[0],
)

# Only convert PIL images to tensors; no resizing or augmentation is applied.
transform = transforms.Compose([
    transforms.ToTensor(),
])

train_data = MyDataset(data_path, train=True, transform=transform)
test_data = MyDataset(data_path, train=False, transform=transform)

# Split the labelled data 90% / 10% into train and validation subsets.
# A dedicated, fixed-seed generator keeps the split identical every time this
# cell is re-run, so validation samples never drift into the training subset.
train_size = int(len(train_data) * 0.9)
val_size = len(train_data) - train_size
split_generator = torch.Generator().manual_seed(0)
train_data, val_data = torch.utils.data.random_split(
    train_data, [train_size, val_size], generator=split_generator
)

# Only the training loader shuffles; validation and test keep a fixed order.
train_loader = torch.utils.data.DataLoader(train_data, batch_size=128, shuffle=True)
val_loader = torch.utils.data.DataLoader(val_data, batch_size=128, shuffle=False)
test_loader = torch.utils.data.DataLoader(test_data, batch_size=128, shuffle=False)

In [None]:
from itertools import islice

from matplotlib import pyplot as plt

# Build the inverse (label -> class name) lookup for display purposes.
with open(f"{data_path}/class_name.json", encoding="utf-8") as f:
    name2label = json.load(f)
label2name = {v: k for k, v in name2label.items()}

# Show the first 21 training samples (indices 0-20) with their class names.
# islice stops without fetching an extra sample just to hit a break.
for idx, (images, labels) in enumerate(islice(train_data, 21)):
    print(label2name[labels], idx)
    # Move the channel axis last, which is the layout imshow expects.
    plt.imshow(images.permute(1, 2, 0))
    plt.show()


# Model

In [None]:
from torchvision.models import resnet18
import torch
import torch.nn as nn

# Output size of the replaced classification head.
# NOTE(review): presumably the number of entries in class_name.json - confirm.
num_classes = 10

# Load the architecture from torchvision with randomly initialised weights.
# The `pretrained=` keyword is deprecated in recent torchvision releases; the
# default call builds the same untrained network on old and new versions.
model = resnet18()
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Replace the final fully connected layer; read its input width from the
# model instead of hard-coding 512 so a different backbone keeps working.
model.fc = nn.Linear(model.fc.in_features, num_classes, bias=True)

model = model.to(device)

print(model)

In [None]:
criterion = nn.CrossEntropyLoss()  # try other loss functions
# Pass lr explicitly: some PyTorch releases require the argument, and 1e-3 is
# the default on releases where it is optional (TODO confirm for your version).
optimizer = torch.optim.SGD(model.parameters(), lr=1e-3)  # try other optimizers

num_epochs = 5  # try other values
total_step = len(train_loader)

for epoch in range(num_epochs):
    # ---- training pass ----
    model.train()
    total_loss = 0

    pbar = tqdm(enumerate(train_loader), total=total_step)
    for i, (images, labels) in pbar:
        images = images.to(device)
        labels = labels.to(device)

        outputs = model(images)
        loss = criterion(outputs, labels)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        total_loss += loss.item()

        # Show the running mean of the training loss for this epoch.
        pbar.set_description(f'Epoch [{epoch+1}/{num_epochs}], Loss: {round(total_loss / (i+1),4)}')

    # ---- validation pass ----
    model.eval()
    correct = 0
    total = 0
    val_loss_sum = 0.0
    with torch.no_grad():
        for images, labels in val_loader:
            images = images.to(device)
            labels = labels.to(device)
            outputs = model(images)
            # criterion returns the batch mean; weight it by the batch size so
            # the final average is per sample even if the last batch is short.
            val_loss_sum += criterion(outputs, labels).item() * labels.size(0)
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    if total:
        # These are validation images, not test images.
        print(
            f'Validation loss: {val_loss_sum / total:.4f}, '
            f'accuracy of the model on the validation images: {100 * correct / total} %'
        )
    else:
        print('Validation set is empty; skipping validation metrics.')

# Evaluation (Validation)

In [None]:
# Number of samples in the dataset wrapped by the validation loader.
len(val_loader.dataset)

In [None]:
# Measure accuracy on the validation split.
correct = 0
total = len(val_loader.dataset)

# Switch to inference mode explicitly so this cell does not depend on the
# state the training cell happened to leave the model in.
model.eval()
with torch.no_grad():
    for images, labels in val_loader:
        images = images.to(device)
        labels = labels.to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs, 1)
        correct += torch.sum((predicted == labels)).item()

if total:
    print(f'Accuracy : {100 * correct / total} %')
else:
    # Avoid ZeroDivisionError when the validation split is empty.
    print('Validation set is empty; accuracy is undefined.')

# Make SubmitFile

In [None]:
import pandas as pd

# Build the submission file submission.csv: one row per test image, holding
# the image file name and the predicted class index.
outputs = {
    'Id': [],
    'Prediction': []
}

# Enter inference mode once instead of on every batch.
model.eval()
with torch.no_grad():
    # `file_names` (not `id`) so the builtin id() is not shadowed.
    for images, file_names in tqdm(test_loader):
        output = model(images.to(device))
        _, predicted = torch.max(output, 1)
        outputs['Prediction'].extend(predicted.tolist())
        outputs['Id'].extend(file_names)

df = pd.DataFrame(outputs)

df.to_csv('submission.csv', index=False, columns=['Id', 'Prediction'])

In [None]:
# 제출파일 다운로드
from google.colab import files

file_path = "submission.csv"
files.download(file_path)