In [114]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# for dirname, _, filenames in os.walk('/kaggle/input'):
#     for filename in filenames:
#         continue

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [115]:
#random seed 고정
import random
import torch
import numpy as np

import torch.nn as nn
from PIL import Image
from tqdm import tqdm
device = 'cuda' if torch.cuda.is_available() else 'cpu'

seed = 42
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)
if torch.cuda.is_available(): 
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = True

In [116]:
class Dataset(torch.utils.data.Dataset):
    def __init__(self, split, rootpath):
        super(Dataset, self).__init__()
        self.split = split
        self.rootpath = rootpath
        self.img_path = []
        self.img_label = []
        
        if self.split == 'train':
            train_path = os.path.join(self.rootpath, 'train')
            classes = os.listdir(train_path)
            for cls in classes:
                img_id = os.listdir(os.path.join(train_path, cls))
                for img in img_id:
                    self.img_path.append(os.path.join(train_path, cls, img))
                    self.img_label.append(int(cls))
        elif self.split == 'test':
            test_path = os.path.join(self.rootpath, 'test', 'img')
            img_id = os.listdir(test_path)
            for img in img_id:
                self.img_path.append(os.path.join(test_path, img))
            self.img_path = sorted(self.img_path)
            
    def __len__(self):
        return len(self.img_path)
    
    def __getitem__(self, idx):
        img = Image.open(self.img_path[idx])
        img = torch.tensor(np.array(img).astype('float32'))
        img = img.unsqueeze(0)
        
        if self.split == 'train':
            return img, self.img_label[idx]
        if self.split == 'test':
            img_id = (self.img_path[idx].split('/')[-1]).split('.')[-2]
            return img, img_id

In [117]:
rootpath = "/kaggle/input/sejong-ai-challenge-practice"

train = Dataset(split = 'train', rootpath = rootpath)
test = Dataset(split = 'test', rootpath = rootpath)

In [119]:
train_set, val_set = torch.utils.data.random_split(train, [0.9, 0.1])

In [120]:
train_dataloader = torch.utils.data.DataLoader(train_set, batch_size=128, shuffle=True)
val_dataloader = torch.utils.data.DataLoader(val_set, batch_size=128, shuffle=True)

In [122]:
class CNN(torch.nn.Module):
    def __init__(self):
        super(CNN, self).__init__()
        
        self.input_layer = nn.Sequential(
            nn.Conv2d(1, 256, 3, 1, 1),
            nn.BatchNorm2d(256),
            nn.ReLU(),
            nn.MaxPool2d(2))
        
        self.hidden_layer1 = nn.Sequential(
            nn.Conv2d(256, 256, 3, 1, 1),
            nn.BatchNorm2d(256),
            nn.ReLU(),
            nn.MaxPool2d(2))
        
        self.hidden_layer2 = nn.Sequential(
            nn.Conv2d(256, 256, 3, 1, 1),
            nn.BatchNorm2d(256),
            nn.ReLU(),
            nn.MaxPool2d(2))
        
        self.fc1 = nn.Sequential(
            nn.Linear(256*3*3, 1024),
            nn.BatchNorm1d(1024),
            nn.ReLU())
        
        self.dropout = nn.Dropout(p=0.5)
        
        self.fc2 = nn.Linear(1024, 10)
        
    def forward(self, input):
        x = self.input_layer(input)
        x = self.hidden_layer1(x)
        x = self.hidden_layer2(x)
        
        x = x.view(x.size(0), -1)
        x = self.fc1(x)
        x = self.dropout(x)
        x = self.fc2(x)
        
        return x

In [123]:
model = CNN().cuda()

optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
loss_fn = torch.nn.CrossEntropyLoss()

In [124]:
epochs = 4
for epoch in range(epochs):
    model.train()
    train_acc=0
    for x, y in tqdm(train_dataloader):
        x = x.cuda()
        y = y.cuda()
        
        pred = model(x)
        loss = loss_fn(pred, y)
        
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        train_acc += (torch.argmax(torch.softmax(pred, dim=1), dim=1)==y).sum()
        
    with torch.no_grad():
        model.eval()
        val_acc=0
        for x, y in val_dataloader:
            x = x.cuda()
            y = y.cuda()

            pred = model(x)
            loss = loss_fn(pred, y)

            val_acc += (torch.argmax(torch.softmax(pred, dim=1), dim=1)==y).sum()

    print('train_acc :', train_acc/len(train_dataloader.dataset), ", val_acc :", val_acc/len(val_dataloader.dataset))

100%|██████████| 422/422 [01:29<00:00,  4.70it/s]


train_acc : tensor(0.9755, device='cuda:0') , val_acc : tensor(0.9883, device='cuda:0')


100%|██████████| 422/422 [00:44<00:00,  9.44it/s]


train_acc : tensor(0.9890, device='cuda:0') , val_acc : tensor(0.9883, device='cuda:0')


100%|██████████| 422/422 [00:44<00:00,  9.40it/s]


train_acc : tensor(0.9916, device='cuda:0') , val_acc : tensor(0.9898, device='cuda:0')


100%|██████████| 422/422 [00:45<00:00,  9.34it/s]


train_acc : tensor(0.9939, device='cuda:0') , val_acc : tensor(0.9892, device='cuda:0')


In [125]:
full_train_dataloader = torch.utils.data.DataLoader(train, batch_size=128, shuffle=True)

In [126]:
model = CNN().cuda()

optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
loss_fn = torch.nn.CrossEntropyLoss()

In [127]:
epochs = 4
for epoch in range(epochs):
    model.train()
    train_acc=0
    for x, y in tqdm(full_train_dataloader):
        x = x.cuda()
        y = y.cuda()
        
        pred = model(x)
        loss = loss_fn(pred, y)
        
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        train_acc += (torch.argmax(torch.softmax(pred, dim=1), dim=1)==y).sum()
    print('train_acc :', train_acc/len(full_train_dataloader.dataset))

100%|██████████| 469/469 [00:50<00:00,  9.27it/s]


train_acc : tensor(0.9766, device='cuda:0')


100%|██████████| 469/469 [00:48<00:00,  9.65it/s]


train_acc : tensor(0.9896, device='cuda:0')


100%|██████████| 469/469 [00:48<00:00,  9.65it/s]


train_acc : tensor(0.9922, device='cuda:0')


100%|██████████| 469/469 [00:51<00:00,  9.10it/s]

train_acc : tensor(0.9937, device='cuda:0')





In [128]:
test_dataloader = torch.utils.data.DataLoader(test, batch_size=1, shuffle=False)

In [129]:
submit = pd.read_csv("/kaggle/input/sejong-ai-challenge-practice/sample_submit.csv")

In [130]:
model.eval()
final_result=[]
with torch.no_grad():
    for x, img_id in tqdm(test_dataloader):
        x = x.cuda()
        pred = model(x)
        
        final_result.append((torch.argmax(torch.softmax(pred, dim=1), dim=1)).item())

100%|██████████| 10000/10000 [00:26<00:00, 373.11it/s]


In [131]:
submit['label'] = final_result
submit

Unnamed: 0,index,label
0,0,7
1,1,2
2,10,0
3,100,6
4,1000,9
...,...,...
9995,9995,2
9996,9996,3
9997,9997,4
9998,9998,5


In [132]:
submit.to_csv("submission.csv", index=False)