In [1]:
import pandas as pd
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt
%matplotlib inline
import warnings
warnings.filterwarnings('ignore')

In [2]:
path = './train/train_data.csv'

df = pd.read_csv(path)
df

Unnamed: 0,filen_name,label
0,train0001.png,8
1,train0002.png,8
2,train0003.png,8
3,train0004.png,8
4,train0005.png,8
...,...,...
4995,train4996.png,6
4996,train4997.png,6
4997,train4998.png,6
4998,train4999.png,6


In [3]:
train_file_name = df['filen_name']
train_label = df['label']

# image 파일을 불러온뒤 변수에 저장
train_image = []
for file in train_file_name:
    train_image.append(Image.open('./train/' + file))
image_to_number = np.array([np.array(image).flatten() for image in train_image])


In [4]:
image_to_number.shape

(5000, 784)

In [5]:
all_images = pd.DataFrame(image_to_number)
all_images['labels'] = df['label']

In [6]:
def compute_acc(true, pred):
    return sum(true == pred) / len(true)

In [7]:
import torch
from torch.utils.data import Dataset, DataLoader
from torch import nn
from torchvision import transforms
import torch.nn.functional as F

class MNIST(Dataset):
    def __init__(self,file_path_list,labels = None):
            self.file_path_list = file_path_list
            self.labels = labels 
            self.PIL2tensor = transforms.PILToTensor()
    def __getitem__(self,idx):
        image = Image.open(self.file_path_list[idx])
        tensor_image = self.PIL2tensor(image)
        flattend_image = tensor_image.float()
        
        if self.labels is not None:
            label = self.labels[idx]
            return flattend_image, label
        
        return flattend_image
    
    def __len__(self):
        return len(self.file_path_list)

In [8]:
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
print(f'{device} is available')

cuda:0 is available


In [9]:
file_path_list = './train/' + df['filen_name']
labels = df['label']

mnist_dataset = MNIST(file_path_list,labels)
mnist_loader = DataLoader(mnist_dataset,batch_size =32, shuffle = True)

In [10]:
class Net(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(1,32,3,padding = 1) # in_channels 1 , out_channels 3 kernel_size 3*3
        self.pool = nn.MaxPool2d(2,2) # 2x2의 사이즈로 특성 최대값 추출
        self.conv2 = nn.Conv2d(32,64,3)
        self.fc1 = nn.Linear(2304, 100)
        self.fc2 = nn.Linear(100,10)
    def forward(self,x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = torch.flatten(x,1)
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return x
net = Net()

In [11]:
import torch.optim as optim

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum = 0.9)
net.to(device)

Net(
  (conv1): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1))
  (fc1): Linear(in_features=2304, out_features=100, bias=True)
  (fc2): Linear(in_features=100, out_features=10, bias=True)
)

In [12]:
from tqdm import tqdm

for Epoch in tqdm(range(30)):
    for batch, labels in mnist_loader:
        batch = batch.to(device)
        labels = labels.to(device)
        optimizer.zero_grad()
        
        output = net(batch)
        loss = criterion(output,labels)
        loss.backward()
        optimizer.step()
        acc = compute_acc(labels.detach().cpu().numpy(), output.detach().cpu().numpy().argmax(-1))
        
    if Epoch % 10 == 0 or Epoch == 29:
        print(f'Epoch {Epoch}, loss : {loss}, acc : {acc}')

  3%|██▊                                                                                | 1/30 [00:04<02:15,  4.68s/it]

Epoch 0, loss : 0.6702593564987183, acc : 0.875


 37%|██████████████████████████████                                                    | 11/30 [00:28<00:50,  2.64s/it]

Epoch 10, loss : 0.00018763664411380887, acc : 1.0


 70%|█████████████████████████████████████████████████████████▍                        | 21/30 [00:53<00:22,  2.51s/it]

Epoch 20, loss : 3.7252871720738767e-07, acc : 1.0


100%|██████████████████████████████████████████████████████████████████████████████████| 30/30 [01:14<00:00,  2.49s/it]

Epoch 29, loss : 6.690476766380016e-06, acc : 1.0





In [14]:
test_df = pd.read_csv('./test/test_data.csv') 
test_file_dir = './test/'

In [16]:
test_mnist_dataset = MNIST(test_file_dir + test_df['file_name'])
test_mnist_loader = DataLoader(test_mnist_dataset, batch_size = 32)
preds = None

for test_batch in tqdm(test_mnist_loader):
    test_batch = test_batch.to(device)
    output = net(test_batch)
    
    digit_pred = output.detach().cpu().numpy().argmax(-1)
    if preds is None:
        preds = digit_pred
    else:
        preds = np.concatenate([preds,digit_pred])
        

100%|████████████████████████████████████████████████████████████████████████████████| 157/157 [00:02<00:00, 64.90it/s]


In [17]:
submission = pd.read_csv('./sample_submission.csv') # sample submission 불러오기

submission['label'] = preds

submission.to_csv('submission.csv', index=False)