In [1]:
import numpy as np
import pandas as pd
import os
# cuda error 표시 안될 때 
os.environ['CUDA_LAUNCH_BLOCKING'] = "1"
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

In [2]:
import torch
import sklearn
import random
import torchvision
import torchvision.transforms as transforms
device = "cuda" if torch.cuda.is_available() else "cpu"
device

'cuda'

In [4]:
# setting seed
torch.manual_seed(1)
if device == "cuda":
    torch.cuda.manual_seed_all(1)
random.seed(1)

In [5]:
# setting param
lr = 0.001
epochs = 50
batch_size= 256

In [10]:
# data loading 
transform = transforms.Compose([transforms.Resize((32,32)),transforms.ToTensor(),transforms.Normalize((0.5),(0.5))])

train_dataset = torchvision.datasets.MNIST(root='MNIST_data/',train=True,transform=transform,download=True)
test_dataset = torchvision.datasets.MNIST(root='MNIST_data/',train=False,transform=transform,download=True)

train_dataloader = torch.utils.data.DataLoader(dataset = train_dataset, batch_size= batch_size, shuffle= True, drop_last=False)
test_dataloader = torch.utils.data.DataLoader(dataset = test_dataset, batch_size= batch_size, shuffle= False, drop_last=False)

In [66]:
# define Model
class LeNet5(torch.nn.Module):
    def __init__(self):
        ##
        super(LeNet5,self).__init__()
        ##
        
        self.c1 = torch.nn.Conv2d(in_channels=1,out_channels=6,kernel_size=5,stride=1,padding=0)        
        self.s2 = torch.nn.AvgPool2d(kernel_size =2, stride =2)
        self.c3 = torch.nn.Conv2d(in_channels=6,out_channels=16,kernel_size=5,stride=1,padding=0)        
        # Pooling Layer의 인자로는 kernel size 와 stride 만 들어간다.
        self.s4 = torch.nn.AvgPool2d( kernel_size =2, stride =2)
        # FC하는 과정은 Conv2d 보다 Linear 사용
#         self.c5 = torch.nn.Conv2d(in_channels=16,out_channels=120)
        self.c5= torch.nn.Linear(in_features=16*5*5,out_features=120)
        self.f6 = torch.nn.Linear(in_features=120,out_features=84)
        self.output = torch.nn.Linear(in_features=84, out_features=10)
        
        self.tanh = torch.nn.Tanh()
        self.sigmoid = torch.nn.Sigmoid()
        
        torch.nn.init.xavier_normal_(self.output.weight)
        torch.nn.init.xavier_normal_(self.f6.weight)
        
    def forward(self,x):
        out = self.c1(x)
        out = self.tanh(out)
        out = self.s2(out)
        out = self.c3(out)
        out = self.tanh(out)
        out = self.s4(out)
        out = out.view(out.size(0),-1)
        out = self.c5(out)
        out = self.tanh(out)
        out = self.f6(out)
        out = self.tanh(out)
        out = self.output(out)
#         out = self.sigmoid(out)
        return out

In [67]:
# prepare train
model = LeNet5().to(device)
optim = torch.optim.SGD(model.parameters(), lr=lr)
loss = torch.nn.CrossEntropyLoss()

In [68]:
def train(model, data_loader):
    model.train()
    sum_cost = 0
    sum_correct = 0
    for data,target in data_loader:
        data = data.to(device)
        target = target.to(device)
        output = model(data)
#         print(data.shape, output.shape, target.shape)
        cost = loss(output,target)
        sum_cost += cost.item()
        predict = torch.argmax(output,dim=1)
        correct = (predict == target).sum().item()
        sum_correct += correct
        optim.zero_grad()
        cost.backward()
        optim.step()
    return sum_cost/len(data_loader.dataset),sum_correct/len(data_loader.dataset)

In [69]:
import time
cur_time = time.time()
for epoch in range(epochs):
    cost,acc = train(model,train_dataloader)
    print(epoch,cost,acc*100)
print("endtime =",time.time() - cur_time)

0 0.008751852603753408 22.093333333333334
1 0.008123514872789383 52.195
2 0.007263164158662161 60.980000000000004
3 0.006252721240123113 66.74666666666667
4 0.00531728232105573 71.27
5 0.004574147472778956 74.66166666666668
6 0.004018625596165657 77.30666666666667
7 0.003603675306836764 79.42
8 0.0032881356606880826 80.96166666666666
9 0.0030418980528910955 82.28333333333333
10 0.002843828555941582 83.16
11 0.0026831516524155933 83.89999999999999
12 0.0025507385164499283 84.52333333333333
13 0.002437978074947993 85.00333333333333
14 0.0023411030704776447 85.40333333333334
15 0.0022568170701464016 85.76
16 0.0021836641907691957 86.12666666666667
17 0.0021177221993605297 86.41
18 0.0020595117355386417 86.70166666666667
19 0.002008778993288676 86.92166666666667
20 0.001959413293500741 87.17833333333334
21 0.0019163584594925245 87.40333333333334
22 0.0018764892001946767 87.59166666666667
23 0.0018411386708418527 87.795
24 0.0018045106828212738 87.94999999999999
25 0.0017736833194891611 88.

In [None]:
def validate(model,data_loader):
    model.eval()
    sum_correct = 0
    for data in data_loader:
        output = model(data[0].to(device))
        target = data[1].to(device)
        predict = torch.argmax(output, dim=1)
        correct = (predict == target).sum().item()
        sum_correct += correct
    return sum_correct/len(data_loader.dataset)

In [None]:
with torch.no_grad():
    print(validate(model,train_data_loader))

In [None]:
# set Tensor on Dataset
test_dataset = torch.utils.data.TensorDataset(x_test)
test_data_loader = DataLoader(dataset=test_dataset,batch_size= batch_size, shuffle = False)

In [None]:
def predict(model,data_loader):
    model.eval()
    predict_array =[]
    for data in data_loader:
        output = model(data[0].to(device))
        predict = torch.argmax(output,dim = 1)
        predict_array += np.array(predict.cpu().detach()).tolist()
    return predict_array

In [None]:
with torch.no_grad():
    predict = predict(model,test_data_loader)
    submission['Category'] = predict
    print(submission)

In [None]:
submission.to_csv("submission.csv",index=False)