In [1]:
import numpy as np
import pandas as pd
import os
# cuda error 표시 안될 때 
os.environ['CUDA_LAUNCH_BLOCKING'] = "1"
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

In [2]:
import torch
import sklearn
import random
import torchvision
device = "cuda" if torch.cuda.is_available() else "cpu"
device

'cuda'

In [3]:
# setting seed
torch.manual_seed(1)
if device == "cuda":
    torch.cuda.manual_seed_all(1)
random.seed(1)

In [4]:
# load data 
train = pd.read_csv("./2021-ai-w10-p1/train.csv")
test = pd.read_csv("./2021-ai-w10-p1/test.csv")
submission = pd.read_csv("./2021-ai-w10-p1/sample_submit.csv")

print(train.info())
print(submission.head())
print(train.head())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 60000 entries, 0 to 59999
Columns: 785 entries, Category to 28x28
dtypes: int64(785)
memory usage: 359.3 MB
None
   Id  Category
0   0         0
1   1         0
2   2         0
3   3         0
4   4         0
   Category  1x1  1x2  1x3  1x4  1x5  1x6  1x7  1x8  1x9  ...  28x19  28x20  \
0         5    0    0    0    0    0    0    0    0    0  ...      0      0   
1         0    0    0    0    0    0    0    0    0    0  ...      0      0   
2         3    0    0    0    0    0    0    0    0    0  ...      0      0   
3         3    0    0    0    0    0    0    0    0    0  ...      0      0   
4         6    0    0    0    0    0    0    0    0    0  ...      0      0   

   28x21  28x22  28x23  28x24  28x25  28x26  28x27  28x28  
0      0      0      0      0      0      0      0      0  
1      0      0      0      0      0      0      0      0  
2      0      0      0      0      0      0      0      0  
3      0      0      0    

In [5]:
# data preprocessing

# data scaling
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler()

y_train = np.array(train['Category'])
x_train = np.array(train.drop(['Category'],axis=1))
x_test = np.array(test)

x_train = scaler.fit_transform(x_train)
x_test = scaler.transform(x_test)

In [6]:
# set data on Tensor
x_train = torch.Tensor(x_train).to(device)
y_train = torch.LongTensor(y_train).to(device)
x_test = torch.Tensor(x_test).to(device)

print(x_train.shape, x_test.shape)
print(y_train.shape)

torch.Size([60000, 784]) torch.Size([10000, 784])
torch.Size([60000])


In [7]:
# chage data shape 1d -> 2d
x_train = x_train.view(60000,1,28,28)
x_test = x_test.view(10000,1,28,28)
print(x_train.shape)
print(x_test.shape)

torch.Size([60000, 1, 28, 28])
torch.Size([10000, 1, 28, 28])


In [8]:
# set Tensor on Dataset
train_dataset = torch.utils.data.TensorDataset(x_train,y_train)

In [9]:
model = torchvision.models.resnet18(pretrained=True)

for param in model.parameters():
    param.requires_grad = False

model.conv1 = torch.nn.Conv2d(1,64,kernel_size = (7,7),stride =(2,2), padding=(3, 3), bias=True)
model.fc = torch.nn.Linear(512,10,bias=True)

model = model.to(device)
print(model)

ResNet(
  (conv1): Conv2d(1, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3))
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2):

In [10]:
# setting param, optim, cost function,dataloader
from torch.utils.data import DataLoader

lr = 1e-3
batch_size = 10
optim = torch.optim.Adam(model.parameters(), lr=lr)
data_loader = DataLoader(dataset=train_dataset, batch_size = batch_size, shuffle=True)
epochs = 20
loss = torch.nn.CrossEntropyLoss()

In [11]:
##### 메모리가 넘치게 됨 !! 왜???? 함수형 train 일때는 왜 정상작동 일까?





# # learning
# model.train()
# for epoch in range(epochs+1):
#     sum_cost = 0
#     for x,y in data_loader:
#         h = model(x)
#         cost = loss(h,y)
        
#         optim.zero_grad()
#         cost.backward()
#         optim.step()
#         sum_cost += cost
#     if epoch % (epochs/10) == 0 :
#         model.eval()
#         predict = model(x_train)
#         predict = torch.argmax(predict,dim=1)
#         acc = predict == y_train
#         acc = acc.float().mean().cpu().detach().item()
#         print(epoch,sum_cost.item()/len(data_loader),acc)

In [12]:
def train(model, data_loader):
    model.train()
    sum_cost = 0.0
    sum_correct = 0
    for data,target in data_loader:
        optim.zero_grad()
        output = model(data)
        cost = loss(output,target)
        sum_cost += cost.item()
        predict = torch.argmax(output,dim=1)
        correct = (predict == target).sum().item()
        sum_correct += correct
        cost.backward()
        optim.step()
    return sum_cost/len(data_loader.dataset),sum_correct/len(data_loader.dataset)

In [13]:
import time
cur_time = time.time()
for epoch in range(epochs):
    cost,acc = train(model,data_loader)
    print(epoch,cost,acc*100)
print("endtime =",time.time() - cur_time)

  return torch.max_pool2d(input, kernel_size, stride, padding, dilation, ceil_mode)


0 0.10211705977159242 66.805
1 0.07623289671766882 76.06666666666668
2 0.07090574819788648 78.26166666666666
3 0.06802853074207281 79.24333333333334
4 0.06612947376727436 79.87666666666667
5 0.06486496378054532 80.25999999999999
6 0.06390032868430329 80.58166666666666
7 0.06262164665420229 81.06833333333333
8 0.06233893939880654 81.31166666666667
9 0.060255207381366443 81.77333333333333
10 0.06043448791427848 81.67333333333333
11 0.0595937585179694 82.31166666666667
12 0.05911891209995374 82.21333333333334
13 0.058747694427520036 82.405
14 0.05820702289533025 82.46333333333334
15 0.05875926433011579 82.40166666666666
16 0.05799818872497417 82.80666666666666
17 0.057781548094012154 82.77666666666667
18 0.05772658939055982 82.75833333333334
19 0.05731412079997826 82.77666666666667
endtime = 3006.2206501960754


In [14]:
with torch.no_grad():
    model.eval()
    predict = model(x_train)
    predict = torch.argmax(predict,dim=1)
    print(predict)
    acc = y_train == predict
print(acc.float().mean())

tensor([5, 0, 3,  ..., 9, 4, 5], device='cuda:0')
tensor(0.8948, device='cuda:0')


In [15]:
with torch.no_grad():
    model.eval()
    predict = model(x_test)
    predict = torch.argmax(predict,dim=1)
    submission['Category'] = predict.cpu().detach()
    print(submission)

        Id  Category
0        0         6
1        1         1
2        2         0
3        3         0
4        4         9
...    ...       ...
9995  9995         3
9996  9996         7
9997  9997         2
9998  9998         8
9999  9999         3

[10000 rows x 2 columns]


In [16]:
submission.to_csv("submission.csv",index=False)