## Import Module

In [94]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats

import warnings
warnings.filterwarnings("ignore")

path = '../dataset/hand_gesture/'

train = pd.read_csv(path+'train.csv')
test = pd.read_csv(path+'test.csv')
sample_submission = pd.read_csv(path+'sample_submission.csv')

In [95]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset

import random
random.seed(42)
torch.manual_seed(42)

<torch._C.Generator at 0x2267e1fdb90>

## DEVICE

In [96]:
if torch.cuda.is_available():
    DEVICE = torch.device('cuda')
    
else:
    DEVICE = torch.device('cpu')
    
print('Using PyTorch version:', torch.__version__, ' Device:', DEVICE)

Using PyTorch version: 1.8.0+cpu  Device: cpu


In [97]:
batch_size = 16
EPOCHS = 20
lr = 0.005

## Data Loader

In [98]:
train_x = train.drop(['id', 'target'], axis=1)
test_x = test.drop(['id'], axis=1)

In [99]:
# Tensor로 변환.
train_x = torch.tensor(train_x.to_numpy()).float()
train_y = torch.tensor(train['target'].to_numpy(), dtype=torch.int64)

In [100]:
train_x.shape

torch.Size([2335, 32])

In [101]:
train_y.shape

torch.Size([2335])

In [102]:
train_dataset = TensorDataset(train_x, train_y)

In [103]:
# DataLoader 생성.
train_loader = torch.utils.data.DataLoader(
    dataset = train_dataset,
    batch_size = batch_size,
    shuffle = True
)

test_loader = torch.utils.data.DataLoader(
    dataset = test_x,
    batch_size = batch_size,
    shuffle = False
)

In [104]:
for idx, values in enumerate(train_loader):
    if idx > 0:
        break
        
    print(values[0].shape)
    print(values[1])

torch.Size([16, 32])
tensor([0, 2, 3, 2, 0, 0, 1, 1, 0, 1, 2, 2, 1, 0, 0, 0])


### Models

In [107]:
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.fc1 = nn.Linear(32, 64)
        self.fc2 = nn.Linear(64, 128)
        self.fc3 = nn.Linear(128, 4)
        self.dropout_prob = 0.2
        self.batch_norm1 = nn.BatchNorm1d(64)
        self.batch_norm2 = nn.BatchNorm1d(128)
        
    def forward(self, x):
        x = x.view(-1, 32)
        
        x = self.fc1(x)
        x = self.batch_norm1(x)
        x = F.relu(x)
        x = F.dropout(x, training = self.training, p = self.dropout_prob)
        
        x = self.fc2(x)
        x = self.batch_norm2(x)
        x = F.relu(x)
        x = F.dropout(x, training = self.training, p = self.dropout_prob)
            
        x = self.fc3(x)
        x = F.log_softmax(x, dim = 1)
        return x
    
model = Net().to(DEVICE)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=lr)

print(model)

Net(
  (fc1): Linear(in_features=32, out_features=64, bias=True)
  (fc2): Linear(in_features=64, out_features=128, bias=True)
  (fc3): Linear(in_features=128, out_features=4, bias=True)
  (batch_norm1): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (batch_norm2): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)


In [108]:
for epoch in range(EPOCHS):
    
    running_loss = 0.0
    accuracy = 0
    for i, data in enumerate(train_loader, 0):
        inputs, labels = data

        optimizer.zero_grad() # 매개변수를 0으로 만듭니다. 매 학습시 초기화해줘야합니다.
        outputs = model(inputs) # 입력값을 넣어 순전파를 진행시킵니다.

        loss = criterion(outputs, labels) # 모델 출력값와 실제값을 손실함수에 대입합니다.
        loss.backward() # 손실함수에서 역전파 수행합니다.
        optimizer.step() # 옵티마이저를 사용해 매개변수를 최적화합니다.

        running_loss += loss.item()

        _, predictions = torch.max(outputs, 1)

        for label, prediction in zip(labels, predictions):
            if label == prediction:
                accuracy = accuracy + 1

    
    print(f'{epoch + 1} Epoch Loss: {running_loss / i:.3f}')
    print(f'{epoch + 1} Epoch Accuracy: {accuracy / (i * 16):.3f}')

1 Epoch Loss: 1.234
1 Epoch Accuracy: 0.443
2 Epoch Loss: 1.025
2 Epoch Accuracy: 0.569
3 Epoch Loss: 0.908
3 Epoch Accuracy: 0.631
4 Epoch Loss: 0.859
4 Epoch Accuracy: 0.655
5 Epoch Loss: 0.820
5 Epoch Accuracy: 0.670
6 Epoch Loss: 0.792
6 Epoch Accuracy: 0.687
7 Epoch Loss: 0.767
7 Epoch Accuracy: 0.700
8 Epoch Loss: 0.717
8 Epoch Accuracy: 0.716
9 Epoch Loss: 0.760
9 Epoch Accuracy: 0.715
10 Epoch Loss: 0.722
10 Epoch Accuracy: 0.731
11 Epoch Loss: 0.719
11 Epoch Accuracy: 0.724
12 Epoch Loss: 0.678
12 Epoch Accuracy: 0.745
13 Epoch Loss: 0.708
13 Epoch Accuracy: 0.728
14 Epoch Loss: 0.687
14 Epoch Accuracy: 0.729
15 Epoch Loss: 0.664
15 Epoch Accuracy: 0.752
16 Epoch Loss: 0.660
16 Epoch Accuracy: 0.744
17 Epoch Loss: 0.639
17 Epoch Accuracy: 0.752
18 Epoch Loss: 0.635
18 Epoch Accuracy: 0.761
19 Epoch Loss: 0.652
19 Epoch Accuracy: 0.755
20 Epoch Loss: 0.629
20 Epoch Accuracy: 0.765


### Pred

In [112]:
test_x = torch.from_numpy(test_x.to_numpy()).float()

In [113]:
model.eval() # 모델을 평가모드로 바꿉니다. dropout이 일어나지 않습니다.

with torch.no_grad(): # 이 안의 코드는 가중치 업데이트가 일어나지 않습니다.
    outputs = model(test_x)
    _, pred = torch.max(outputs, 1)

pred

tensor([1, 0, 1,  ..., 2, 0, 3])

In [114]:
sample_submission['target'] = pred.numpy()
sample_submission['target'].value_counts()

1    3716
2    2002
3    1835
0    1790
Name: target, dtype: int64

In [115]:
sample_submission.to_csv('torch1.csv',index=False)