## 기본 베이지 DNN

In [29]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
from sklearn.preprocessing import MinMaxScaler
from tensorflow.python.keras.models import Sequential
from tensorflow.python.keras.layers import Dense,Dropout,Activation
from torch.utils.data import TensorDataset, DataLoader
from tensorflow.python.keras import metrics
from tensorflow.python import keras
import tensorflow as tf
import matplotlib.pyplot as plt
from imblearn.over_sampling import SMOTE

In [30]:
%matplotlib inline

In [31]:
df=pd.read_excel('./modeling_data/Result3.xlsx')

In [32]:
df.head()

Unnamed: 0,0,1,2,3,4,5,6,Label
0,#AAGQKY,45.0,72.0,0.0,0.0,0.0,148.0,0
1,#AAGQKY,45.0,72.0,0.0,0.0,0.0,148.0,0
2,#AAGQKY,45.0,72.0,0.0,0.0,0.0,148.0,0
3,#AAGQKY,45.0,72.0,0.0,0.0,0.0,148.0,0
4,#AAGQKY,45.0,72.0,0.0,0.0,0.0,148.0,0


In [33]:
df['Label'].unique()

array([0, 1, 2], dtype=int64)

In [56]:
train=df.iloc[:17780,:]
test=df.iloc[17780:,:]

In [35]:
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

In [36]:
scaler = StandardScaler()

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.30)

X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [37]:
smote = SMOTE(random_state=0)
X_train_over,y_train_over = smote.fit_resample(X_train,y_train)
print('SMOTE 적용 전 학습용 피처/레이블 데이터 세트: ', X_train.shape, y_train.shape)
print('SMOTE 적용 후 학습용 피처/레이블 데이터 세트: ', X_train_over.shape, y_train_over.shape)
print('SMOTE 적용 전 레이블 값 분포: \n', pd.Series(y_train).value_counts())
print('SMOTE 적용 후 레이블 값 분포: \n', pd.Series(y_train_over).value_counts())

SMOTE 적용 전 학습용 피처/레이블 데이터 세트:  (18032, 6) (18032,)
SMOTE 적용 후 학습용 피처/레이블 데이터 세트:  (47745, 6) (47745,)
SMOTE 적용 전 레이블 값 분포: 
 0    15915
2     1597
1      520
Name: Label, dtype: int64
SMOTE 적용 후 레이블 값 분포: 
 0    15915
1    15915
2    15915
Name: Label, dtype: int64


In [38]:
X_train = torch.FloatTensor(X_train_over)
X_test = torch.FloatTensor(X_test)
y_train = torch.LongTensor(y_train_over)
y_test = torch.LongTensor(y_test.to_numpy())

In [40]:
batch_size = 64
train_loader = torch.utils.data.DataLoader(dataset=train, batch_size=batch_size, shuffle=False)
test_loader = torch.utils.data.DataLoader(dataset=test, batch_size=batch_size, shuffle=False)

In [55]:
X_test

tensor([[-1.0978,  0.8388, -0.2317,  0.1022, -0.9882, -0.5850],
        [-0.3997, -0.4827, -0.2317, -0.2046, -0.9882, -0.7627],
        [-1.5965, -0.5300, -0.2317, -0.1221, -0.9882, -1.7376],
        ...,
        [ 2.2211, -0.1830, -0.2317, -0.5453,  1.3377,  0.4545],
        [-0.3558, -0.7403, -0.2317, -0.5453, -0.9882, -1.1113],
        [-0.5467, -0.2493, -0.0212,  0.1329,  1.1086, -0.9025]])

In [41]:
class NeuralNetworkClassificationModel(nn.Module):
    def __init__(self,input_dim,output_dim):
        super(NeuralNetworkClassificationModel,self).__init__()
        self.input_layer    = nn.Linear(input_dim,256)
        self.hidden_layer1  = nn.Linear(256,128)
        self.output_layer   = nn.Linear(128,output_dim)
        self.relu = nn.ReLU()
    
    
    def forward(self,x):
        out =  self.relu(self.input_layer(x))
        out =  self.relu(self.hidden_layer1(out))
        out =  self.output_layer(out)
        return out

In [42]:
# input_dim = 4 because we have 4 inputs namely sepal_length,sepal_width,petal_length,petal_width
# output_dim = 3 because we have namely 3 categories setosa,versicolor and virginica
input_dim  = 6
output_dim = 3
model = NeuralNetworkClassificationModel(input_dim,output_dim)

In [43]:
# creating our optimizer and loss function object
learning_rate = 0.01
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(),lr=learning_rate)

In [44]:
def train_network(model,optimizer,criterion,X_train,y_train,X_test,y_test,num_epochs,train_losses,test_losses):
    for epoch in range(num_epochs):
        #clear out the gradients from the last step loss.backward()
        optimizer.zero_grad()
        
        #forward feed
        output_train = model(X_train)
      
        #calculate the loss
        loss_train = criterion(output_train, y_train)

        #backward propagation: calculate gradients
        loss_train.backward()

        #update the weights
        optimizer.step()

        train_losses[epoch] = loss_train.item()
       

        if (epoch + 1) % 50 == 0:
            print(f"Epoch {epoch+1}/{num_epochs}, Train Loss: {loss_train.item():.4f}")

In [45]:
num_epochs = 500
train_losses = np.zeros(num_epochs)
test_losses  = np.zeros(num_epochs)

In [46]:
train_network(model,optimizer,criterion,X_train,y_train,X_test,y_test,num_epochs,train_losses,test_losses)

Epoch 50/500, Train Loss: 0.6113
Epoch 100/500, Train Loss: 0.4637
Epoch 150/500, Train Loss: 0.4005
Epoch 200/500, Train Loss: 0.3637
Epoch 250/500, Train Loss: 0.3605
Epoch 300/500, Train Loss: 0.2892
Epoch 350/500, Train Loss: 0.2768
Epoch 400/500, Train Loss: 0.2611
Epoch 450/500, Train Loss: 0.2658
Epoch 500/500, Train Loss: 0.2417


In [47]:
def test_network(model,optimizer,criterion,X_train,y_train,X_test,y_test,num_epochs,train_losses,test_losses):
 
    correct=0
    #clear out the gradients from the last step loss.backward()
    
    optimizer.zero_grad()

    #forward feed
    output_test = model(X_test)

    pred = output_test.argmax(dim=1, keepdim=True)
    correct = pred.eq(y_test.view_as(pred)).sum().item()
    print('output_test:{}'.format(len(output_test)),'pred:{}'.format(len(pred)))
    print('\nAccuracy: {}/{} ({:.0f}%)\n'.format(correct, len(X_test), 100. * correct / len(X_test)))

In [50]:
def test():
    model.eval()
    loss = 0
    correct = 0
    for data, target in test_loader:
        print(target)
        data, target = data.to(device), target.to(device)
        output = model(data)
        loss += criterion(output, target).data.item()
        pred = output.data.max(1, keepdim=True)[1]
        correct += pred.eq(target.data.view_as(pred)).cpu().sum()
        loss /= len(test_loader.dataset)
    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
    loss, correct, len(test_loader.dataset), 100. * correct / len(test_loader.dataset)))

In [52]:
test()

ValueError: too many values to unpack (expected 2)

In [48]:
test_network(model,optimizer,criterion,X_train,y_train,X_test,y_test,num_epochs,train_losses,test_losses)

output_test:7728 pred:7728

Accuracy: 6590/7728 (85%)



In [None]:
plt.figure(figsize=(10,10))
plt.plot(train_losses, label='train loss')
plt.legend()
plt.show()

In [None]:
  444

## k_fold 교차 검증