# Neural Net

In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import scale

# 변수의 자료형에 따른 구분 (이산형 / 연속형)

In [2]:
# Load the training set from the working directory.
train = pd.read_csv('train.csv')
# Every column except the last is treated as an independent variable
# (the last column is used as the target in a later cell).
indep = train.columns[:-1]

In [3]:
# Partition the independent variables by dtype: object-typed columns are
# treated as discrete (categorical), every other column as continuous.
# Column order within each list follows the order in `indep`.
discrete = [col for col in indep if train[col].dtype == 'object']
continuous = [col for col in indep if train[col].dtype != 'object']

In [4]:
# Standardize the continuous columns in place with sklearn's `scale`.
# NOTE(review): the scaling statistics are computed on the full dataset,
# before the train/test split made in a later cell, so held-out rows
# influence the scaling — confirm this is acceptable.
train[continuous] = scale(train[continuous])

In [5]:
# One-hot encode the discrete columns, then place the encoded columns next to
# the (already scaled) continuous columns to form the feature table.
dummy = pd.get_dummies(train[discrete])
X = pd.concat([train[continuous], dummy], axis=1)

In [6]:
# The last column of the table is the dependent variable (target).
dep = train.columns[-1]
y = train[dep]

## 레이블을 0, 1로 변환 (Neural Network 학습용)

In [7]:
# Encode the two class labels as integers: 'under50k' -> 0, 'over50k' -> 1.
y = y.replace(['under50k','over50k'],[0,1])
# `.values` replaces `as_matrix()`, which was deprecated and later removed
# from pandas. The explicit float cast keeps the feature matrix numeric even
# if the dummy columns are not stored as floats.
X = X.values.astype(np.float64)
# Shape the labels as an (n_samples, 1) column to match the [None, 1] target
# placeholder defined later. A plain ndarray is used instead of `np.matrix`,
# which NumPy discourages and newer scikit-learn versions reject.
y = y.values.reshape(-1, 1)

## Split Test, Train Data

In [8]:
from sklearn.model_selection import train_test_split
from sklearn import metrics

In [9]:
# Hold out 10% of the rows as the test set; the fixed random_state makes the
# split repeatable across runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=0)

## Tensorflow 불러오기

In [10]:
import tensorflow as tf
import tensorlayer as tl

### 기본 그래프 만들기

In [11]:
# Start from an empty default graph so that re-running the cells below does
# not pile up duplicate ops from a previous run.
tf.reset_default_graph()
# Let tensorlayer reuse layer names (the layers below have fixed names).
tl.layers.set_name_reuse(enable=True)

### 신경망 만들기

In [13]:
# Input placeholder: one row per sample. The feature count is read from the
# training matrix instead of being hard-coded, so the network still builds if
# the one-hot encoding produces a different number of columns.
x = tf.placeholder(tf.float32, [None, X_train.shape[1]]) # input data
network = tl.layers.InputLayer(x,name="input") # input layer

  tensorlayer:Instantiate InputLayer  input: (?, 46)


In [14]:
# First hidden layer: 15 units with sigmoid activation.
network = tl.layers.DenseLayer(network,n_units=15,act=tf.sigmoid, name="hidden")

  tensorlayer:Instantiate DenseLayer  hidden: 15, sigmoid


In [15]:
# Second hidden layer: 10 units with sigmoid activation.
network = tl.layers.DenseLayer(network,n_units=10,act=tf.sigmoid, name="hidden2")

  tensorlayer:Instantiate DenseLayer  hidden2: 10, sigmoid


In [16]:
# A single sigmoid unit, so the network emits one value in (0, 1) per sample.
network = tl.layers.DenseLayer(network, n_units=1, act=tf.sigmoid, name="output") # output layer

  tensorlayer:Instantiate DenseLayer  output: 1, sigmoid


### 예측값, 실제값, 비용 함수 정의

In [17]:
predict = network.outputs # predicted values (output of the final layer)
# NOTE(review): this rebinds `y`, which earlier cells used for the label
# array. After this cell `y` is the placeholder, so re-running the earlier
# split cell would no longer see the label data.
y = tf.placeholder(tf.float32, [None, 1]) # actual (target) values
# Binary cross-entropy between the network output and the target placeholder.
cost = tl.cost.binary_cross_entropy(network.outputs, y)

In [18]:
# RMSProp optimizer (despite the variable name `gd`, this is not plain
# gradient descent).
gd = tf.train.RMSPropOptimizer(learning_rate=0.1, momentum=0.1)
train_step = gd.minimize(cost)  # op that performs one update step minimizing the cost

### 데이터 설정

In [19]:
# Feed dict binding the input and target placeholders to the training split.
data = {x:X_train,y:y_train}

### 세션과 변수 초기화

In [20]:
# An InteractiveSession installs itself as the default session, which is what
# allows the bare `.eval()` / `.run()` calls in the following cells.
sess = tf.InteractiveSession()
# Initialize all variables (layer weights and optimizer state).
sess.run(tf.global_variables_initializer())

In [21]:
predict.eval(data)  # initial predictions on the training split, before any training step

array([[ 0.50684905],
       [ 0.5069387 ],
       [ 0.5069077 ],
       ..., 
       [ 0.50713634],
       [ 0.50750202],
       [ 0.50702292]], dtype=float32)

In [22]:
# Cost on the training split before any training step has been run.
cost.eval(data)

0.70015925

In [23]:
# Run a single optimizer step on the whole training split.
train_step.run(data)

In [24]:
# Cost on the training split after the single step above, for comparison
# with the value evaluated before it.
cost.eval(data)

0.67639577

In [25]:
# Train with tensorlayer's fit helper on mini-batches of the training split.
# NOTE(review): the recorded log shows the loss staying near 0.34 from epoch
# 100 onward, so most of the 1000 epochs may be unnecessary — confirm.
tl.utils.fit(sess, network, train_step, cost, X_train, y_train, x, y, 
            batch_size=50,  # 50 samples per mini-batch
            n_epoch=1000,
            print_freq=100
            )

Start training the network ...
Epoch 1 of 1000 took 0.809009s, loss 0.395155
Epoch 100 of 1000 took 0.796495s, loss 0.338894
Epoch 200 of 1000 took 0.778973s, loss 0.342056
Epoch 300 of 1000 took 0.790487s, loss 0.340840
Epoch 400 of 1000 took 0.836545s, loss 0.343837
Epoch 500 of 1000 took 0.680350s, loss 0.342035
Epoch 600 of 1000 took 0.820525s, loss 0.345700
Epoch 700 of 1000 took 0.761451s, loss 0.341891
Epoch 800 of 1000 took 0.665831s, loss 0.343030
Epoch 900 of 1000 took 0.727909s, loss 0.339379
Epoch 1000 of 1000 took 0.749967s, loss 0.337663
Total training time: 756.604899s


In [26]:
# Network outputs for the held-out test split.
y_predict = predict.eval({x: X_test})

In [27]:
y_class = y_predict > 0.5  # threshold at 0.5: True where the predicted value exceeds 0.5

In [28]:
# Convert the boolean mask to integer class labels (False -> 0, True -> 1).
y_class = y_class.astype(int)

## 평가

In [29]:
from sklearn import metrics

In [30]:
def getResult(y_test,y_pred):
    """Print a classification report for binary predictions.

    Prints, in order: the confusion matrix, accuracy, precision, recall and
    F1 score. Precision, recall and F1 are computed with label 1 as the
    positive class.

    Args:
        y_test: true labels.
        y_pred: predicted labels, aligned with ``y_test``.

    Returns:
        None. The results are written to standard output only.
    """
    print(metrics.confusion_matrix(y_test, y_pred))
    print('accurracy:', metrics.accuracy_score(y_test, y_pred))
    # The remaining scores share one call shape, so drive them from a table.
    positive_class_scorers = (
        ('precision:', metrics.precision_score),
        ('recall:', metrics.recall_score),
        ('f1:', metrics.f1_score),
    )
    for label, scorer in positive_class_scorers:
        print(label, scorer(y_test, y_pred, pos_label=1))
    

In [31]:
# Report the metrics for the thresholded predictions on the test split.
getResult(y_test,y_class)

[[1661  210]
 [ 204  425]]
accurracy: 0.8344
precision: 0.669291338583
recall: 0.675675675676
f1: 0.67246835443
