In [None]:
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.preprocessing import MinMaxScaler       # Normalization
from sklearn.model_selection import train_test_split # train, test 분리
from sklearn.model_selection import KFold            # Cross Validation

# Load the BMI dataset; later cells read its 'height', 'weight' and 'label' columns.
df = pd.read_csv('./bmi.csv')

In [None]:
# Hold out 30% of the rows as a test set. random_state pins the shuffle so the
# same split is produced on every run.
x_data_train, x_data_test, t_data_train, t_data_test = train_test_split(
    df[['height', 'weight']],
    df['label'],
    test_size=0.3,
    random_state=0,
)

# Min-max normalization: the scaler is fitted on the training features only
# and then reused for the test features, so no test statistics leak in.
scaler = MinMaxScaler()
x_data_train_norm = scaler.fit_transform(x_data_train)
x_data_test_norm = scaler.transform(x_data_test)

# One-hot encode the labels (depth 3) with TensorFlow, evaluating both
# tensors in a single session call.
sess = tf.Session()
t_data_train_onehot, t_data_test_onehot = sess.run(
    [tf.one_hot(t_data_train, depth=3), tf.one_hot(t_data_test, depth=3)]
)

In [None]:
# Graph inputs: two features per sample (X) and a 3-way one-hot target (T).
X = tf.placeholder(dtype=tf.float32, shape=[None, 2])
T = tf.placeholder(dtype=tf.float32, shape=[None, 3])

# Trainable parameters, initialised with random normal values.
W = tf.Variable(tf.random.normal([2, 3]), name='weight')
b = tf.Variable(tf.random.normal([3]), name='bias')

# Linear scores (logits) and their softmax probabilities.
logit = tf.matmul(X, W) + b
H = tf.nn.softmax(logit)

# Loss: mean of the per-sample softmax cross-entropy.
cross_entropy = tf.nn.softmax_cross_entropy_with_logits_v2(labels=T,
                                                           logits=logit)
loss = tf.reduce_mean(cross_entropy)

# Running `train` applies one gradient-descent update that minimizes `loss`.
optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.1)
train = optimizer.minimize(loss)


# Training hyper-parameters read by run_train.
num_of_epoch = 1000
batch_size = 100

# 학습용 함수
def run_train(sess, train_x, train_t):
    """Train the softmax model from scratch with mini-batch gradient descent.

    All graph variables are re-initialised at the start of every call, so each
    call begins from fresh weights. The module-level ``num_of_epoch`` and
    ``batch_size`` settings control the loop, and the loss of the last
    mini-batch is printed every 100 epochs.

    Args:
        sess: TensorFlow session used to run the graph.
        train_x: Feature array with one row per sample; fed to placeholder ``X``.
        train_t: Target array aligned row-for-row with ``train_x``; fed to ``T``.

    Raises:
        ValueError: If ``train_x`` contains no samples.
    """
    num_samples = train_x.shape[0]
    if num_samples == 0:
        # Without any batch there is nothing to optimise and no loss to report.
        raise ValueError('run_train requires at least one training sample')

    print('### 학습 시작 ###')
    # Reset every variable so the model is trained from scratch.
    sess.run(tf.global_variables_initializer())

    # Stepping by batch_size also yields the final, shorter batch, so trailing
    # samples are not dropped and an input smaller than batch_size still
    # produces one batch. The starts do not change between epochs.
    batch_starts = range(0, num_samples, batch_size)

    for step in range(num_of_epoch):
        for start in batch_starts:
            stop = start + batch_size
            _, loss_val = sess.run([train, loss],
                                   feed_dict={X: train_x[start:stop],
                                              T: train_t[start:stop]})

        if step % 100 == 0:
            print('Loss : {}'.format(loss_val))
    print('### 학습 종료 ###')
    
# Accuracy: fraction of samples whose highest-probability class matches the
# class marked in the one-hot target.
predict = tf.argmax(H, axis=1)
correct = tf.equal(predict, tf.argmax(T, axis=1))
accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))

In [None]:
# K-Fold Cross Validation
cv = 5          # number of folds
results = []    # validation accuracy measured for each fold

# random_state pins the shuffle so fold membership (and therefore the reported
# scores' data splits) is the same on every run, matching the seeded
# train/test split above.
kf = KFold(n_splits=cv, shuffle=True, random_state=0)

for training_idx, validation_idx in kf.split(x_data_train_norm):
    # Fancy indexing selects the rows that belong to this fold.
    training_x = x_data_train_norm[training_idx]
    training_t = t_data_train_onehot[training_idx]

    val_x = x_data_train_norm[validation_idx]
    val_t = t_data_train_onehot[validation_idx]

    # Train from scratch on this fold's training part, then score the model
    # on the part that was held out.
    run_train(sess,training_x,training_t)
    results.append(sess.run(accuracy, feed_dict={X:val_x, T:val_t}))

print('측정한 각각의 결과값 : {}'.format(results))
print('최종 K-Fold 교차검증을 사용한 Accuracy : {}'.format(np.mean(results)))

In [None]:
# Testing
# At this point the session still holds the weights left by the last
# cross-validation fold, which were fitted on only part of the training data.
# Retrain on the full training set so the final score does not depend on that
# leftover state.
run_train(sess, x_data_train_norm, t_data_train_onehot)

final_accuracy = sess.run(accuracy, feed_dict={X:x_data_test_norm,
                                               T:t_data_test_onehot})
print('우리 Model의 최종 정확도는 : {}'.format(final_accuracy))

In [None]:
from sklearn.metrics import confusion_matrix, classification_report

# Per-class report comparing the true test labels with the model's predicted
# class indices.
target_names = ['0', '1', '2']

test_predictions = sess.run(predict, feed_dict={X: x_data_test_norm})
report_text = classification_report(t_data_test,
                                    test_predictions,
                                    target_names=target_names)
print(report_text)

In [None]:
# Confusion matrix of true test labels versus predicted class indices.
predicted_labels = sess.run(predict, feed_dict={X: x_data_test_norm})
test_confusion = confusion_matrix(t_data_test, predicted_labels)
print(test_confusion)