In [0]:
### 로컬 데이터 불러오기 ###
from google.colab import files
uploaded = files.upload()
for fn in uploaded.keys():
  print('User uploaded file "{name}" with length {length} bytes'. format(name = fn, length = len(uploaded[fn])))

Saving CreditCard.csv to CreditCard.csv
User uploaded file "CreditCard.csv" with length 77138 bytes


In [0]:
### 모듈 및 함수 로딩 ###
import io
import pandas as pd
import numpy as np
import tensorflow as tf
import random
from sklearn.preprocessing import LabelEncoder, normalize

In [0]:
### 데이터 로딩 ###
credit_card = pd.read_csv(io.StringIO(uploaded['CreditCard.csv'].decode('utf-8')))

In [0]:
### 범주형 변수 더미화 ###
credit_cat = credit_card[["card", "owner", "selfemp"]]

label_enc = LabelEncoder()

card = label_enc.fit_transform(credit_cat["card"])
card.shape = (len(card), 1)

owner = label_enc.fit_transform(credit_cat["owner"])
owner.shape = (len(owner), 1)

selfemp = label_enc.fit_transform(credit_cat["selfemp"])
selfemp.shape = (len(selfemp), 1)

In [0]:
### 수치형 변수 정규화 ###
credit_num  = credit_card.drop(["card", "owner", "selfemp", "share"], axis = 1)
credit_num_norm = normalize(credit_num)

In [0]:
### X, y 데이터 설정 ###
credit_X = np.concatenate([card, owner, selfemp, credit_num_norm], axis = 1)
credit_y = np.array(credit_card['share'])
credit_y.shape = (len(credit_y), 1)

In [0]:
### train, test 데이터로 분할 ###
train_idx = random.sample(list(range(len(credit_card))), int(len(credit_card) * 0.7))
train_X = credit_X[train_idx, :]
train_y = credit_y[train_idx]

test_X = np.delete(credit_X, train_idx, axis = 0)
test_y = np.delete(credit_y, train_idx)
test_y.shape = (len(test_y), 1)

In [0]:
X = tf.placeholder(dtype = tf.float32, shape = (None, 11))
y = tf.placeholder(dtype = tf.float32, shape = None)


In [0]:
W1 = tf.Variable(initial_value = tf.random_normal([11,4]), dtype = tf.float32)
b1 = tf.Variable(initial_value = tf.random_normal([4]), dtype = tf.float32)
L1 = tf.add(tf.matmul(X, W1), b1)

In [0]:
W2 = tf.Variable(initial_value = tf.random_normal([4, 1]), dtype = tf.float32)
b2 = tf.Variable(initial_value = tf.random_normal([1]), dtype = tf.float32)
hypo = tf.add(tf.matmul(L1, W2), b2)

In [0]:
cost = tf.reduce_mean(tf.square(hypo- y))

optimizer = tf.train.GradientDescentOptimizer(learning_rate = 0.01)
train = optimizer.minimize(cost)

sess = tf.Session()
sess.run(tf.global_variables_initializer())

In [0]:
for i in range(1000):
  _, cost_val = sess.run([train, cost], feed_dict = {X: train_X, y : train_y})
  if i % 100 ==0:
    print("cost: ", cost_val)
print("train_finished!")

cost:  6.968681
cost:  0.16641833
cost:  0.08436834
cost:  0.063424654
cost:  0.054314632
cost:  0.048507616
cost:  0.04411387
cost:  0.04056678
cost:  0.037621822
cost:  0.03513724
train_finished!


In [0]:
pred_val, pred_cost = sess.run([hypo, cost], feed_dict = ({X: test_X, y : test_y}))
print("predict value: ", pred_val, "\n", "predict cost: ", pred_cost)


predict value:  [[-2.77253836e-01]
 [ 9.58121419e-02]
 [-1.02744102e-02]
 [ 8.14068317e-02]
 [-2.55194843e-01]
 [-5.01984358e-02]
 [ 5.98791838e-02]
 [ 1.14268482e-01]
 [ 1.35207713e-01]
 [-2.61202604e-01]
 [ 2.26454496e-01]
 [ 1.60918176e-01]
 [ 1.53310120e-01]
 [ 1.89083576e-01]
 [-1.91513896e-02]
 [ 2.02971816e-01]
 [-1.71022236e-01]
 [ 6.31069541e-02]
 [ 2.35209584e-01]
 [ 8.43721628e-02]
 [-5.12999296e-02]
 [ 6.53854609e-02]
 [-2.44584084e-02]
 [ 3.79449725e-02]
 [ 7.74394870e-02]
 [ 2.30163217e-01]
 [ 1.39648318e-01]
 [ 1.39276803e-01]
 [ 2.21821964e-01]
 [ 1.39757514e-01]
 [ 1.15436852e-01]
 [ 3.19838524e-04]
 [-1.36746138e-01]
 [-1.75015926e-02]
 [ 6.97110891e-02]
 [ 4.97368574e-02]
 [ 9.92419720e-02]
 [ 1.38631225e-01]
 [ 2.19925463e-01]
 [ 1.96480572e-01]
 [ 1.94987357e-01]
 [ 1.60680652e-01]
 [ 9.06897187e-02]
 [ 2.89257407e-01]
 [ 1.01001322e-01]
 [ 7.29106367e-01]
 [ 1.41523719e-01]
 [-5.19150496e-03]
 [-7.00618625e-02]
 [ 1.51610017e-01]
 [ 2.51676500e-01]
 [-2.68457532e-