In [1]:
import tensorflow as tf
import numpy as np
import random

import import_ipynb
import preprocessor

tf.set_random_seed(777)
df = preprocessor.preprocessed

importing Jupyter notebook from preprocessor.ipynb


In [2]:
def divide_sets(predicting_column_indices, test_size):
    random_array = random.sample(range(df.shape[0]), test_size)
    x_train = df.drop(random_array).drop(df.columns[predicting_column_indices], axis=1)
    y_train = df.drop(random_array).iloc[:, predicting_column_indices]
    x_test = df.iloc[random_array, :].drop(df.columns[predicting_column_indices], axis=1)
    y_test = df.iloc[random_array, predicting_column_indices]
    return x_train.values, y_train.values, x_test.values, y_test.values

In [3]:
def get_batch(step, batch_size, x_train, y_train):
    training_data_size = x_train.shape[0]
    start_idx = step * batch_size
    end_idx = (step + 1) * batch_size
    if end_idx > training_data_size:
        end_idx = training_data_size
    return (
        x_train[start_idx:end_idx, :],
        y_train[start_idx:end_idx, :]
    )

In [7]:
def build_layer(L_in, in_size, out_size, keep_prob):
    W = tf.get_variable(
        str(id(L_in)),
        shape=[in_size, out_size],
        initializer=tf.contrib.layers.xavier_initializer()
    )
    #W = tf.Variable(tf.random_normal([in_size, out_size]))
    b = tf.Variable(tf.random_normal([out_size]))
    L = tf.nn.relu(tf.matmul(L_in, W) + b)
    return tf.nn.dropout(L, keep_prob=keep_prob)

def build_network(layer_sizes, learning_rate):
    X = tf.placeholder(tf.float32, shape=[None, layer_sizes[0]])
    Y = tf.placeholder(tf.float32, shape=[None, layer_sizes[-1]])
    keep_prob = tf.placeholder(tf.float32)

    L1 = build_layer(X, layer_sizes[0], layer_sizes[1], keep_prob)
    L2 = build_layer(L1, layer_sizes[1], layer_sizes[2], keep_prob)
    L3 = build_layer(L2, layer_sizes[2], layer_sizes[3], keep_prob)
    L4 = build_layer(L3, layer_sizes[3], layer_sizes[4], keep_prob)
    L5 = build_layer(L4, layer_sizes[4], layer_sizes[5], keep_prob)
    L6 = build_layer(L5, layer_sizes[5], layer_sizes[6], keep_prob)
    hypothesis = build_layer(L6, layer_sizes[6], layer_sizes[7], keep_prob)

    cost = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits_v2(
            logits=hypothesis, labels=Y
        ))
    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
    train = optimizer.minimize(cost)
    
    return train, cost, hypothesis, X, Y, keep_prob

In [15]:
def run(
    training_data_size, batch_size, training_epochs,
    cost, train, X, Y, keep_probability):
    total_batch = int(training_data_size / batch_size)
    sess = tf.Session()
    sess.run(tf.global_variables_initializer())
    for epoch in range(training_epochs):
        avg_cost = 0
        for step in range(total_batch):
            batch_x, batch_y = get_batch(step, batch_size, x_train, y_train)
            cost_val, _ = sess.run(
                [cost, train],
                feed_dict={X: batch_x, Y: batch_y, keep_prob: keep_probability}
            )
            avg_cost += cost_val / total_batch
        print('Epoch:', '%04d' % (epoch + 1), 'cost = ', '{:.9f}'.format(avg_cost * 1e5))
    return sess

In [17]:
def calc_accuracy(hypothesis, X, Y, x_test, y_test, keep_prob):
    correct_prediction = tf.equal(tf.argmax(hypothesis, 1), tf.argmax(Y, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    accuracy = tf.reduce_mean(tf.exp(-tf.square((hypothesis - Y) / 3)))
    return sess.run(accuracy, feed_dict={X: x_test, Y: y_test, keep_prob: 1})

In [18]:
test_size = 200
predicting_column_indices = [0]
learning_rate = 1e-7 
training_epochs = 10
batch_size = 100
keep_probability = 1

x_train, y_train, x_test, y_test = divide_sets(
    predicting_column_indices, test_size
)
training_data_size = x_train.shape[0]
layer_sizes = [x_train.shape[1], 300, 250, 200, 150, 100, 50, y_train.shape[1]]
train, cost, hypothesis, X, Y, keep_prob = build_network(layer_sizes, learning_rate)
sess = run(
    training_data_size, batch_size, training_epochs,
    cost, train, X, Y, keep_probability
)
print("Train_accuracy: ", calc_accuracy(hypothesis, X, Y, x_train, y_train, keep_prob))
print("Test accuracy: ", calc_accuracy(hypothesis, X, Y, x_test, y_test, keep_prob))

Epoch: 0001 cost =  0.000000000
Epoch: 0002 cost =  0.000000000
Epoch: 0003 cost =  0.000000000
Epoch: 0004 cost =  0.000000000
Epoch: 0005 cost =  0.000000000
Epoch: 0006 cost =  0.000000000
Epoch: 0007 cost =  0.000000000
Epoch: 0008 cost =  0.000000000
Epoch: 0009 cost =  0.000000000
Epoch: 0010 cost =  0.000000000
Train_accuracy:  0.8846947
Test accuracy:  0.8776823
