### MLP Model on Knowledge Graphs
Using Tensorflow's MLP, we build a model on the EncodedData.csv data in data/

Format for MLP taken from https://github.com/aymericdamien/TensorFlow-Examples/blob/master/examples/3_NeuralNetworks/multilayer_perceptron.py

In [53]:
import tensorflow as tf
import pandas as pd
import numpy as np
import sklearn

Read in the data and seperate into 70% training and 30% testing data

In [89]:
data = pd.read_csv('../data/EncodedData.csv')

sz_y = 1
[m,n] = data.shape

# Seperate into Training/Testing sets
train = data.sample(frac=0.7)
lbltrn = train.iloc[:,n-sz_y:]
test = data.loc[~data.index.isin(train.index)]
lbltst = test.iloc[:,n-sz_y:]

train.shape,lbltrn.shape,test.shape,lbltst.shape

((3253, 603), (3253, 1), (1394, 603), (1394, 1))

In [100]:
# Parameters
learning_rate = 0.001
training_epochs = 15
batch_size = 100
display_step = 1

# Network Parameters
n_hidden_1 = 256 # 1st layer number of neurons
n_hidden_2 = 256 # 2nd layer number of neurons
n_input = 600 # MNIST data input (img shape: 28*28)
n_classes = 1 # MNIST total classes (0-9 digits)

# tf Graph input
X = tf.placeholder("float", [None, n_input])
Y = tf.placeholder("float", [None, n_classes])

In [101]:
# Store layers weight & bias
weights = {
    'h1': tf.Variable(tf.random_normal([n_input, n_hidden_1])),
    'h2': tf.Variable(tf.random_normal([n_hidden_1, n_hidden_2])),
    'out': tf.Variable(tf.random_normal([n_hidden_2, n_classes]))
}
biases = {
    'b1': tf.Variable(tf.random_normal([n_hidden_1])),
    'b2': tf.Variable(tf.random_normal([n_hidden_2])),
    'out': tf.Variable(tf.random_normal([n_classes]))
}

In [102]:
# Create model
def multilayer_perceptron(x):
    # Hidden fully connected layer with 256 neurons
    layer_1 = tf.add(tf.matmul(x, weights['h1']), biases['b1'])
    # Hidden fully connected layer with 256 neurons
    layer_2 = tf.add(tf.matmul(layer_1, weights['h2']), biases['b2'])
    # Output fully connected layer with a neuron for each class
    out_layer = tf.matmul(layer_2, weights['out']) + biases['out']
    return out_layer

In [103]:
# Construct model
logits = multilayer_perceptron(X)

# Define loss and optimizer
loss_op = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(
    logits=logits, labels=Y))
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
train_op = optimizer.minimize(loss_op)
# Initializing the variables
init = tf.global_variables_initializer()


In [104]:
for each_iter in range(3):
    # Seperate into Training/Testing sets
    train = data.sample(frac=0.3)
    lbltrn = train.iloc[:,n-sz_y:]
    test = data.loc[~data.index.isin(train.index)]
    lbltst = test.iloc[:,n-sz_y:]
    
    with tf.Session() as sess:
        sess.run(init)

        # Training cycle
        for epoch in range(training_epochs):
            avg_cost = 0.
            total_batch = int(train.shape[0]/batch_size)
            # Loop over all batches
            for i in range(total_batch):
                batch_x = train.iloc[(i-1)*batch_size:i*batch_size,2:n-sz_y]
                batch_y = lbltrn.iloc[(i-1)*batch_size:i*batch_size,:]
                # Run optimization op (backprop) and cost op (to get loss value)
                _, c = sess.run([train_op, loss_op], feed_dict={X: batch_x,
                                                                Y: batch_y})
                # Compute average loss
                avg_cost += c / total_batch
            # Display logs per epoch step
            if epoch % display_step == 0:
                nothing = 1
                #print("Epoch:", '%04d' % (epoch+1), "cost={:.9f}".format(avg_cost))
        #print("Optimization Finished!")

        # Test model
        pred = tf.nn.softmax(logits)  # Apply softmax to logits
        correct_prediction = tf.equal(tf.argmax(pred, 1), tf.argmax(Y, 1))
        # Calculate accuracy
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
        print("Accuracy:", accuracy.eval({X: test.iloc[:,2:n-sz_y], Y: lbltst}))

Accuracy: 1.0
Accuracy: 1.0
Accuracy: 1.0


In [151]:
sess = tf.Session()
sess.run(init)
for epoch in range(training_epochs):
        avg_cost = 0.
        total_batch = int(train.shape[0]/batch_size)
        # Loop over all batches
        for i in range(total_batch):
            batch_x = train.iloc[(i-1)*batch_size:i*batch_size,2:n-sz_y]
            batch_y = lbltrn.iloc[(i-1)*batch_size:i*batch_size,:]
            # Run optimization op (backprop) and cost op (to get loss value)
            _, c = sess.run([train_op, loss_op], feed_dict={X: batch_x,
                                                            Y: batch_y})
            # Compute average loss
            avg_cost += c / total_batch
        # Display logs per epoch step
        if epoch % display_step == 0:
            nothing = 1
            #print("Epoch:", '%04d' % (epoch+1), "cost={:.9f}".format(avg_cost))
    #print("Optimization Finished!")

# Test model
pred = tf.nn.softmax(logits)  # Apply softmax to logits
correct_prediction = tf.equal(tf.argmax(pred, 1), tf.argmax(Y, 1))
# Calculate accuracy
accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
print("Accuracy:", accuracy.eval({X: test.iloc[:,2:n-sz_y], Y: lbltst.iloc[:,:]},session=sess))

Accuracy: 1.0


In [130]:
_, c = sess.run([train_op, loss_op], feed_dict={X: batch_x,Y: batch_y})

In [136]:
prediction=tf.argmax(logits,1)
best = sess.run([prediction],{X: test.iloc[:,2:n-sz_y], Y: lbltst})
print(best)

[array([0, 0, 0, ..., 0, 0, 0])]


In [152]:
lbltst.iloc[:,:]

Unnamed: 0,Y
0,1
1,3
3,5
6,4
9,5
10,5
11,2
12,5
14,4
15,4
