In [8]:
import tensorflow as tf
import numpy as np
import math

# percentage of each dataset's rows that is held back as test samples
percent_test_samples = 10

# load file
def _load_usage_csv(path):
    """Load one usage CSV into a NumPy array.

    The first line of the file is skipped and only columns 1 through 23
    are read. The path is handed to np.loadtxt directly (instead of an
    open file object) so that NumPy opens *and closes* the file itself;
    no file handle is left dangling.
    """
    return np.loadtxt(path, delimiter=",", skiprows=1, usecols=range(1, 24))

dataset2014 = _load_usage_csv("../../datasets/usage2014.csv")
dataset2015 = _load_usage_csv("../../datasets/usage2015.csv")
dataset2016 = _load_usage_csv("../../datasets/usage2016.csv")

# "features" here refers to the column split: which columns are model inputs and which one is the label
def get_training_features_indexes(dataset):
    """Return the column split for a 2-D ``dataset``.

    Returns a pair ``(features_stop, label_columns)``: ``features_stop`` is
    the exclusive upper bound of the feature columns (every column except
    the last one), and ``label_columns`` is a one-element list holding the
    index of the last column.
    """
    _, n_columns = dataset.shape
    last_column = n_columns - 1

    # features index, labels index
    return last_column, [last_column]

# "samples" here refers to the row split: which rows train the model and which rows test it
def get_training_samples_indexes(dataset):
    """Return how many leading rows of a 2-D ``dataset`` are training samples.

    ``percent_test_samples`` percent of the rows (module-level setting) is
    reserved for testing; the remaining row count is rounded up.
    """
    total_rows, _ = dataset.shape
    held_out_rows = (total_rows * percent_test_samples / 100)

    # the first n rows are the training samples
    return math.ceil(total_rows - held_out_rows)
    
def get_features(dataset):
    """Split ``dataset`` into training/test features and labels.

    The row boundary comes from get_training_samples_indexes() and the
    column layout from get_training_features_indexes(). Rows before the
    boundary are training samples, the rest are test samples.

    Returns ``(training_features, training_labels, test_features, test_labels)``.
    """
    split_row = get_training_samples_indexes(dataset)
    features_stop, label_cols = get_training_features_indexes(dataset)

    train_rows = slice(None, split_row)
    test_rows = slice(split_row, None)
    feature_cols = slice(None, features_stop)

    return (
        dataset[train_rows, feature_cols],
        dataset[train_rows, label_cols],
        dataset[test_rows, feature_cols],
        dataset[test_rows, label_cols],
    )

# split every year's dataset the same way
training_features_2014, training_labels_2014, test_features_2014, test_labels_2014 = get_features(dataset2014)
training_features_2015, training_labels_2015, test_features_2015, test_labels_2015 = get_features(dataset2015)
training_features_2016, training_labels_2016, test_features_2016, test_labels_2016 = get_features(dataset2016)

# stack the yearly pieces row-wise, in the order 2014, 2015, 2016
training_features = np.concatenate(
    (training_features_2014, training_features_2015, training_features_2016), axis=0)
training_labels = np.concatenate(
    (training_labels_2014, training_labels_2015, training_labels_2016), axis=0)
test_features = np.concatenate(
    (test_features_2014, test_features_2015, test_features_2016), axis=0)
test_labels = np.concatenate(
    (test_labels_2014, test_labels_2015, test_labels_2016), axis=0)

# one shape per line: training features, training labels, test features, test labels
print(training_features.shape, training_labels.shape, test_features.shape, test_labels.shape, sep="\n")

(514789, 22)
(514789, 1)
(57198, 22)
(57198, 1)


In [14]:
# layer widths: input and output sizes follow the data, both hidden layers have 500 units
input_neurons = training_features.shape[1]
hl1_neuron = 500
hl2_neuron = 500
output_neurons = training_labels.shape[1]
batch_size = 100

# placeholders fed at run time; the leading None leaves the number of rows open
x = tf.placeholder(tf.float32, [None, input_neurons])
y = tf.placeholder(tf.float32, [None, output_neurons])

# trainable parameters of each layer, initialised from tf.truncated_normal
hidden1_weights_biases = dict(
    weights=tf.Variable(tf.truncated_normal([input_neurons, hl1_neuron])),
    biases=tf.Variable(tf.truncated_normal([hl1_neuron])),
)

hidden2_weights_biases = dict(
    weights=tf.Variable(tf.truncated_normal([hl1_neuron, hl2_neuron])),
    biases=tf.Variable(tf.truncated_normal([hl2_neuron])),
)

output_weights_biases = dict(
    weights=tf.Variable(tf.truncated_normal([hl2_neuron, output_neurons])),
    biases=tf.Variable(tf.truncated_normal([output_neurons])),
)

# hidden layers: affine transform followed by a sigmoid
l1 = tf.nn.sigmoid(
    tf.add(tf.matmul(x, hidden1_weights_biases["weights"]), hidden1_weights_biases["biases"]))

l2 = tf.nn.sigmoid(
    tf.add(tf.matmul(l1, hidden2_weights_biases["weights"]), hidden2_weights_biases["biases"]))

# output layer: affine transform only, no activation
prediction = tf.add(tf.matmul(l2, output_weights_biases["weights"]), output_weights_biases["biases"])

# loss: mean of the squared differences between prediction and target
squared_error = tf.pow(prediction - y, 2)
mean_square = tf.reduce_mean(squared_error)
train_step = tf.train.AdagradOptimizer(learning_rate=0.5).minimize(mean_square)


def next_batch(batch_size):
    """Return the next ``(features, labels)`` slice of the training data.

    The slice starts at the module-level ``batch_count`` row and spans
    ``batch_size`` rows of ``training_features`` / ``training_labels``.
    """
    # e.g. rows 100 to 200 when batch_count is 100 and batch_size is 100
    rows = slice(batch_count, batch_count + batch_size)
    return training_features[rows, :], training_labels[rows, :]

hm_epochs = 10

print( "Beginning Training" )

sess = tf.InteractiveSession()
tf.global_variables_initializer().run()

# Train for a fixed number of epochs (hm_epochs). There is no early stopping:
# successive losses are not compared against any tolerance.
for epoch in range(hm_epochs):
    # running sum of the per-batch mean squared errors of this epoch
    epoch_loss = 0
    # next_batch() reads the module-level batch_count as the first row of the
    # batch, so the loop variable must carry exactly that name. Walking over
    # every start offset also covers the final, shorter batch when the number
    # of training rows is not a multiple of batch_size.
    for batch_count in range(0, training_features.shape[0], batch_size):
        batch_xs, batch_ys = next_batch(batch_size)
        _, c = sess.run([train_step,mean_square], feed_dict={x: batch_xs, y: batch_ys})
        epoch_loss += c
    print('Epoch',epoch,'completed out of',hm_epochs,'loss:',epoch_loss)

# mean squared error over the whole test split
print( "Test Cost =", sess.run(mean_square, feed_dict={x: test_features, y: test_labels}) )

# compute the predicted output for the test features; prediction is built
# from x only, so the labels do not need to be fed
pred_y = sess.run( prediction, feed_dict={x: test_features} )
print(test_labels)
print(pred_y)
# Optional side-by-side view of the first ten rows. The loop names avoid
# rebinding the placeholder y, and zip() is wrapped in list() so it can be sliced.
# print( "\nPrediction\nreal\tpredicted" )
# for (actual, predicted) in list(zip( test_labels, pred_y ))[0:10]:
#     print( "%1.1f\t%1.1f" % (actual, predicted ) )

Beginning Training
Epoch 0 completed out of 10 loss: 61931.7998324
Epoch 1 completed out of 10 loss: 266.227326419
Epoch 2 completed out of 10 loss: 183.700211356
Epoch 3 completed out of 10 loss: 169.27761441
Epoch 4 completed out of 10 loss: 150.8988877
Epoch 5 completed out of 10 loss: 124.654282911
Epoch 6 completed out of 10 loss: 119.194414256
Epoch 7 completed out of 10 loss: 148.460624624
Epoch 8 completed out of 10 loss: 110.048333298
Epoch 9 completed out of 10 loss: 97.6879375773
Test Cost = 0.0163909
[[ 0.86627667]
 [ 0.54898222]
 [ 0.11671222]
 ..., 
 [ 2.06645   ]
 [ 1.95716667]
 [ 2.05415   ]]
[[ 0.79901397]
 [ 0.29979372]
 [ 0.20209718]
 ..., 
 [ 2.07268   ]
 [ 1.9854809 ]
 [ 2.07171392]]
