In [1]:
# Dependencies
import glob, os
import tensorflow as tf
import pandas as pd
import numpy as np

In [2]:
path = 'Data/merge_data/4_features'                     
all_files = glob.glob(os.path.join(path, "*.csv"))     # advisable to use os.path.join as this makes concatenation OS independent

df_from_each_file = (pd.read_csv(f) for f in all_files)
concatenated_dataset   = pd.concat(df_from_each_file, ignore_index=True)

print("\nTraining a neural network on Spectrum Protect data using TensorFlow ")
print("Loading the Spectrum Protect data to memory...")
# Loading the dataset
# dataset = pd.read_csv('ourdata.csv')
print(concatenated_dataset)
print("Finish loading")


Training a neural network on Spectrum Protect data using TensorFlow 
Loading the Spectrum Protect data to memory...
          POOLID    SIZE     OFFSET  LENGTH
0             27  481580   83025920   27701
1             27  481686   83054592   27517
2             27  481686   83083264   20937
3             27  125893   48144384   33383
4             27  481686   83107840   21768
5             27  481686   83132416   27737
6             27  481686   83161088   22400
7             27  481686   83185664   21981
8             27  327415   90664960   21884
9             27  327415   90800128   22323
10            27  327337   90824704   17923
11            27  481580   83210240   22329
12            27  481686   83234816   25152
13            27  481686   83263488   19543
14            27  756310   82366464   19493
15            27   25820   82386944     240
16            27   50002   82391040    3943
17            27  160010   82395136   13375
18            27  327415   90845184   26477
19 

In [3]:
# One-hot encoding for the categories
concatenated_dataset = pd.get_dummies(concatenated_dataset, columns=['POOLID']) 
values = list(concatenated_dataset.columns.values)
print(concatenated_dataset)

            SIZE     OFFSET  LENGTH  POOLID_24  POOLID_27  POOLID_37  \
0         481580   83025920   27701          0          1          0   
1         481686   83054592   27517          0          1          0   
2         481686   83083264   20937          0          1          0   
3         125893   48144384   33383          0          1          0   
4         481686   83107840   21768          0          1          0   
5         481686   83132416   27737          0          1          0   
6         481686   83161088   22400          0          1          0   
7         481686   83185664   21981          0          1          0   
8         327415   90664960   21884          0          1          0   
9         327415   90800128   22323          0          1          0   
10        327337   90824704   17923          0          1          0   
11        481580   83210240   22329          0          1          0   
12        481686   83234816   25152          0          1       

In [4]:
#Normalization
X_train = concatenated_dataset[values[:4]]
X_train = ((X_train - X_train.min()) / (X_train.max() - X_train.min())).fillna(0)
print(X_train)


              SIZE    OFFSET    LENGTH  POOLID_24
0         0.117573  0.791813  0.006731        0.0
1         0.117599  0.792087  0.006687        0.0
2         0.117599  0.792360  0.005080        0.0
3         0.030735  0.459150  0.008119        0.0
4         0.117599  0.792595  0.005283        0.0
5         0.117599  0.792829  0.006740        0.0
6         0.117599  0.793102  0.005437        0.0
7         0.117599  0.793337  0.005335        0.0
8         0.079935  0.864666  0.005311        0.0
9         0.079935  0.865955  0.005418        0.0
10        0.079916  0.866190  0.004344        0.0
11        0.117573  0.793571  0.005420        0.0
12        0.117599  0.793806  0.006109        0.0
13        0.117599  0.794079  0.004740        0.0
14        0.184646  0.785524  0.004728        0.0
15        0.006303  0.785719  0.000027        0.0
16        0.012207  0.785759  0.000931        0.0
17        0.039065  0.785798  0.003234        0.0
18        0.079935  0.866385  0.006433        0.0


In [5]:
#preprocess the data 
features = np.array(X_train, dtype='float32')
target = np.array(concatenated_dataset[values[4:]], dtype='float32')

# Shuffle Data
indices = np.random.choice(len(features), len(features), replace=False)
X_values = features[indices]
y_values = target[indices]

# Creating a Train and a Test Dataset
test_size = 1000
X_test = X_values[-test_size:]
X_train = X_values[:-test_size]
y_test = y_values[-test_size:]
y_train = y_values[:-test_size]

# print(X_train)
print(X_test)
# print(y_test)
# print(y_train)

[[1.2207278e-02 3.3426476e-01 7.4023457e-04 0.0000000e+00]
 [1.0112308e-03 0.0000000e+00 9.3554711e-04 0.0000000e+00]
 [5.3747571e-03 4.1321170e-01 3.4760751e-03 0.0000000e+00]
 ...
 [1.2212161e-02 6.5852928e-01 4.7143566e-04 0.0000000e+00]
 [5.2067883e-02 7.4185133e-01 5.2067883e-02 0.0000000e+00]
 [2.3341388e-01 1.5031563e-01 8.4682390e-02 0.0000000e+00]]


In [6]:
# define a neural network

# Initialize placeholders
X_data = tf.placeholder(shape=[None, 4], dtype=tf.float32)
y_target = tf.placeholder(shape=[None, 10], dtype=tf.float32)

#create seed for random_normal()
seed = 1234
np.random.seed(seed)
tf.set_random_seed(seed)
output_nodes = 10
hidden_layer_nodes = 10
# We create a neural Network which contains 3 layers with 4, 8, 3 nodes repectively
w1 = tf.Variable(tf.random_normal(shape=[4,hidden_layer_nodes])) # Weight of the input layer
b1 = tf.Variable(tf.random_normal(shape=[hidden_layer_nodes]))   # Bias of the input layer
w2 = tf.Variable(tf.random_normal(shape=[hidden_layer_nodes,output_nodes])) # Weight of the hidden layer
b2 = tf.Variable(tf.random_normal(shape=[output_nodes]))                    # Bias of the hidden layer

# tf.summary.histogram("weights", w1)
# tf.summary.histogram("biases", b1)

hidden_output = tf.nn.relu(tf.add(tf.matmul(X_data, w1), b1))
final_output = tf.nn.softmax(tf.add(tf.matmul(hidden_output, w2), b2))

# Loss Function
loss = tf.reduce_mean(-tf.reduce_sum(y_target * tf.log(final_output), axis=0))

# Optimizer
optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.001).minimize(loss)

print("A neural Network which contains 3 layers with 4, 10, 8 nodes repectively was created!")

Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Use tf.cast instead.
A neural Network which contains 3 layers with 4, 10, 8 nodes repectively was created!


In [8]:
print('Training the model...')

# Interval / Epochs
interval = 100
epoch = 1000

# Initialize variables
sess = tf.Session()
init = tf.global_variables_initializer()
sess.run(init)

#Logging for Tensorboard
merged_summary = tf.summary.merge_all()
loss_graph = tf.summary.scalar('loss', loss)
writer = tf.summary.FileWriter('graphs', sess.graph)
writer.add_graph(sess.graph)

# Training the model...
for i in range(1, (epoch + 1)):
    s = sess.run(optimizer, feed_dict={X_data: X_train, y_target: y_train})
    if i % interval == 0:
        #Austin Code that doesnt work
        writer.add_summary(s, i)
        #######
        print('Epoch', i, '|', 'Loss:', sess.run(loss, feed_dict={X_data: X_train, y_target: y_train}))

print("Training finished\n")

Training the model...


AttributeError: 'NoneType' object has no attribute 'value'

In [8]:
# get the accuracy of the model
correct_prediction = tf.equal(tf.argmax(final_output, 1), tf.argmax(y_target,1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
print("The accuracy of the model is:", sess.run(accuracy, feed_dict={X_data: X_test, y_target: y_test}))

# tf.summary.scalar('accuracy', accuracy)

The accuracy of the model is: 0.343


<tf.Tensor 'accuracy:0' shape=() dtype=string>

In [35]:
# Prediction
np.set_printoptions(precision=4)
unknown = np.array([[0.693363, 0.0, 0.002894, 0.148097]], dtype=np.float32)
predicted = sess.run(final_output, feed_dict={X_data: unknown})
# model.predict(unknown)
print("Using model to predict pool id for features: ", unknown)
print("\nPredicted softmax vector is: ",predicted)
Class_dict={'POOLID_-1000000': 0, 'POOLID_-9': 1, 'POOLID_-1': 2, 'POOLID_4': 3, 'POOLID_-1': 4, 'POOLID_6': 5, 'POOLID_42': 6, 'POOLID_72': 7, 'POOLID_82': 8 }
pool_dict = {v:k for k,v in Class_dict.items()}
print("\nPredicted pool id is: ", pool_dict[np.argmax(predicted)])

Using model to predict pool id for features:  [[0.6934 0.     0.0029 0.1481]]

Predicted softmax vector is:  [[1.9665e-01 3.4181e-02 1.7584e-04 7.3081e-02 1.2066e-02 6.8273e-01
  2.8249e-04 8.4213e-04]]

Predicted pool id is:  POOLID_6
