In [1]:
import numpy as np
import pandas as pd

import tensorflow as tf

from sklearn.preprocessing import scale
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OneHotEncoder
from sklearn.metrics import accuracy_score

import time
import math

In [None]:
os.environ["CUDA_VISIBLE_DEVICES"]="2"

In [2]:
df = pd.read_pickle("../data/large200analysis.pickle")

In [3]:
# Drop NA columns
df = df.drop("complexFeatures.entropyDegreeDistribution", axis=1)
df = df.drop("complexFeatures.vertexParticipationCoefficient", axis=1)

In [4]:
import re
columnNames = list(df)
regexTimes = re.compile(".*Times")
timesColumnNames = list(filter(regexTimes.match, columnNames))
for column in timesColumnNames:
    columnNames.remove(column)
    
regexCosts = re.compile("heuristics.*Costs")
costsColumnNames = list(filter(regexCosts.match, columnNames))
for column in costsColumnNames:
    columnNames.remove(column)
    
columnNames.remove("generated")
columnNames.remove("deepWalk")
columnNames.remove("sequenceLength")
columnNames.remove("costs")

In [5]:
# Drop rows with NA
rowsBefore = df.shape[0]
df = df.dropna()
print("Dropped %d rows due to None values" % (rowsBefore - df.shape[0]))

minCostIndices = df[["heuristics.tabuCosts", "heuristics.simulatedAnnealingCosts", "heuristics.graspCosts", "heuristics.geneticCosts", "heuristics.antColonyCosts"]].idxmin(axis=1)

# Remove all *Times columns
df = df[columnNames]

# Remove name column
df = df.drop(["name"], axis=1)

Dropped 0 rows due to None values


In [7]:
intLabels = LabelEncoder().fit_transform(minCostIndices).reshape(-1, 1)
# 5 values for 5 different heuristics
outputs = OneHotEncoder(sparse=False, n_values=5).fit_transform(intLabels)

inputs = scale(df.astype('float64'),axis=1)

size = df.shape[0]
# Test data is separated in cleaning stage
trainSize = int(size * 0.75)
validSize = size - trainSize

inputsTrain = inputs[0:trainSize]
outputsTrain = outputs[0:trainSize]
intLabelsTrain = intLabels[0:trainSize]

inputsValid = inputs[trainSize:]
outputsValid = outputs[trainSize:]
intLabelsValid = intLabels[trainSize:]



In [8]:
inputsValid.shape

(3523, 38)

In [33]:
outputsTrain

array([[ 0.,  0.,  1.,  0.,  0.],
       [ 0.,  0.,  1.,  0.,  0.],
       [ 0.,  0.,  1.,  0.,  0.],
       ..., 
       [ 0.,  0.,  1.,  0.,  0.],
       [ 0.,  0.,  0.,  0.,  1.],
       [ 0.,  0.,  0.,  0.,  1.]])

In [46]:
EPOCHS = 1000

N1 = trainSize
FEATURE_COUNT = df.shape[1]
LABEL_COUNT = 5

NODES1 = 512
NODES2 = 256

ALPHA = 0.08

BATCH_SIZE = 30

STD = 0.1

LEARNING_RATE = 0.001

In [47]:
# Define the input function for training
inputFunc = tf.estimator.inputs.numpy_input_fn(
    x={"input": inputsTrain.astype(np.float32)}, y=outputsTrain.astype(np.float32),
#     batch_size=BATCH_SIZE, num_epochs=EPOCHS, shuffle=True)
    num_epochs=EPOCHS, shuffle=True)

In [48]:
# Define the neural network
def network(xDict):
    x = xDict["input"]
        
    regularizer = tf.contrib.layers.l2_regularizer(scale=ALPHA)
    
    layer1 = tf.layers.dense(x, NODES1, kernel_regularizer=regularizer, activation=tf.nn.relu)
    
    layer2 = tf.layers.dense(layer1, NODES2, activation=tf.nn.relu)
    # Output fully connected layer with a neuron for each class
    outLayer = tf.layers.dense(layer2, LABEL_COUNT)
    return outLayer

In [57]:
# Define the model function (following TF Estimator Template)
def modelFunc(features, labels, mode):
    # Build the neural network
    logits = network(features)
    
#     resizedLogits = tf.reshape(logits, shape=[-1, MAX_SIZE * MAX_SIZE, 1])
    
    # Predictions
    # TODO: Possibly need to change
#     pred_classes = logits
    pred_classes = tf.argmax(logits, axis=1)
    pred_probas = tf.nn.softmax(logits)
#     pred_probas = tf.nn.sigmoid(logits)
    
    # If prediction mode, early return
    if mode == tf.estimator.ModeKeys.PREDICT:
        return tf.estimator.EstimatorSpec(mode, predictions=pred_classes)
    
    # Define loss and optimizer
    loss_op = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(
        logits=logits, labels=tf.cast(labels, dtype=tf.int32)))
    optimizer = tf.contrib.opt.NadamOptimizer(learning_rate=LEARNING_RATE)
    train_op = optimizer.minimize(loss_op, global_step=tf.train.get_global_step())
    
    # Evaluate the accuracy of the model
    acc_op = tf.metrics.accuracy(labels=tf.argmax(labels, axis=1), predictions=pred_classes)
#     acc_op = tf.metrics.accuracy(labels=labels, predictions=pred_classes)
    
    # TF Estimators requires to return a EstimatorSpec, that specify
    # the different ops for training, evaluating, ...
    estim_specs = tf.estimator.EstimatorSpec(
      mode=mode,
      predictions=pred_classes,
      loss=loss_op,
      train_op=train_op,
      eval_metric_ops={'accuracy': acc_op})

    return estim_specs

In [58]:
# Build the Estimator
config = tf.ConfigProto()
# config.gpu_options.per_process_gpu_memory_fraction = 0.8
model = tf.estimator.Estimator(modelFunc, config=tf.contrib.learn.RunConfig(session_config=config, save_summary_steps=10000, log_step_count_steps=10000))

INFO:tensorflow:Using config: {'_task_type': None, '_task_id': 0, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7f66506231d0>, '_master': '', '_num_ps_replicas': 0, '_num_worker_replicas': 0, '_environment': 'local', '_is_chief': True, '_evaluation_master': '', '_tf_config': gpu_options {
  per_process_gpu_memory_fraction: 1.0
}
, '_tf_random_seed': None, '_save_summary_steps': 10000, '_save_checkpoints_secs': 600, '_log_step_count_steps': 10000, '_session_config': , '_save_checkpoints_steps': None, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_model_dir': '/tmp/tmpevdyhp3_'}


In [59]:
trainResults = []
validResults = []

In [60]:
trainFunc = tf.estimator.inputs.numpy_input_fn(
    x={"input": inputsTrain.astype(np.float32)}, y=outputsTrain.astype(np.float32),
    batch_size=BATCH_SIZE, shuffle=False)

validFunc = tf.estimator.inputs.numpy_input_fn(
    x={"input": inputsValid.astype(np.float32)}, y=outputsValid.astype(np.float32),
    batch_size=BATCH_SIZE, shuffle=False)

# for i in range(0, 100):
model.train(inputFunc, steps=10000)

print("Evaluating Train")
accuracy = model.evaluate(trainFunc)
trainResults.append(accuracy)

print("Evaluating Valid")
accuracy = model.evaluate(validFunc)
validResults.append(accuracy)

INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Saving checkpoints for 1 into /tmp/tmpevdyhp3_/model.ckpt.
INFO:tensorflow:loss = 1.7661, step = 1
INFO:tensorflow:loss = 1.05966, step = 101 (0.296 sec)
INFO:tensorflow:loss = 1.08522, step = 201 (0.260 sec)
INFO:tensorflow:loss = 1.0894, step = 301 (0.283 sec)
INFO:tensorflow:loss = 1.03391, step = 401 (0.282 sec)
INFO:tensorflow:loss = 1.13668, step = 501 (0.256 sec)
INFO:tensorflow:loss = 1.08396, step = 601 (0.273 sec)
INFO:tensorflow:loss = 0.965325, step = 701 (0.293 sec)
INFO:tensorflow:loss = 1.0104, step = 801 (0.281 sec)
INFO:tensorflow:loss = 0.981573, step = 901 (0.272 sec)
INFO:tensorflow:loss = 0.98376, step = 1001 (0.283 sec)
INFO:tensorflow:loss = 0.940531, step = 1101 (0.265 sec)
INFO:tensorflow:loss = 1.02575, step = 1201 (0.267 sec)
INFO:tensorflow:loss = 1.11168, step = 1301 (0.259 sec)
INFO:tensorflow:loss = 1.05623, step = 1401 (0.276 sec)
INFO:tensorflow:loss = 1.01421, step = 1501 (0.242 sec)
INFO:tens