In [1]:
import numpy as np
import pandas as pd

import tensorflow as tf

from sklearn.preprocessing import scale
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OneHotEncoder
from sklearn.metrics import accuracy_score

import time
import math

In [2]:
df = pd.read_pickle("../data/features/analysis.pickle")

In [3]:
# Parameters
MAX_SIZE = 200

In [4]:
# Drop rows with NA
rowsBefore = df.shape[0]
df = df.dropna()
print("Dropped %d rows due to None values" % (rowsBefore - df.shape[0]))

# Filter instances larger than MAX_SIZE
df = df.loc[df["simpleFeatures.numberVertices"] <= MAX_SIZE]

minCostIndices = df[["heuristics.tabuCosts", "heuristics.simulatedAnnealingCosts", "heuristics.graspCosts", "heuristics.geneticCosts", "heuristics.antColonyCosts"]].idxmin(axis=1)

Dropped 39 rows due to None values


In [5]:
# Pad matrices
paddedArray = np.zeros((len(df), MAX_SIZE * MAX_SIZE))
i = 0
for index, row in df.iterrows():
    zeroed = np.zeros((MAX_SIZE, MAX_SIZE))
    costs = row["costs"]
    zeroed[:costs.shape[0],:costs.shape[1]] = costs
    
    paddedArray[i] = scale(zeroed.astype('float64'), axis=1).reshape(MAX_SIZE * MAX_SIZE)
    
    i += 1

In [6]:
df = paddedArray

In [7]:
intLabels = LabelEncoder().fit_transform(minCostIndices).reshape(-1, 1)
# 5 values for 5 different heuristics
outputs = OneHotEncoder(sparse=False, n_values=5).fit_transform(intLabels)

inputs = df

size = df.shape[0]
# Test data is separated in cleaning stage
trainSize = int(size * 0.75)
validSize = size - trainSize

inputsTrain = inputs[0:trainSize]
outputsTrain = outputs[0:trainSize]
intLabelsTrain = intLabels[0:trainSize]

inputsValid = inputs[trainSize:]
outputsValid = outputs[trainSize:]
intLabelsValid = intLabels[trainSize:]

In [15]:
epoch_count = 0

def minibatch(batchSize, n, input_data, output_data):
    input_batches = np.empty((math.ceil(n/batchSize), batchSize) + input_data.shape[1:])
    output_batches = np.empty((math.ceil(n/batchSize), batchSize) + output_data.shape[1:])
    
    global epoch_count
    epoch_count += 1
    indexes = np.random.permutation(n)
    i = 0
    batch_i = 0
    input_array = np.zeros((batchSize,) + input_data.shape[1:])
    output_array = np.zeros((batchSize,) + output_data.shape[1:])
    for index in indexes:
        input_array[i] = input_data[index]
        output_array[i] = output_data[index]
        i += 1

        if i >= batchSize:
            input_batches[batch_i] = input_array
            output_batches[batch_i] = output_array
            i = 0
            batch_i += 1
    
    if(n % batchSize != 0):
        input_array[i:] = input_data[0:batchSize - i]
        output_array[i:] = output_data[0:batchSize - i]
        input_batches[batch_i] = input_array
        output_batches[batch_i] = output_array
    
    return (input_batches, output_batches)

In [16]:
EPOCHS = 1000

N1 = trainSize
FEATURE_COUNT = df.shape[1]
LABEL_COUNT = 5
LEARNING_RATE = 0.01

NODES1 = 256
NODES2 = 128

ALPHA = 0.08

BATCH_SIZE = 30

STD = 0.1

In [17]:
# Setup Tensorflow

# Constants
x_train_full = tf.constant(inputsTrain, dtype='float32', shape=[trainSize, FEATURE_COUNT])
y_train_full = tf.constant(outputsTrain, dtype='float32', shape=[trainSize, LABEL_COUNT])

x_valid_full = tf.constant(inputsValid, dtype='float32', shape=[validSize, FEATURE_COUNT])
y_valid_full = tf.constant(outputsValid, dtype='float32', shape=[validSize, LABEL_COUNT])

x_train = tf.placeholder(tf.float32, [BATCH_SIZE, FEATURE_COUNT])
y_train = tf.placeholder(tf.float32, [BATCH_SIZE, LABEL_COUNT])

# Variables
W_input = tf.Variable(tf.truncated_normal([FEATURE_COUNT, NODES1], stddev=STD, seed = 0))
b_input = tf.Variable(tf.truncated_normal([1, NODES1], stddev=STD, seed = 0))

W_hidden = tf.Variable(tf.truncated_normal([NODES1, NODES2], stddev=STD, seed = 0))
b_hidden = tf.Variable(tf.truncated_normal([1, NODES2], stddev=STD, seed = 0))

W_hidden2 = tf.Variable(tf.truncated_normal([NODES2, LABEL_COUNT], stddev=STD, seed = 0))
b_hidden2 = tf.Variable(tf.truncated_normal([1, LABEL_COUNT], stddev=STD, seed = 0))

# Optimization
input_layer = tf.nn.relu(tf.matmul(x_train, W_input) + b_input)

hidden_layer = tf.nn.relu(tf.matmul(input_layer, W_hidden) + b_hidden)
hidden2_layer = tf.matmul(hidden_layer, W_hidden2) + b_hidden2

logits_train_full = tf.matmul(tf.nn.relu(tf.matmul(tf.nn.relu(tf.matmul(x_train_full, W_input) + b_input), W_hidden) + b_hidden), W_hidden2) + b_hidden2
logits_valid_full = tf.matmul(tf.nn.relu(tf.matmul(tf.nn.relu(tf.matmul(x_valid_full, W_input) + b_input), W_hidden) + b_hidden), W_hidden2) + b_hidden2

L2 = tf.reduce_mean(ALPHA * (tf.nn.l2_loss(W_input) + tf.nn.l2_loss(W_hidden) + tf.nn.l2_loss(W_hidden2)))

CE = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits = hidden2_layer, labels = y_train) + ALPHA * (tf.nn.l2_loss(W_input) + tf.nn.l2_loss(W_hidden) + tf.nn.l2_loss(W_hidden2)))

CE_train_full = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits = logits_train_full, labels = y_train_full) + ALPHA * (tf.nn.l2_loss(W_input) + tf.nn.l2_loss(W_hidden) + tf.nn.l2_loss(W_hidden2)))
CE_valid_full = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits = logits_valid_full, labels = y_valid_full) + ALPHA * (tf.nn.l2_loss(W_input) + tf.nn.l2_loss(W_hidden) + tf.nn.l2_loss(W_hidden2)))

optimizer = tf.train.AdamOptimizer().minimize(CE)

y_pred_train = tf.nn.softmax(logits_train_full)
y_pred_valid = tf.nn.softmax(logits_valid_full)

init = tf.global_variables_initializer()

In [18]:
logits_train_full.shape

TensorShape([Dimension(1114), Dimension(5)])

In [19]:
y_train_full.shape

TensorShape([Dimension(1114), Dimension(5)])

In [20]:
# Initialize TensorFlow
sess = tf.Session()
sess.run(init)

In [21]:
def printStats():
    (ce_train,ce_valid,p_train,p_valid,l2) = sess.run([CE_train_full, CE_valid_full, y_pred_train, y_pred_valid, L2])
    labels_train_pred = oneHotArray[p_train.argmax(axis=1)]
    labels_valid_pred = oneHotArray[p_valid.argmax(axis=1)]
    error_train = 1 - accuracy_score(intLabelsTrain, labels_train_pred)
    error_valid = 1 - accuracy_score(intLabelsValid, labels_valid_pred)
    total_compute_time = (time.time() - t_start)/60
    print('%7d %7d%12.5f%12.5f%12.3f%12.3f%12f%12.1f' % (EPOCHS,epoch_count,ce_train,ce_valid,error_train,error_valid,l2,total_elapsed_time))

In [22]:
# Minimize MSE

train = True

oneHotArray = np.array([0, 1, 2, 3, 4])

total_elapsed_time = 0

ce_time = 0

epoch_count = 0

print('%15s%24s%24s' % (' ','cross-entropy','error-rate'))
print('%15s%12s%12s%12s%12s%12s%12s' % ('epoch','training','validation','training','validation','L2','time (min)'))

while(train):
    batch = minibatch(BATCH_SIZE, N1, inputsTrain, outputsTrain)

    for step in range(batch[0].shape[0]):
        x_batch = batch[0][step]
        y_batch = batch[1][step]
        
        t_start = time.time()
        sess.run([optimizer], feed_dict={x_train:x_batch,y_train:y_batch})
        t_end = time.time()
        
        total_elapsed_time += (t_end - t_start)/60
        
        if t_end - ce_time > 6:
#             (ce) = sess.run(CE_train_full)
#             print("cross-entropy = %f" % (ce))
            printStats()
            
            ce_time = time.time()

            
        if epoch_count >= EPOCHS:
            train = False
            break

print("Finished")
print("Elapsed Time: %f" % (total_elapsed_time))
print("Epoch Count: %d" % (epoch_count))

printStats()

                          cross-entropy              error-rate
          epoch    training  validation    training  validation          L2  time (min)
   1000       1  3148.19653  3150.28906       0.410       0.519 3142.398438         0.0
   1000       4   365.99637   367.82031       0.016       0.583  365.911133         0.1
   1000       7    33.76867    35.21679       0.019       0.642   33.709114         0.2
   1000      10     4.04681     5.98684       0.016       0.589    3.999326         0.3
   1000      13     1.74086     3.60105       0.014       0.616    1.692008         0.3
   1000      16     1.22298     3.67702       0.019       0.562    1.150932         0.4
   1000      19     1.02193     2.66273       0.016       0.565    0.962402         0.5
   1000      22     0.72340     2.64349       0.018       0.589    0.662447         0.6
   1000      25     0.60644     2.95621       0.016       0.589    0.552494         0.7
   1000      28     0.50330     3.15534       0.037     

   1000     282     0.22251     2.33478       0.018       0.589    0.153632         7.8


KeyboardInterrupt: 

In [9]:
# Define the input function for training
inputFunc = tf.estimator.inputs.numpy_input_fn(
    x={"input": inputsTrain}, y=outputsTrain,
    batch_size=BATCH_SIZE, num_epochs=EPOCHS, shuffle=True)

In [10]:
# Define the neural network
def network(xDict):
    x = xDict["input"]
    
    input_layer = tf.reshape(x, shape=[-1, MAX_SIZE, MAX_SIZE, 1])
    
    input_layer = tf.cast(input_layer, tf.float32)
    
    conv1 = tf.layers.conv2d(
        inputs=input_layer,
        filters=10,
        kernel_size=[5, 5],
        padding="same",
        activation=tf.nn.relu)
    
    regularizer = tf.contrib.layers.l2_regularizer(scale=ALPHA)
    
    conv2 = tf.layers.conv2d(
        inputs=conv1,
        filters=20,
        kernel_size=[3, 3],
        padding="same",
        activation=tf.nn.relu)
    
    # Flatten for fully connected
    flatten = tf.contrib.layers.flatten(conv2)
    
#     # Hidden fully connected layer
#     layer1 = tf.layers.dense(flatten, NODES1, kernel_regularizer=regularizer, activation=tf.nn.relu)
    # Hidden fully connected layer
    layer2 = tf.layers.dense(flatten, 64, kernel_regularizer=regularizer, activation=tf.nn.relu)
    # Output fully connected layer with a neuron for each class
    outLayer = tf.layers.dense(layer2, LABEL_COUNT)
    return outLayer

In [11]:
# Define the model function (following TF Estimator Template)
def modelFunc(features, labels, mode):
    # Build the neural network
    logits = network(features)
    
#     resizedLogits = tf.reshape(logits, shape=[-1, MAX_SIZE * MAX_SIZE, 1])
    
    # Predictions
    pred_classes = tf.argmax(logits, axis=1)
    pred_probas = tf.nn.softmax(logits)
    
    # If prediction mode, early return
    if mode == tf.estimator.ModeKeys.PREDICT:
        return tf.estimator.EstimatorSpec(mode, predictions=pred_classes)
    
    print(logits.shape)
#     print(resizedLogits.shape)
    print(labels.shape)
    print(pred_classes.shape)
        
    # Define loss and optimizer
    loss_op = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(
        logits=logits, labels=tf.cast(labels, dtype=tf.int32)))
    optimizer = tf.train.GradientDescentOptimizer(learning_rate=LEARNING_RATE)
    train_op = optimizer.minimize(loss_op, global_step=tf.train.get_global_step())
    
    # Evaluate the accuracy of the model
    acc_op = tf.metrics.accuracy(labels=tf.argmax(labels, axis=1), predictions=pred_classes)
    
    # TF Estimators requires to return a EstimatorSpec, that specify
    # the different ops for training, evaluating, ...
    estim_specs = tf.estimator.EstimatorSpec(
      mode=mode,
      predictions=pred_classes,
      loss=loss_op,
      train_op=train_op,
      eval_metric_ops={'accuracy': acc_op})

    return estim_specs

In [12]:
# Build the Estimator
config = tf.ConfigProto()
config.gpu_options.per_process_gpu_memory_fraction = 0.2
model = tf.estimator.Estimator(modelFunc, config=tf.contrib.learn.RunConfig(session_config=config))

  from ._conv import register_converters as _register_converters


INFO:tensorflow:Using config: {'_save_checkpoints_secs': 600, '_session_config': gpu_options {
  per_process_gpu_memory_fraction: 0.2
}
, '_evaluation_master': '', '_environment': 'local', '_tf_random_seed': None, '_model_dir': '/tmp/tmpvc01kutn', '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7fb5dc310fd0>, '_tf_config': gpu_options {
  per_process_gpu_memory_fraction: 1.0
}
, '_task_type': None, '_task_id': 0, '_master': '', '_num_worker_replicas': 0, '_log_step_count_steps': 100, '_is_chief': True, '_save_checkpoints_steps': None, '_save_summary_steps': 100, '_keep_checkpoint_every_n_hours': 10000, '_num_ps_replicas': 0, '_keep_checkpoint_max': 5}


In [13]:
# Train the Model
model.train(inputFunc, steps=500)

(?, 5)
(?, 5)
(?,)
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Saving checkpoints for 1 into /tmp/tmpvc01kutn/model.ckpt.
INFO:tensorflow:loss = 1.7536557, step = 1
INFO:tensorflow:global_step/sec: 0.740544
INFO:tensorflow:loss = 0.0857265, step = 101 (135.041 sec)
INFO:tensorflow:global_step/sec: 0.751347
INFO:tensorflow:loss = 0.0070715337, step = 201 (133.093 sec)
INFO:tensorflow:global_step/sec: 0.744341
INFO:tensorflow:loss = 0.011984603, step = 301 (134.347 sec)
INFO:tensorflow:global_step/sec: 0.750681
INFO:tensorflow:loss = 0.0018688831, step = 401 (133.213 sec)
INFO:tensorflow:Saving checkpoints for 449 into /tmp/tmpvc01kutn/model.ckpt.
INFO:tensorflow:Saving checkpoints for 500 into /tmp/tmpvc01kutn/model.ckpt.
INFO:tensorflow:Loss for final step: 0.19548617.


<tensorflow.python.estimator.estimator.Estimator at 0x7fb5dc22c400>

In [14]:
# Evaluate the Model
# Define the input function for evaluating
validFunc = tf.estimator.inputs.numpy_input_fn(
    x={"input": inputsValid}, y=outputsValid,
    batch_size=BATCH_SIZE, shuffle=False)
# Use the Estimator 'evaluate' method
model.evaluate(validFunc)

(?, 5)
(?, 5)
(?,)
INFO:tensorflow:Starting evaluation at 2018-04-30-15:01:54
INFO:tensorflow:Restoring parameters from /tmp/tmpvc01kutn/model.ckpt-500
INFO:tensorflow:Finished evaluation at 2018-04-30-15:01:59
INFO:tensorflow:Saving dict for global step 500: accuracy = 0.41129032, global_step = 500, loss = 1.5047578


{'accuracy': 0.41129032, 'global_step': 500, 'loss': 1.5047578}

In [22]:
outputsTrain.shape

(1114, 5)