In [1]:
import glob, os, json
import solver
import pickle

import numpy as np
import pandas as pd

import tensorflow as tf

from sklearn.preprocessing import scale
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import normalize
from sklearn.metrics import accuracy_score

import time
import math

import traceback

In [2]:
os.environ["CUDA_VISIBLE_DEVICES"]="2"

In [3]:
MAX_SIZE = 300
INSTANCE_SIZE = 300

# Prep Dataset

In [2]:
df = pd.read_pickle("../data/features/optGenAntAsymAnalysis.pickle")
# # Drop rows with NA
# rowsBefore = df.shape[0]
# df = df.dropna()
# print("Dropped %d rows due to None values" % (rowsBefore - df.shape[0]))

In [3]:
def loadDeepWalkInstance(path):
    file = open(path, "r")
    
    i = 0
    
    nodeCount = None
    shape = None
    
    instance = None
    
    for line in file:
        if i == 0:
            split = line.split(" ")
            nodeCount = int(split[0])
            length = int(split[1])
            
            instance = np.zeros(shape=(nodeCount, length))
        else:
            split = line.split(" ")
            
            node = split[0]
            encoding = np.array(list(map(float, split[1:])))
            
            instance[i - 1] = encoding
            
        i += 1
    
    file.close()
    
    return instance

def loadDeepWalkInstances(path):
    instances = []
    names = []
    for file in glob.glob(path + "*.deep"):
        try:
            instance = loadDeepWalkInstance(file)
            name = os.path.splitext(os.path.splitext(os.path.basename(file))[0])[0]

            instances.append(instance)
            names.append(name)
        except:
            traceback.print_exc()
    
    return instances, names

In [4]:
instances, names = loadDeepWalkInstances("../data/deepwalk2/")

In [5]:
len(names)

840

In [6]:
len(df["name"].unique())

984

In [8]:
# Merge in DeepWalk data
dwInstances = pd.DataFrame(columns=["name", "deepWalk", "sequenceLength"])
reshapedInstances = []
for index, name in enumerate(names):
    instance = instances[index]
    instance = instance.reshape(-1)
    
    size = instance.shape[0]
    
    if name == "pr2392p":
        continue
    
    if size >= INSTANCE_SIZE * MAX_SIZE:
        print(instances[index].shape)
        print("Instance %s is too large" % (name))
        continue
    
    zeroed = np.zeros((INSTANCE_SIZE * MAX_SIZE))
    zeroed[0: size] = instance
    
#     instance = scale(zeroed.astype('float64')).reshape(MAX_SIZE, MAX_SIZE)
    instance = zeroed.astype('float64').reshape(MAX_SIZE, INSTANCE_SIZE)
        
#     reshapedInstances.append(scale(zeroed.astype('float64')).reshape(MAX_SIZE, MAX_SIZE))
    
#     reshapedInstances.append(instance)
#     instance = scale(instance.astype('float64'),axis=1)
    dwInstances = dwInstances.append(pd.DataFrame([[name, instance, size]], columns=["name", "deepWalk", "sequenceLength"]))
    
dwInstances = dwInstances.reset_index().drop("index", axis=1)
df = pd.merge(df, dwInstances, on="name")
df = df.drop("costs", axis=1)

In [9]:
len(df["name"].unique())

245

# Read Dataset

In [4]:
# df.to_pickle("../data/largefeatures/deep300large200noscaleReshaped.pickle")
# df = pd.read_pickle("../data/largefeatures/deeplarge200.pickle")
# df = pd.read_pickle("../data/largefeatures/deep128large200noscaleReshaped.pickle")
df = pd.read_pickle("../data/largefeatures/deep300large200noscaleReshaped.pickle")

In [5]:
df = df.loc[df["metadata.isAsymmetric"] == False]

In [6]:
minCostIndices = df[["heuristics.tabuCosts", "heuristics.simulatedAnnealingCosts", "heuristics.graspCosts", "heuristics.geneticCosts", "heuristics.antColonyCosts"]].idxmin(axis=1)
# minCostIndices = df[["heuristics.tabuCosts", "heuristics.simulatedAnnealingCosts", "heuristics.geneticCosts", "heuristics.antColonyCosts"]].idxmin(axis=1)

In [7]:
import collections
collections.Counter(minCostIndices.tolist())

Counter({'heuristics.antColonyCosts': 82,
         'heuristics.geneticCosts': 1,
         'heuristics.graspCosts': 1298,
         'heuristics.simulatedAnnealingCosts': 10,
         'heuristics.tabuCosts': 333})

In [8]:
inputs = np.array(df["deepWalk"].tolist())
sequenceLengths = np.array(df["sequenceLength"].tolist())

In [9]:
costValues = df[["heuristics.tabuCosts", "heuristics.simulatedAnnealingCosts", "heuristics.graspCosts", "heuristics.geneticCosts", "heuristics.antColonyCosts"]].values
indexRankings = costValues.argsort()

In [10]:
indexRankings

array([[0, 2, 4, 1, 3],
       [0, 2, 4, 1, 3],
       [0, 2, 4, 1, 3],
       ..., 
       [2, 1, 0, 4, 3],
       [0, 4, 2, 3, 1],
       [1, 3, 2, 0, 4]])

In [11]:
# intLabels = LabelEncoder().fit_transform(minCostIndices).reshape(-1, 1)
# # 5 values for 5 different heuristics
# # Drop grasp from analysis
# outputs = OneHotEncoder(sparse=False, n_values=5).fit_transform(intLabels)

# inputs = df

size = df.shape[0]
# Test data is separated in cleaning stage
trainSize = int(size * 0.75)
validSize = size - trainSize

inputsTrain = inputs[0:trainSize]
lengthsTrain = sequenceLengths[0:trainSize]
outputsTrainUnnorm = indexRankings[0:trainSize]
outputsTrain = normalize(outputsTrainUnnorm)

inputsValid = inputs[trainSize:]
lengthsValid = sequenceLengths[trainSize:]
outputsValidUnnorm = indexRankings[trainSize:]
outputsValid = normalize(outputsValidUnnorm)

In [12]:
EPOCHS = 100

N1 = trainSize
LABEL_COUNT = 5

NODES1 = 512
NODES2 = 512
NODES3 = 256
NODES4 = 256
NODES5 = 128
NODES6 = 128
NODES7 = 64

LSTM_SIZE = 150
LSTM_LAYER_COUNT = 2
LSTM_DROPOUT_PROB = 0.7

ALPHA = 0.001

BATCH_SIZE = 5

STD = 0.1

LEARNING_RATE = 0.00001

In [13]:
# Define the input function for training
inputFunc = tf.estimator.inputs.numpy_input_fn(
    x={"input": inputsTrain.astype(np.float32), "length": lengthsTrain.astype(np.int32)}, y=outputsTrainUnnorm.astype(np.float32),
#     batch_size=BATCH_SIZE, num_epochs=EPOCHS, shuffle=True)
    num_epochs=EPOCHS, shuffle=True)

In [14]:
# Define the neural network
def network(xDict, mode):
    x = xDict["input"]
    
    length = xDict["length"]
    
    if mode != tf.estimator.ModeKeys.PREDICT:
        x = tf.nn.dropout(x, LSTM_DROPOUT_PROB)
        
    with tf.variable_scope('lstm1'):
        initialCell = tf.contrib.rnn.LSTMBlockFusedCell(LSTM_SIZE)
        
        lstmOutput, _ = initialCell(x, dtype=tf.float32)

        if mode != tf.estimator.ModeKeys.PREDICT:
            lstmOutput = tf.nn.dropout(lstmOutput, LSTM_DROPOUT_PROB)
    
    with tf.variable_scope('lstm2'):
        secondCell = tf.contrib.rnn.LSTMBlockFusedCell(LSTM_SIZE)
        
        lstmOutput, _ = secondCell(lstmOutput, dtype=tf.float32)
        
        if mode != tf.estimator.ModeKeys.PREDICT:
            lstmOutput = tf.nn.dropout(lstmOutput, LSTM_DROPOUT_PROB)
            
    with tf.variable_scope('lstm3'):
        thirdCell = tf.contrib.rnn.LSTMBlockFusedCell(LSTM_SIZE)
        
        lstmOutput, _ = thirdCell(lstmOutput, dtype=tf.float32)
        
        if mode != tf.estimator.ModeKeys.PREDICT:
            lstmOutput = tf.nn.dropout(lstmOutput, LSTM_DROPOUT_PROB)
            
    with tf.variable_scope('lstm4'):
        fourthCell = tf.contrib.rnn.LSTMBlockFusedCell(LSTM_SIZE)
        
        lstmOutput, _ = fourthCell(lstmOutput, dtype=tf.float32)
        
        if mode != tf.estimator.ModeKeys.PREDICT:
            lstmOutput = tf.nn.dropout(lstmOutput, LSTM_DROPOUT_PROB)
            
#     with tf.variable_scope('lstm5'):
#         fifthCell = tf.contrib.rnn.LSTMBlockFusedCell(LSTM_SIZE)
        
#         lstmOutput, _ = fifthCell(lstmOutput, dtype=tf.float32)
        
#         if mode != tf.estimator.ModeKeys.PREDICT:
#             lstmOutput = tf.nn.dropout(lstmOutput, LSTM_DROPOUT_PROB)
            
#     with tf.variable_scope('lstm6'):
#         sixthCell = tf.contrib.rnn.LSTMBlockFusedCell(LSTM_SIZE)
        
#         lstmOutput, _ = sixthCell(lstmOutput, dtype=tf.float32)
        
#         if mode != tf.estimator.ModeKeys.PREDICT:
#             lstmOutput = tf.nn.dropout(lstmOutput, LSTM_DROPOUT_PROB)
                
    flatten = tf.contrib.layers.flatten(lstmOutput)
    
    regularizer = tf.contrib.layers.l2_regularizer(scale=ALPHA)
    
    # Hidden fully connected layer
    layer1 = tf.layers.dense(flatten, NODES1, activation=tf.nn.relu)
    layer2 = tf.layers.dense(layer1, NODES2, activation=tf.nn.relu)
    layer3 = tf.layers.dense(layer2, NODES3, activation=tf.nn.relu)
    layer4 = tf.layers.dense(layer3, NODES4, activation=tf.nn.relu)
    layer5 = tf.layers.dense(layer4, NODES5, activation=tf.nn.relu)
    layer6 = tf.layers.dense(layer5, NODES6, activation=tf.nn.relu)
    layer7 = tf.layers.dense(layer6, NODES7, activation=tf.nn.relu)
    # Output fully connected layer with a neuron for each class
    outLayer = tf.layers.dense(layer7, LABEL_COUNT)

    return outLayer

## Loss Functions

In [15]:
# Kullback-Leibler Divergence, as per https://stackoverflow.com/a/43298483
def klDivergence(p, q):
    pClipped = tf.clip_by_value(p, 1e-10, 1.0)
    qClipped = tf.clip_by_value(q, 1e-10, 1.0)
    return tf.reduce_sum(pClipped * tf.log(pClipped/qClipped))

# Loss function based off of Jensen-Shannon Divergence
def loss(label, prediction):
    mean = 0.5 * (label + prediction)
    return 0.5 * klDivergence(label, mean) + 0.5 * klDivergence(prediction, mean)

def log2(x):
    numerator = tf.log(x)
    denominator = tf.log(tf.constant(2, dtype=numerator.dtype))
    return numerator / denominator

def listNetLoss(label, prediction):
    softMaxLabel = tf.nn.softmax(label)
    softMaxPrediction = tf.nn.softmax(prediction)
    return -tf.reduce_mean(softMaxLabel * tf.log(softMaxPrediction))

def listMLE(label, prediction):
    sortedPrediction = tf.gather(prediction, tf.nn.top_k(label, k=5).indices)
    final = tf.log(tf.reduce_sum(tf.exp(sortedPrediction)))
    return tf.reduce_sum(final - sortedPrediction)

def listMLE2Loss(labels, predictions, length, length64):
    i = tf.constant(0, dtype=tf.int32)
    innerSum = tf.constant(0, dtype=tf.float32)
    
    def loop(label, prediction, i, innerSum):
        return tf.add(i, 1), tf.add(innerSum, listMLE2(label, prediction))
    
    cond = lambda i, _: tf.less(i, length)
    operation = lambda i, innerSum: loop(labels[i], predictions[i], i, innerSum)
    result = tf.while_loop(cond, operation, [i, innerSum])

    return result[1]/length64
#     return tf.constant(1.0, dtype=tf.float64) * labels + predictions

def listMLE2(label, prediction):
    # Length of vectors
    k = tf.constant(LABEL_COUNT, dtype=tf.int32)
    
    sortedPrediction = tf.gather(prediction, tf.nn.top_k(label, k=k).indices)
    
    j = tf.constant(0, dtype=tf.int32)
    innerSum = tf.constant(0, dtype=tf.float32)
    cond = lambda j, _: tf.less(j, k)
    operation = lambda j, innerSum: listMLE2Loop(sortedPrediction, j, k, innerSum)
    result = tf.while_loop(cond, operation, [j, innerSum])
    
    print(result[1].shape)
    
    return -result[1]
    
def listMLE2Loop(sortedPrediction, j, k, innerSum):
    return tf.add(j, 1), tf.add(innerSum, listMLE2Inner(sortedPrediction, j, k))

def listMLE2Inner(sortedPrediction, j, k):
    numerator = tf.exp(tf.gather(sortedPrediction, j))
    denominator = tf.reduce_sum(tf.exp(sortedPrediction[j:k]))
    
    return tf.log(numerator/denominator)

# Builds an integer ranking out of a 1-D tensor
def convertPredToRank(prediction):
    return tf.cast(tf.nn.top_k(prediction, k=5).indices, dtype=tf.float32)

## Accuracy Measures

In [16]:
# Accuracy metric using Normalized Discounted Cumulative Gain, as per https://github.com/shiba24/learning2rank/
def ndcg(labels, predictions, k=5):
    topK = tf.nn.top_k(labels, k=5)
    sortedValues = topK.values
    sortedIndices = topK.indices
#         print(labelSorted)
#         labelSorted = sorted(label, reverse=True)
    ideal_dcg = 0
    for i in range(k):
#             ideal_dcg += (2 ** labelSorted[:i] - 1.) / log2(tf.cast(i + 2, tf.float64))
        ideal_dcg += (tf.cast(sortedValues[i] + 1, tf.float32)) / log2(tf.cast(i + 2, tf.float32))
    dcg = 0
#         argsort_indices = np.argsort(predictions)[::-1]
#         argsort_indices = tf.nn.top_k(predictions, k=5).indices
#         print(argsort_indices)
    for i in range(k):
        dcg += (tf.gather(predictions, sortedIndices[i]) + 1) / log2(tf.cast(i + 2, tf.float32))
#         dcg += (predictions[i] + 1) / log2(tf.cast(i + 2, tf.float64))
    return dcg / ideal_dcg

def spearmanCorrelation(label, prediction):
    length = tf.cast(tf.shape(prediction)[0], tf.float32)
    sumVal = tf.reduce_sum(tf.square(tf.subtract(prediction, label)))
    return 1 - 6 * sumVal / (length ** 3 - length)

# Bound Spearman coeff. between 0 and 1
def boundedSpearman(label, prediction):
    return (spearmanCorrelation(label, prediction) + 1.)/2

def top1Match(label, prediction):
    return tf.cast(tf.equal(label[0], prediction[0]), tf.float32)

def top2Match(label, prediction):
    sameFirstOrSecond = tf.logical_or(tf.equal(label[0], prediction[0]), tf.equal(label[1], prediction[1]))
    sameFirstAndSecond = tf.logical_or(tf.equal(label[1], prediction[0]), tf.equal(label[0], prediction[1]))
    return tf.cast(tf.logical_or(sameFirstOrSecond, sameFirstAndSecond), tf.float32)

In [17]:
# # Define the model function (following TF Estimator Template)
# def modelFunc(features, labels, mode):
#     # Build the neural network
#     logits = network(features)
    
# #     resizedLogits = tf.reshape(logits, shape=[-1, MAX_SIZE * MAX_SIZE, 1])
    
#     # Predictions
#     # TODO: Possibly need to change
#     pred_classes = logits
# #     pred_classes = tf.argmax(logits, axis=1)
# #     pred_probas = tf.nn.softmax(logits)
#     pred_probas = tf.nn.sigmoid(logits)
    
#     # If prediction mode, early return
#     if mode == tf.estimator.ModeKeys.PREDICT:
#         return tf.estimator.EstimatorSpec(mode, predictions=pred_classes)
    
#     print(logits.shape)
# #     print(resizedLogits.shape)
#     print(labels.shape)
#     print(pred_classes.shape)
        
#     # Define loss and optimizer
# #     loss_op = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(
# #         logits=logits, labels=tf.cast(labels, dtype=tf.int32)))
#     loss_op = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(
#         logits=logits, labels=labels))
#     optimizer = tf.train.GradientDescentOptimizer(learning_rate=LEARNING_RATE)
#     train_op = optimizer.minimize(loss_op, global_step=tf.train.get_global_step())
    
#     # Evaluate the accuracy of the model
# #     acc_op = tf.metrics.accuracy(labels=tf.argmax(labels, axis=1), predictions=pred_classes)
#     acc_op = tf.metrics.accuracy(labels=labels, predictions=pred_classes)
    
#     # TF Estimators requires to return a EstimatorSpec, that specify
#     # the different ops for training, evaluating, ...
#     estim_specs = tf.estimator.EstimatorSpec(
#       mode=mode,
#       predictions=pred_classes,
#       loss=loss_op,
#       train_op=train_op,
#       eval_metric_ops={'accuracy': acc_op})

#     return estim_specs

# Define the model function (following TF Estimator Template)
def modelFunc(features, labels, mode):
    # Build the neural network
    logits = network(features, mode)
    
#     resizedLogits = tf.reshape(logits, shape=[-1, MAX_SIZE * MAX_SIZE, 1])
    
    # Predictions
    # TODO: Possibly need to change
#     pred_classes = logits
    pred_classes = tf.map_fn(convertPredToRank, logits)
#     pred_classes = tf.argmax(logits, axis=1)
#     pred_probas = tf.nn.softmax(logits)
    
    # If prediction mode, early return
    if mode == tf.estimator.ModeKeys.PREDICT:
        return tf.estimator.EstimatorSpec(mode, predictions=pred_classes)
    
    # Define loss and optimizer
#     loss_op = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(
#         logits=logits, labels=tf.cast(labels, dtype=tf.int32)))
#     loss_op = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(
#         logits=logits, labels=labels))
#     loss_op = tf.reduce_mean(loss(labels, logits))
    loss_op = tf.reduce_mean(listNetLoss(labels, logits))
#     loss_map = tf.map_fn(lambda x: listMLE2(x[0], x[1]), (labels, pred_classes), dtype=tf.float64)
#     print(labels.get_shape()[0])
#     labels_length = tf.shape(labels)[0]
#     loss_op = tf.reduce_mean(listMLE2Loss(labels, logits, labels_length, tf.cast(labels_length, dtype=tf.float32)))
#     optimizer = tf.train.GradientDescentOptimizer(learning_rate=LEARNING_RATE)
#     optimizer = tf.contrib.opt.NadamOptimizer(learning_rate=LEARNING_RATE)
    optimizer = tf.train.AdamOptimizer()
    train_op = optimizer.minimize(loss_op, global_step=tf.train.get_global_step())
    
    # Evaluate the accuracy of the model
#     acc_op = tf.metrics.accuracy(labels=tf.argmax(labels, axis=1), predictions=pred_classes)
#     acc_op = tf.metrics.accuracy(labels=labels, predictions=pred_classes)
    ndcg_map = tf.map_fn(lambda x: ndcg(x[0], x[1]), (labels, pred_classes), dtype=tf.float32)
    ndcg_op = tf.metrics.mean(ndcg_map)
    top1_map = tf.map_fn(lambda x: top1Match(x[0], x[1]), (labels, pred_classes), dtype=tf.float32)
    top1_op = tf.metrics.mean(top1_map)
    top2_map = tf.map_fn(lambda x: top2Match(x[0], x[1]), (labels, pred_classes), dtype=tf.float32)
    top2_op = tf.metrics.mean(top2_map)
    spearman_map = tf.map_fn(lambda x: boundedSpearman(x[0], x[1]), (labels, pred_classes), dtype=tf.float32)
    acc_op = tf.metrics.mean(spearman_map)
    
    # TF Estimators requires to return a EstimatorSpec, that specify
    # the different ops for training, evaluating, ...
    estim_specs = tf.estimator.EstimatorSpec(
      mode=mode,
      predictions=pred_classes,
      loss=loss_op,
      train_op=train_op,
      eval_metric_ops={'accuracy': acc_op, 'ndcg': ndcg_op, 'top1Classification': top1_op, 'top2Classification': top2_op})

    return estim_specs

In [18]:
# Build the Estimator
config = tf.ConfigProto()
# config.gpu_options.per_process_gpu_memory_fraction = 0.5
model = tf.estimator.Estimator(modelFunc, config=tf.contrib.learn.RunConfig(session_config=config))

INFO:tensorflow:Using config: {'_task_type': None, '_task_id': 0, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7f1c814a7908>, '_master': '', '_num_ps_replicas': 0, '_num_worker_replicas': 0, '_environment': 'local', '_is_chief': True, '_evaluation_master': '', '_tf_config': gpu_options {
  per_process_gpu_memory_fraction: 1.0
}
, '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_secs': 600, '_log_step_count_steps': 100, '_session_config': , '_save_checkpoints_steps': None, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_model_dir': '/tmp/tmp40tnv8lg'}


In [19]:
trainResults = []
validResults = []

In [20]:
trainFunc = tf.estimator.inputs.numpy_input_fn(
    x={"input": inputsTrain.astype(np.float32), "length": lengthsTrain}, y=outputsTrainUnnorm.astype(np.float32),
    batch_size=BATCH_SIZE, shuffle=False)

validFunc = tf.estimator.inputs.numpy_input_fn(
    x={"input": inputsValid.astype(np.float32), "length": lengthsValid}, y=outputsValidUnnorm.astype(np.float32),
    batch_size=BATCH_SIZE, shuffle=False)

for i in range(0, 60):
    model.train(inputFunc, steps=2000)

    print("Evaluating Train")
    accuracy = model.evaluate(trainFunc)
    trainResults.append(accuracy)

    print("Evaluating Valid")
    accuracy = model.evaluate(validFunc)
    validResults.append(accuracy)

INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Saving checkpoints for 1 into /tmp/tmp40tnv8lg/model.ckpt.
INFO:tensorflow:loss = 0.321911, step = 1
INFO:tensorflow:global_step/sec: 4.76724
INFO:tensorflow:loss = 0.311722, step = 101 (20.980 sec)
INFO:tensorflow:global_step/sec: 4.97814
INFO:tensorflow:loss = 0.317827, step = 201 (20.088 sec)
INFO:tensorflow:global_step/sec: 5.09416
INFO:tensorflow:loss = 0.310247, step = 301 (19.630 sec)
INFO:tensorflow:global_step/sec: 5.00035
INFO:tensorflow:loss = 0.31284, step = 401 (19.998 sec)
INFO:tensorflow:global_step/sec: 5.08344
INFO:tensorflow:loss = 0.303544, step = 501 (19.674 sec)
INFO:tensorflow:global_step/sec: 4.91548
INFO:tensorflow:loss = 0.304413, step = 601 (20.342 sec)
INFO:tensorflow:global_step/sec: 5.06455
INFO:tensorflow:loss = 0.307015, step = 701 (19.745 sec)
INFO:tensorflow:global_step/sec: 5.14885
INFO:tensorflow:loss = 0.307736, step = 801 (19.421 sec)
INFO:tensorflow:global_step/sec: 5.09313
INFO:tensorflow

INFO:tensorflow:Finished evaluation at 2018-05-11-15:50:59
INFO:tensorflow:Saving dict for global step 4044: accuracy = 0.57529, global_step = 4044, loss = 0.405459, ndcg = 0.882636, top1Classification = 0.306264, top2Classification = 0.75638
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Restoring parameters from /tmp/tmp40tnv8lg/model.ckpt-4044
INFO:tensorflow:Saving checkpoints for 4045 into /tmp/tmp40tnv8lg/model.ckpt.
INFO:tensorflow:loss = 0.273911, step = 4045
INFO:tensorflow:global_step/sec: 4.40651
INFO:tensorflow:loss = 0.281156, step = 4145 (22.699 sec)
INFO:tensorflow:global_step/sec: 4.55666
INFO:tensorflow:loss = 0.262573, step = 4245 (21.945 sec)
INFO:tensorflow:global_step/sec: 4.60387
INFO:tensorflow:loss = 0.260843, step = 4345 (21.720 sec)
INFO:tensorflow:global_step/sec: 4.60143
INFO:tensorflow:loss = 0.271023, step = 4445 (21.733 sec)
INFO:tensorflow:global_step/sec: 4.59212
INFO:tensorflow:loss = 0.272397, step = 4545 (21.776 sec)
INFO:tensorflow:glob

INFO:tensorflow:Saving dict for global step 8088: accuracy = 0.596288, global_step = 8088, loss = 0.423571, ndcg = 0.89162, top1Classification = 0.262181, top2Classification = 0.719258
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Restoring parameters from /tmp/tmp40tnv8lg/model.ckpt-8088
INFO:tensorflow:Saving checkpoints for 8089 into /tmp/tmp40tnv8lg/model.ckpt.
INFO:tensorflow:loss = 0.238979, step = 8089
INFO:tensorflow:global_step/sec: 4.41644
INFO:tensorflow:loss = 0.24394, step = 8189 (22.646 sec)
INFO:tensorflow:global_step/sec: 4.48918
INFO:tensorflow:loss = 0.248127, step = 8289 (22.278 sec)
INFO:tensorflow:global_step/sec: 4.50612
INFO:tensorflow:loss = 0.239223, step = 8389 (22.189 sec)
INFO:tensorflow:global_step/sec: 4.52554
INFO:tensorflow:loss = 0.244483, step = 8489 (22.099 sec)
INFO:tensorflow:global_step/sec: 4.50517
INFO:tensorflow:loss = 0.247459, step = 8589 (22.195 sec)
INFO:tensorflow:global_step/sec: 4.52289
INFO:tensorflow:loss = 0.243514, step 

INFO:tensorflow:Saving dict for global step 12132: accuracy = 0.479776, global_step = 12132, loss = 0.2362, ndcg = 0.875125, top1Classification = 0.334107, top2Classification = 0.717711
Evaluating Valid
INFO:tensorflow:Starting evaluation at 2018-05-11-16:22:18
INFO:tensorflow:Restoring parameters from /tmp/tmp40tnv8lg/model.ckpt-12132
INFO:tensorflow:Finished evaluation at 2018-05-11-16:22:21
INFO:tensorflow:Saving dict for global step 12132: accuracy = 0.560557, global_step = 12132, loss = 0.417954, ndcg = 0.880955, top1Classification = 0.24594, top2Classification = 0.693735
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Restoring parameters from /tmp/tmp40tnv8lg/model.ckpt-12132
INFO:tensorflow:Saving checkpoints for 12133 into /tmp/tmp40tnv8lg/model.ckpt.
INFO:tensorflow:loss = 0.235001, step = 12133
INFO:tensorflow:global_step/sec: 4.8851
INFO:tensorflow:loss = 0.237013, step = 12233 (20.473 sec)
INFO:tensorflow:global_step/sec: 4.99114
INFO:tensorflow:loss = 0.229257

INFO:tensorflow:global_step/sec: 4.53926
INFO:tensorflow:loss = 0.228034, step = 16166 (22.032 sec)
INFO:tensorflow:Saving checkpoints for 16176 into /tmp/tmp40tnv8lg/model.ckpt.
INFO:tensorflow:Loss for final step: 0.22879.
Evaluating Train
INFO:tensorflow:Starting evaluation at 2018-05-11-16:37:00
INFO:tensorflow:Restoring parameters from /tmp/tmp40tnv8lg/model.ckpt-16176
INFO:tensorflow:Finished evaluation at 2018-05-11-16:37:07
INFO:tensorflow:Saving dict for global step 16176: accuracy = 0.484725, global_step = 16176, loss = 0.229659, ndcg = 0.873519, top1Classification = 0.317092, top2Classification = 0.723125
Evaluating Valid
INFO:tensorflow:Starting evaluation at 2018-05-11-16:37:09
INFO:tensorflow:Restoring parameters from /tmp/tmp40tnv8lg/model.ckpt-16176
INFO:tensorflow:Finished evaluation at 2018-05-11-16:37:12
INFO:tensorflow:Saving dict for global step 16176: accuracy = 0.582831, global_step = 16176, loss = 0.443329, ndcg = 0.885916, top1Classification = 0.25058, top2Clas

INFO:tensorflow:loss = 0.215044, step = 19710 (21.514 sec)
INFO:tensorflow:global_step/sec: 4.6048
INFO:tensorflow:loss = 0.221897, step = 19810 (21.716 sec)
INFO:tensorflow:global_step/sec: 4.60224
INFO:tensorflow:loss = 0.222514, step = 19910 (21.730 sec)
INFO:tensorflow:global_step/sec: 4.7251
INFO:tensorflow:loss = 0.220198, step = 20010 (21.163 sec)
INFO:tensorflow:global_step/sec: 4.72845
INFO:tensorflow:loss = 0.214595, step = 20110 (21.149 sec)
INFO:tensorflow:global_step/sec: 4.64256
INFO:tensorflow:loss = 0.224307, step = 20210 (21.542 sec)
INFO:tensorflow:Saving checkpoints for 20220 into /tmp/tmp40tnv8lg/model.ckpt.
INFO:tensorflow:Loss for final step: 0.22775.
Evaluating Train
INFO:tensorflow:Starting evaluation at 2018-05-11-16:52:58
INFO:tensorflow:Restoring parameters from /tmp/tmp40tnv8lg/model.ckpt-20220
INFO:tensorflow:Finished evaluation at 2018-05-11-16:53:05
INFO:tensorflow:Saving dict for global step 20220: accuracy = 0.465893, global_step = 20220, loss = 0.22246

INFO:tensorflow:global_step/sec: 4.47189
INFO:tensorflow:loss = 0.213558, step = 23354 (22.366 sec)
INFO:tensorflow:global_step/sec: 4.55933
INFO:tensorflow:loss = 0.207705, step = 23454 (21.932 sec)
INFO:tensorflow:global_step/sec: 4.58118
INFO:tensorflow:loss = 0.213135, step = 23554 (21.829 sec)
INFO:tensorflow:global_step/sec: 4.56683
INFO:tensorflow:loss = 0.214101, step = 23654 (21.897 sec)
INFO:tensorflow:global_step/sec: 4.50509
INFO:tensorflow:loss = 0.218882, step = 23754 (22.196 sec)
INFO:tensorflow:global_step/sec: 4.50711
INFO:tensorflow:loss = 0.212346, step = 23854 (22.187 sec)
INFO:tensorflow:global_step/sec: 4.51446
INFO:tensorflow:loss = 0.215307, step = 23954 (22.150 sec)
INFO:tensorflow:global_step/sec: 4.58746
INFO:tensorflow:loss = 0.215438, step = 24054 (21.799 sec)
INFO:tensorflow:global_step/sec: 4.57056
INFO:tensorflow:loss = 0.210591, step = 24154 (21.879 sec)
INFO:tensorflow:global_step/sec: 4.48316
INFO:tensorflow:loss = 0.213399, step = 24254 (22.306 sec)


INFO:tensorflow:Saving dict for global step 27297: accuracy = 0.614269, global_step = 27297, loss = 0.463082, ndcg = 0.899125, top1Classification = 0.227378, top2Classification = 0.707657
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Restoring parameters from /tmp/tmp40tnv8lg/model.ckpt-27297
INFO:tensorflow:Saving checkpoints for 27298 into /tmp/tmp40tnv8lg/model.ckpt.
INFO:tensorflow:loss = 0.211482, step = 27298
INFO:tensorflow:global_step/sec: 4.52812
INFO:tensorflow:loss = 0.212852, step = 27398 (22.088 sec)
INFO:tensorflow:global_step/sec: 4.6348
INFO:tensorflow:loss = 0.21984, step = 27498 (21.576 sec)
INFO:tensorflow:global_step/sec: 4.56754
INFO:tensorflow:loss = 0.215194, step = 27598 (21.894 sec)
INFO:tensorflow:global_step/sec: 4.5597
INFO:tensorflow:loss = 0.211664, step = 27698 (21.932 sec)
INFO:tensorflow:global_step/sec: 4.59991
INFO:tensorflow:loss = 0.209004, step = 27798 (21.739 sec)
INFO:tensorflow:global_step/sec: 4.54104
INFO:tensorflow:loss = 0.2062

INFO:tensorflow:Finished evaluation at 2018-05-11-17:36:13
INFO:tensorflow:Saving dict for global step 31341: accuracy = 0.457154, global_step = 31341, loss = 0.208239, ndcg = 0.851573, top1Classification = 0.301624, top2Classification = 0.704563
Evaluating Valid
INFO:tensorflow:Starting evaluation at 2018-05-11-17:36:14
INFO:tensorflow:Restoring parameters from /tmp/tmp40tnv8lg/model.ckpt-31341
INFO:tensorflow:Finished evaluation at 2018-05-11-17:36:17
INFO:tensorflow:Saving dict for global step 31341: accuracy = 0.604408, global_step = 31341, loss = 0.457876, ndcg = 0.896237, top1Classification = 0.208817, top2Classification = 0.642691
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Restoring parameters from /tmp/tmp40tnv8lg/model.ckpt-31341
INFO:tensorflow:Saving checkpoints for 31342 into /tmp/tmp40tnv8lg/model.ckpt.
INFO:tensorflow:loss = 0.204431, step = 31342
INFO:tensorflow:global_step/sec: 4.74788
INFO:tensorflow:loss = 0.211554, step = 31442 (21.066 sec)
INFO:tens

INFO:tensorflow:loss = 0.203421, step = 35275 (19.969 sec)
INFO:tensorflow:global_step/sec: 5.00998
INFO:tensorflow:loss = 0.202947, step = 35375 (19.960 sec)
INFO:tensorflow:Saving checkpoints for 35385 into /tmp/tmp40tnv8lg/model.ckpt.
INFO:tensorflow:Loss for final step: 0.201646.
Evaluating Train
INFO:tensorflow:Starting evaluation at 2018-05-11-17:50:50
INFO:tensorflow:Restoring parameters from /tmp/tmp40tnv8lg/model.ckpt-35385
INFO:tensorflow:Finished evaluation at 2018-05-11-17:50:58
INFO:tensorflow:Saving dict for global step 35385: accuracy = 0.449884, global_step = 35385, loss = 0.204681, ndcg = 0.847323, top1Classification = 0.304718, top2Classification = 0.712297
Evaluating Valid
INFO:tensorflow:Starting evaluation at 2018-05-11-17:50:59
INFO:tensorflow:Restoring parameters from /tmp/tmp40tnv8lg/model.ckpt-35385
INFO:tensorflow:Finished evaluation at 2018-05-11-17:51:01
INFO:tensorflow:Saving dict for global step 35385: accuracy = 0.60638, global_step = 35385, loss = 0.4672

KeyboardInterrupt: 

In [24]:
trainResults[-1]

{'accuracy': 0.31632748,
 'global_step': 24765,
 'loss': 0.21875271,
 'ndcg': 0.80116904,
 'top1Classification': 0.1845717,
 'top2Classification': 0.44107902}

In [22]:
predictions = list(model.predict(validFunc))

INFO:tensorflow:Restoring parameters from /tmp/tmpzd3h7s_d/model.ckpt-24765


In [23]:
import collections
collections.Counter(list(map(str, predictions)))

Counter({'[ 0.  1.  2.  3.  4.]': 45,
         '[ 0.  1.  2.  4.  3.]': 7,
         '[ 0.  1.  3.  2.  4.]': 45,
         '[ 0.  1.  3.  4.  2.]': 11,
         '[ 0.  1.  4.  2.  3.]': 7,
         '[ 0.  1.  4.  3.  2.]': 2,
         '[ 0.  2.  1.  3.  4.]': 22,
         '[ 0.  2.  1.  4.  3.]': 4,
         '[ 0.  2.  3.  1.  4.]': 29,
         '[ 0.  2.  3.  4.  1.]': 8,
         '[ 0.  2.  4.  1.  3.]': 19,
         '[ 0.  2.  4.  3.  1.]': 5,
         '[ 0.  3.  1.  2.  4.]': 14,
         '[ 0.  3.  2.  1.  4.]': 18,
         '[ 0.  3.  2.  4.  1.]': 1,
         '[ 0.  3.  4.  2.  1.]': 1,
         '[ 1.  0.  2.  3.  4.]': 59,
         '[ 1.  0.  2.  4.  3.]': 4,
         '[ 1.  0.  3.  2.  4.]': 53,
         '[ 1.  0.  3.  4.  2.]': 20,
         '[ 1.  0.  4.  2.  3.]': 11,
         '[ 1.  0.  4.  3.  2.]': 30,
         '[ 1.  2.  0.  3.  4.]': 3,
         '[ 1.  2.  0.  4.  3.]': 4,
         '[ 1.  2.  3.  0.  4.]': 1,
         '[ 1.  2.  4.  0.  3.]': 1,
         '[ 1.  3.  0.  2

In [123]:
import collections
collections.Counter(list(map(str, outputsTrainUnnorm)))

Counter({'[0 1 2 4 3]': 1,
         '[0 2 1 4 3]': 12,
         '[0 2 4 1 3]': 83,
         '[0 2 4 3 1]': 2,
         '[1 2 4 0 3]': 1,
         '[1 4 2 0 3]': 1,
         '[1 4 2 3 0]': 1,
         '[2 0 1 3 4]': 1,
         '[2 0 1 4 3]': 37,
         '[2 0 4 1 3]': 379,
         '[2 0 4 3 1]': 5,
         '[2 1 0 4 3]': 1,
         '[2 1 4 0 3]': 5,
         '[2 1 4 3 0]': 1,
         '[2 3 1 4 0]': 1,
         '[2 4 0 1 3]': 33,
         '[2 4 0 3 1]': 3,
         '[2 4 1 0 3]': 6,
         '[2 4 1 3 0]': 1,
         '[2 4 3 1 0]': 1,
         '[3 1 4 2 0]': 1,
         '[4 1 2 3 0]': 7,
         '[4 1 3 0 2]': 38,
         '[4 1 3 2 0]': 273,
         '[4 2 0 1 3]': 3,
         '[4 2 0 3 1]': 1,
         '[4 2 1 0 3]': 4,
         '[4 3 1 2 0]': 13})