In [1]:
import pandas as pd
import pickle as pkl
import tensorflow as tf
import time

  from ._conv import register_converters as _register_converters


In [2]:
def model_fn(features, labels, mode):
    """Estimator model function: two 1-D conv towers regressing a rating.

    The user-review and item-review token ids are embedded with one shared
    embedding table, each passed through its own conv1d + max-pool tower,
    flattened, concatenated and fed through a dense layer to a single
    linear output unit.

    Args:
        features: Indexable pair; ``features[0]`` holds the user-review token
            ids and ``features[1]`` the item-review token ids (this is what
            ``train_input_fn`` returns).
        labels: Rating tensor used for the loss and the evaluation metrics.
            Not read in PREDICT mode.
        mode: One of the ``tf.estimator.ModeKeys`` values.

    Returns:
        A ``tf.estimator.EstimatorSpec`` for the requested mode.

    Note:
        Reads the notebook-level global ``dictionary`` to size the embedding
        table, so it must be loaded before the estimator calls this function.
    """
    emb_size = 50
    filters = 2
    kernel_size = 10

    # One extra row beyond the vocabulary: train_input_fn pads reviews with
    # the id len(dictionary), which therefore needs its own embedding.
    word_embeddings = tf.get_variable(
        "word_embeddings",
        shape=[len(dictionary) + 1, emb_size]
    )

    u_inputs = tf.nn.embedding_lookup(word_embeddings, features[0])
    i_inputs = tf.nn.embedding_lookup(word_embeddings, features[1])

    user_conv1 = tf.layers.conv1d(
        u_inputs,
        filters,
        kernel_size,
        use_bias=True,
        activation=tf.nn.relu,
        name="user_conv")

    item_conv1 = tf.layers.conv1d(
        i_inputs,
        filters,
        kernel_size,
        use_bias=True,
        activation=tf.nn.relu,
        name="item_conv")

    # Pool window of 3 with stride 1.
    user_max_pool1 = tf.layers.max_pooling1d(user_conv1, 3, 1)
    item_max_pool1 = tf.layers.max_pooling1d(item_conv1, 3, 1)

    user_flat = tf.layers.flatten(user_max_pool1)
    item_flat = tf.layers.flatten(item_max_pool1)

    dense = tf.layers.dense(tf.concat([user_flat, item_flat], 1), 16, activation=tf.nn.relu)

    # Single linear unit: the predicted rating.
    logits = tf.layers.dense(dense, 1)

    predictions = {
        "rating": logits
    }

    if mode == tf.estimator.ModeKeys.PREDICT:
        return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)

    # Calculate Loss (for both TRAIN and EVAL modes)
    loss = tf.losses.mean_squared_error(labels, logits)

    # Configure the Training Op (for TRAIN mode)
    if mode == tf.estimator.ModeKeys.TRAIN:
        optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.0001)
        train_op = optimizer.minimize(
            loss=loss,
            global_step=tf.train.get_global_step())
        return tf.estimator.EstimatorSpec(mode=mode, loss=loss, train_op=train_op)

    # Add evaluation metrics (for EVAL mode).
    # This is a regression model, so report error metrics on the predicted
    # rating. The previous code looked up predictions["classes"], a key that
    # does not exist, and so raised KeyError on every evaluate() call.
    # The metric ops subtract labels from predictions, so both must share a
    # dtype; train_input_fn hands labels over as int32.
    float_labels = tf.cast(labels, tf.float32)
    eval_metric_ops = {
        "rmse": tf.metrics.root_mean_squared_error(
            labels=float_labels, predictions=predictions["rating"]),
        "mae": tf.metrics.mean_absolute_error(
            labels=float_labels, predictions=predictions["rating"])}
    return tf.estimator.EstimatorSpec(
        mode=mode, loss=loss, eval_metric_ops=eval_metric_ops)

# Test forward pass: input pipeline and a training run

In [3]:
# Every review is padded or cut to exactly this many token ids
# (passed as max_len to the input pipeline).
truncate_len = 400
# Number of examples per batch produced by train_input_fn.
batch_size = 16
def get_pad_fn(max_len, pad_value):
    """Build a function that forces a rank-1 tensor to exactly ``max_len`` entries.

    Args:
        max_len: Length of the tensor returned by the built function.
        pad_value: Value used to fill the tail of inputs shorter than
            ``max_len``; it is materialised as a ``tf.int64`` constant.

    Returns:
        A one-argument function mapping a tensor to its fixed-length version.
    """
    def fit_to_length(tensor):
        # Appending max_len fill values guarantees at least max_len entries,
        # whatever the input length; keeping the first max_len then pads short
        # inputs and truncates long ones in one go.
        fill = tf.constant(pad_value, tf.int64)
        extended = tf.pad(tensor, [[0, max_len]], constant_values=fill)
        return tf.slice(extended, [0], [max_len])

    return fit_to_length

def get_parse_fn(pad_fn):
    """Build a parser for one serialized example of the review dataset.

    Args:
        pad_fn: Function applied to each of the two parsed review tensors
            (e.g. the result of ``get_pad_fn``).

    Returns:
        A function mapping a serialized record to the tuple
        ``(user_review, item_review, rating)``, with ``pad_fn`` already
        applied to both reviews.
    """
    def parse_fn(record):
        # Both reviews are variable-length int64 sequences; the rating is a
        # single float32 value.
        schema = {
            name: tf.FixedLenSequenceFeature([], tf.int64, allow_missing=True)
            for name in ("user_review", "item_review")
        }
        schema["rating"] = tf.FixedLenFeature([1], tf.float32)

        example = tf.parse_single_example(record, schema)
        user_tokens = pad_fn(example["user_review"])
        item_tokens = pad_fn(example["item_review"])
        return user_tokens, item_tokens, example["rating"]

    return parse_fn
def get_dataset_iterator(loc, batch_size, max_len, pad_value):
    """Create a one-shot iterator over batches read from a TFRecord file.

    Args:
        loc: Location of the TFRecord file, handed to ``TFRecordDataset``.
        batch_size: Number of parsed examples per batch.
        max_len: Fixed length every review is padded/truncated to.
        pad_value: Fill value for reviews shorter than ``max_len``.

    Returns:
        The dataset's one-shot iterator; each ``get_next()`` yields one batch
        of whatever the parse function returns per record.
    """
    parse_fn = get_parse_fn(get_pad_fn(max_len, pad_value))
    # Parse each record, shuffle with a 1000-element buffer, then batch.
    return (
        tf.data.TFRecordDataset(loc)
        .map(parse_fn)
        .shuffle(1000)
        .batch(batch_size)
        .make_one_shot_iterator()
    )
def train_input_fn():
    """Input function for training: batches read from ``data/train.tfrecords``.

    Uses the notebook-level ``batch_size``, ``truncate_len`` and ``dictionary``
    globals; reviews are padded with the id ``len(dictionary)``.

    Returns:
        ``((user_review, item_review), rating)`` where the rating tensor has
        been cast to ``tf.int32``.
    """
    iterator = get_dataset_iterator(
        loc="data/train.tfrecords",
        batch_size=batch_size,
        max_len=truncate_len,
        pad_value=len(dictionary))
    user_tokens, item_tokens, rating = iterator.get_next()
    # NOTE(review): the rating is parsed as float32, so this cast drops any
    # fractional part — confirm ratings are always whole numbers.
    return (user_tokens, item_tokens), tf.cast(rating, tf.int32)

In [4]:
# Each run writes its checkpoints and summaries to a fresh directory named
# after the current Unix timestamp, so earlier runs are never overwritten.
classifier = tf.estimator.Estimator(
    model_dir="output/model_{}".format(int(time.time())),
    model_fn=model_fn,
)

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_model_dir': 'output/model_1523499889', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': None, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7fee63415ef0>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}


In [None]:
# Load the vocabulary. model_fn and train_input_fn both read the global
# `dictionary` (via len(dictionary)) when the estimator invokes them, so it
# must be defined before classifier.train() is called below.
# NOTE(review): pickle.load can execute arbitrary code; only load a
# dictionary.pkl that comes from a trusted source.
with open("data/dictionary.pkl", "rb") as f:
    dictionary = pkl.load(f)
# Optional TensorBoard debugger hook, currently disabled; to use it,
# uncomment the next two lines and pass hooks=[hook] to train().
# from tensorflow.python import debug as tf_debug
# hook = tf_debug.TensorBoardDebugHook("localhost:6060")
classifier.train(input_fn=train_input_fn) #, hooks=[hook])

INFO:tensorflow:Calling model_fn.
Instructions for updating:
`NHWC` for data_format is deprecated, use `NWC` instead
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.


47 ops no flops stats due to incomplete shapes.


INFO:tensorflow:Saving checkpoints for 1 into output/model_1523499889/model.ckpt.
INFO:tensorflow:loss = 21.845428, step = 1
