In [7]:
import pandas as pd
import numpy as np
import tensorflow as tf
from copy import deepcopy

In [3]:
%%time
df = pd.read_csv("../input/blocks.csv")
df["minute"] = df["quarter"]

Wall time: 3.21 s


In [49]:
print(df.shape)
df.head()

(10046491, 9)


Unnamed: 0,Level,latBlock,lngBlock,month,day,hour,quarter,count,minute
0,0,0,17,4,14,16,0,2,0
1,0,0,17,4,21,16,2,1,2
2,0,0,17,5,11,11,0,2,0
3,0,0,17,5,11,12,0,11,0
4,0,0,17,5,11,12,1,11,1


In [52]:
df_val = df.loc[df.month>=6]
df = df.loc[df.month<6]
print(df_val.shape)
print(df.shape)

(3858677, 9)
(6187814, 9)


In [53]:
%%time
# format the input and output
def matrixTrans(df, level, between=7):
    tf = deepcopy(df.loc[df['Level'] == level])
    latMax = tf.latBlock.max()+1
    lngMax = tf.lngBlock.max()+1
    tf["order"] = tf.month * 10**2+ tf.day
    orders = sorted(tf["order"].unique())

    def generator():
        for hour in tf.hour.unique():
            for minute in tf.minute.unique():
                yield hour,minute

    g = generator()
    num = 0
    for h,m in g:
        num+= len(orders)-between
    features = np.zeros((num,lngMax,latMax,between))
    labels = np.zeros((num,lngMax,latMax))

    g = generator()
    index = 0
    for hour,minute in g:
        temp = tf.loc[(tf.hour==hour)&(tf.minute==minute)]
        for i in range(between, len(orders)):
            next_ = temp.loc[tf.order==orders[i]]
            for _,row in next_.iterrows():
                labels[index, row['lngBlock'],row['latBlock']] = row['count']
            for b in range(between):
                prev = temp.loc[tf.order==orders[i-1-b]]
                for _, row in prev.iterrows():
                    features[index, row['lngBlock'],row['latBlock'],b] = row['count']
            index += 1
            if index==num:
                return features, labels

    del tf
    features = features[:index,:,:,:]
    labels = labels[:index,:,:]
    return features, labels

features, labels = matrixTrans(df, 0)
features_val, labels_val = matrixTrans(df_val, 0)
print(features.shape)
print(labels.shape)

(4896, 50, 50, 7)
(4896, 50, 50)
Wall time: 22min 32s


In [101]:
def cnn_model(features, labels, mode):
    # Input Layer
    size = features.shape
    kernel_size = [(3,3),(3,3),(3,3)]
    #pool_size = parameters.get("pool_size", [(2,2),(3,3),(2,2)])
    strides = [2,2,2]
    filters = [32, 64, 16]

    input_layer = tf.reshape(features, [-1, size[1], size[2], size[3]])
    label_layer = tf.reshape(labels, [-1, size[1]*size[2]])

    # We only add conv layers with 'same' padding in our model
    # since It's a n*n -> n*n prediction (previous distribution -> future prediction)
    # dimensions should be kept same

    # Convolutional Layer and Pooling Layer#1
    conv1 = tf.layers.conv2d(
      inputs=input_layer,
      filters=filters[0],
      kernel_size=kernel_size[0],
      padding="same",
      activation=tf.nn.relu)

    # Convolutional Layer #2 and Pooling Layer #2
    conv2 = tf.layers.conv2d(
      inputs=conv1,
      filters=filters[1],
      kernel_size=kernel_size[1],
      padding="same",
      activation=tf.nn.relu)
    
    # Convolutional Layer #2 and Pooling Layer #2
    conv3 = tf.layers.conv2d(
      inputs=conv2,
      filters=filters[2],
      kernel_size=kernel_size[2],
      padding="same",
      activation=tf.nn.relu)

    # Dense Layer
    conv3_flat = tf.reshape(conv3, [-1, size[1]*size[2]*filters[2]])
    dense = tf.layers.dense(inputs=conv3_flat, units=1024)
    dropout = tf.layers.dropout(inputs=dense, rate=0.4, training= True)

    # Logits Layer
    logits = tf.layers.dense(inputs=dropout, units= size[1]*size[2])

    # Loss
    loss = tf.losses.mean_squared_error(labels=label_layer, predictions=logits)
    
    predictions = {
      # Generate predictions (for PREDICT and EVAL mode)
      "distribution": logits
      # Add `softmax_tensor` to the graph. It is used for PREDICT and by the
      # `logging_hook`.
    }
    
    if mode == tf.estimator.ModeKeys.PREDICT:
        return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)

    
    # Configure the Training Op (for TRAIN mode)
    if mode == tf.estimator.ModeKeys.TRAIN:
        optimizer = tf.train.AdamOptimizer(learning_rate=0.0001)
        train_op = optimizer.minimize(
            loss=loss,
            global_step=tf.train.get_global_step())
        return tf.estimator.EstimatorSpec(mode=mode, loss=loss, train_op=train_op)

    # Add evaluation metrics (for EVAL mode)
    eval_metric_ops = {
      "mse": tf.metrics.mean_squared_error(
          labels=label_layer, predictions=logits)
    }
    return tf.estimator.EstimatorSpec(
      mode=mode, loss=loss, eval_metric_ops=eval_metric_ops)

In [102]:
mnist_classifier = tf.estimator.Estimator(
    model_fn=cnn_model)
train_input_fn = tf.estimator.inputs.numpy_input_fn(
    x=features,
    y=labels,
    batch_size=512,
    num_epochs=None,
    shuffle=True)

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_model_dir': 'C:\\Users\\yuhan\\AppData\\Local\\Temp\\tmpss051w82', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': None, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x0000011D874880B8>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}


In [103]:
mnist_classifier.train(
    input_fn=train_input_fn,
    steps=250)

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Saving checkpoints for 0 into C:\Users\yuhan\AppData\Local\Temp\tmpss051w82\model.ckpt.
INFO:tensorflow:loss = 1625.4457, step = 0
INFO:tensorflow:global_step/sec: 0.560808
INFO:tensorflow:loss = 177.97238, step = 100 (178.314 sec)
INFO:tensorflow:global_step/sec: 0.558712
INFO:tensorflow:loss = 362.62424, step = 200 (178.983 sec)
INFO:tensorflow:Saving checkpoints for 250 into C:\Users\yuhan\AppData\Local\Temp\tmpss051w82\model.ckpt.
INFO:tensorflow:Loss for final step: 715.8264.


<tensorflow.python.estimator.estimator.Estimator at 0x11e2677be80>

In [104]:
eval_input_fn = tf.estimator.inputs.numpy_input_fn(
    x=features,
    y=labels,
    num_epochs=1,
    shuffle=False)

eval_results = mnist_classifier.evaluate(input_fn=eval_input_fn)
print(eval_results)

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Starting evaluation at 2019-04-09-05:31:58
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from C:\Users\yuhan\AppData\Local\Temp\tmpss051w82\model.ckpt-250
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Finished evaluation at 2019-04-09-05:32:05
INFO:tensorflow:Saving dict for global step 250: global_step = 250, loss = 342.47195, mse = 349.07913
INFO:tensorflow:Saving 'checkpoint_path' summary for global step 250: C:\Users\yuhan\AppData\Local\Temp\tmpss051w82\model.ckpt-250
{'loss': 342.47195, 'mse': 349.07913, 'global_step': 250}


In [105]:
eval_input_fn = tf.estimator.inputs.numpy_input_fn(
    x=features_val,
    y=labels_val,
    num_epochs=1,
    shuffle=False)

eval_results = mnist_classifier.evaluate(input_fn=eval_input_fn)
print(eval_results)

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Starting evaluation at 2019-04-09-05:32:08
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from C:\Users\yuhan\AppData\Local\Temp\tmpss051w82\model.ckpt-250
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Finished evaluation at 2019-04-09-05:32:16
INFO:tensorflow:Saving dict for global step 250: global_step = 250, loss = 411.14072, mse = 415.78217
INFO:tensorflow:Saving 'checkpoint_path' summary for global step 250: C:\Users\yuhan\AppData\Local\Temp\tmpss051w82\model.ckpt-250
{'loss': 411.14072, 'mse': 415.78217, 'global_step': 250}


In [None]:
# the aim of rmse is 15, for now the rmse from cnn model is 20 