In [1]:
import os

import numpy as np
import tensorflow as tf

import time
from datetime import datetime

from dataframe import DataFrame
from datareader_wave import DataReader
from tf_basemodel import TFBaseModel
from tf_utils import (
    time_distributed_dense_layer, temporal_convolution_layer,
    sequence_mean, sequence_smape, shape
)

In [2]:
root_paths = [
    "/Users/jiayou/Dropbox/JuanCode/Kaggle/Wikipedia/data2/", # Mac
    "/Users/jiayou/Dropbox/Documents/JuanCode/Kaggle/Wikipedia/data2/", # 1080
    '/Users/junxie/Dropbox/JuanCode/Insight/project/data_mini/', # pro
    '/mnt/WD Black/Dropbox/JuanCode/Insight/Project/data_mini/', # paperspace
]
root = None
for p in root_paths:
    if os.path.exists(p):
        root = p
        break
print(root)

/Users/junxie/Dropbox/JuanCode/Insight/project/data_mini/


In [3]:
class cnn(TFBaseModel):

    def __init__(
        self,
        residual_channels=32,
        skip_channels=32,
        dilations=[2**i for i in range(8)]*3,
        filter_widths=[2 for i in range(8)]*3,
        num_decode_steps=64,
        **kwargs
    ):
        self.residual_channels = residual_channels
        self.skip_channels = skip_channels
        self.dilations = dilations
        self.filter_widths = filter_widths
        self.num_decode_steps = num_decode_steps
        super(cnn, self).__init__(**kwargs)

    def transform(self, x):
        return tf.log(x + 1) - tf.expand_dims(self.log_x_encode_mean, 1)

    def inverse_transform(self, x):
        return tf.exp(x + tf.expand_dims(self.log_x_encode_mean, 1)) - 1

    def get_input_sequences(self):
        self.x_encode = tf.placeholder(tf.float32, [None, None])
        self.encode_len = tf.placeholder(tf.int32, [None])
        self.y_decode = tf.placeholder(tf.float32, [None, self.num_decode_steps])
        self.decode_len = tf.placeholder(tf.int32, [None])
        self.is_nan_encode = tf.placeholder(tf.float32, [None, None])
        self.is_nan_decode = tf.placeholder(tf.float32, [None, self.num_decode_steps])

        self.page_id = tf.placeholder(tf.int32, [None])
        self.project = tf.placeholder(tf.int32, [None])
        self.access = tf.placeholder(tf.int32, [None])
        self.agent = tf.placeholder(tf.int32, [None])

        self.keep_prob = tf.placeholder(tf.float32)
        self.is_training = tf.placeholder(tf.bool)

        self.log_x_encode_mean = sequence_mean(tf.log(self.x_encode + 1), self.encode_len)
        self.log_x_encode = self.transform(self.x_encode)
        self.x = tf.expand_dims(self.log_x_encode, 2)

        self.encode_features = tf.concat([
            tf.expand_dims(self.is_nan_encode, 2),
            tf.expand_dims(tf.cast(tf.equal(self.x_encode, 0.0), tf.float32), 2),
            tf.tile(tf.reshape(self.log_x_encode_mean, (-1, 1, 1)), (1, tf.shape(self.x_encode)[1], 1)),
            tf.tile(tf.expand_dims(tf.one_hot(self.project, 9), 1), (1, tf.shape(self.x_encode)[1], 1)),
            tf.tile(tf.expand_dims(tf.one_hot(self.access, 3), 1), (1, tf.shape(self.x_encode)[1], 1)),
            tf.tile(tf.expand_dims(tf.one_hot(self.agent, 2), 1), (1, tf.shape(self.x_encode)[1], 1)),
        ], axis=2)

        decode_idx = tf.tile(tf.expand_dims(tf.range(self.num_decode_steps), 0), (tf.shape(self.y_decode)[0], 1))
        self.decode_features = tf.concat([
            tf.one_hot(decode_idx, self.num_decode_steps),
            tf.tile(tf.reshape(self.log_x_encode_mean, (-1, 1, 1)), (1, self.num_decode_steps, 1)),
            tf.tile(tf.expand_dims(tf.one_hot(self.project, 9), 1), (1, self.num_decode_steps, 1)),
            tf.tile(tf.expand_dims(tf.one_hot(self.access, 3), 1), (1, self.num_decode_steps, 1)),
            tf.tile(tf.expand_dims(tf.one_hot(self.agent, 2), 1), (1, self.num_decode_steps, 1)),
        ], axis=2)

        return self.x

    def encode(self, x, features):
        x = tf.concat([x, features], axis=2)

        inputs = time_distributed_dense_layer(
            inputs=x,
            output_units=self.residual_channels,
            activation=tf.nn.tanh,
            scope='x-proj-encode'
        )

        skip_outputs = []
        conv_inputs = [inputs]
        for i, (dilation, filter_width) in enumerate(zip(self.dilations, self.filter_widths)):
            dilated_conv = temporal_convolution_layer(
                inputs=inputs,
                output_units=2*self.residual_channels,
                convolution_width=filter_width,
                causal=True,
                dilation_rate=[dilation],
                scope='dilated-conv-encode-{}'.format(i)
            )
            conv_filter, conv_gate = tf.split(dilated_conv, 2, axis=2)
            dilated_conv = tf.nn.tanh(conv_filter)*tf.nn.sigmoid(conv_gate)

            outputs = time_distributed_dense_layer(
                inputs=dilated_conv,
                output_units=self.skip_channels + self.residual_channels,
                scope='dilated-conv-proj-encode-{}'.format(i)
            )
            skips, residuals = tf.split(outputs, [self.skip_channels, self.residual_channels], axis=2)

            inputs += residuals
            conv_inputs.append(inputs)
            skip_outputs.append(skips)

        skip_outputs = tf.nn.relu(tf.concat(skip_outputs, axis=2))
        h = time_distributed_dense_layer(skip_outputs, 128, scope='dense-encode-1', activation=tf.nn.relu)
        y_hat = time_distributed_dense_layer(h, 1, scope='dense-encode-2')

        return y_hat, conv_inputs[:-1]

    def initialize_decode_params(self, x, features):
        x = tf.concat([x, features], axis=2)

        inputs = time_distributed_dense_layer(
            inputs=x,
            output_units=self.residual_channels,
            activation=tf.nn.tanh,
            scope='x-proj-decode'
        )

        skip_outputs = []
        conv_inputs = [inputs]
        for i, (dilation, filter_width) in enumerate(zip(self.dilations, self.filter_widths)):
            dilated_conv = temporal_convolution_layer(
                inputs=inputs,
                output_units=2*self.residual_channels,
                convolution_width=filter_width,
                causal=True,
                dilation_rate=[dilation],
                scope='dilated-conv-decode-{}'.format(i)
            )
            conv_filter, conv_gate = tf.split(dilated_conv, 2, axis=2)
            dilated_conv = tf.nn.tanh(conv_filter)*tf.nn.sigmoid(conv_gate)

            outputs = time_distributed_dense_layer(
                inputs=dilated_conv,
                output_units=self.skip_channels + self.residual_channels,
                scope='dilated-conv-proj-decode-{}'.format(i)
            )
            skips, residuals = tf.split(outputs, [self.skip_channels, self.residual_channels], axis=2)

            inputs += residuals
            conv_inputs.append(inputs)
            skip_outputs.append(skips)

        skip_outputs = tf.nn.relu(tf.concat(skip_outputs, axis=2))
        h = time_distributed_dense_layer(skip_outputs, 128, scope='dense-decode-1', activation=tf.nn.relu)
        y_hat = time_distributed_dense_layer(h, 1, scope='dense-decode-2')
        return y_hat

    def decode(self, x, conv_inputs, features):
        batch_size = tf.shape(x)[0]

        # initialize state tensor arrays
        state_queues = []
        for i, (conv_input, dilation) in enumerate(zip(conv_inputs, self.dilations)):
            batch_idx = tf.range(batch_size)
            batch_idx = tf.tile(tf.expand_dims(batch_idx, 1), (1, dilation))
            batch_idx = tf.reshape(batch_idx, [-1])

            queue_begin_time = self.encode_len - dilation - 1
            temporal_idx = tf.expand_dims(queue_begin_time, 1) + tf.expand_dims(tf.range(dilation), 0)
            temporal_idx = tf.reshape(temporal_idx, [-1])

            idx = tf.stack([batch_idx, temporal_idx], axis=1)
            slices = tf.reshape(tf.gather_nd(conv_input, idx), (batch_size, dilation, shape(conv_input, 2)))

            layer_ta = tf.TensorArray(dtype=tf.float32, size=dilation + self.num_decode_steps)
            layer_ta = layer_ta.unstack(tf.transpose(slices, (1, 0, 2)))
            state_queues.append(layer_ta)

        # initialize feature tensor array
        features_ta = tf.TensorArray(dtype=tf.float32, size=self.num_decode_steps)
        features_ta = features_ta.unstack(tf.transpose(features, (1, 0, 2)))

        # initialize output tensor array
        emit_ta = tf.TensorArray(size=self.num_decode_steps, dtype=tf.float32)

        # initialize other loop vars
        elements_finished = 0 >= self.decode_len
        time = tf.constant(0, dtype=tf.int32)

        # get initial x input
        current_idx = tf.stack([tf.range(tf.shape(self.encode_len)[0]), self.encode_len - 1], axis=1)
        initial_input = tf.gather_nd(x, current_idx)

        def loop_fn(time, current_input, queues):
            current_features = features_ta.read(time)
            current_input = tf.concat([current_input, current_features], axis=1)

            with tf.variable_scope('x-proj-decode', reuse=True):
                w_x_proj = tf.get_variable('weights')
                b_x_proj = tf.get_variable('biases')
                x_proj = tf.nn.tanh(tf.matmul(current_input, w_x_proj) + b_x_proj)

            skip_outputs, updated_queues = [], []
            for i, (conv_input, queue, dilation) in enumerate(zip(conv_inputs, queues, self.dilations)):

                state = queue.read(time)
                with tf.variable_scope('dilated-conv-decode-{}'.format(i), reuse=True):
                    w_conv = tf.get_variable('weights'.format(i))
                    b_conv = tf.get_variable('biases'.format(i))
                    dilated_conv = tf.matmul(state, w_conv[0, :, :]) + tf.matmul(x_proj, w_conv[1, :, :]) + b_conv
                conv_filter, conv_gate = tf.split(dilated_conv, 2, axis=1)
                dilated_conv = tf.nn.tanh(conv_filter)*tf.nn.sigmoid(conv_gate)

                with tf.variable_scope('dilated-conv-proj-decode-{}'.format(i), reuse=True):
                    w_proj = tf.get_variable('weights'.format(i))
                    b_proj = tf.get_variable('biases'.format(i))
                    concat_outputs = tf.matmul(dilated_conv, w_proj) + b_proj
                skips, residuals = tf.split(concat_outputs, [self.skip_channels, self.residual_channels], axis=1)

                x_proj += residuals
                skip_outputs.append(skips)
                updated_queues.append(queue.write(time + dilation, x_proj))

            skip_outputs = tf.nn.relu(tf.concat(skip_outputs, axis=1))
            with tf.variable_scope('dense-decode-1', reuse=True):
                w_h = tf.get_variable('weights')
                b_h = tf.get_variable('biases')
                h = tf.nn.relu(tf.matmul(skip_outputs, w_h) + b_h)

            with tf.variable_scope('dense-decode-2', reuse=True):
                w_y = tf.get_variable('weights')
                b_y = tf.get_variable('biases')
                y_hat = tf.matmul(h, w_y) + b_y

            elements_finished = (time >= self.decode_len)
            finished = tf.reduce_all(elements_finished)

            next_input = tf.cond(
                finished,
                lambda: tf.zeros([batch_size, 1], dtype=tf.float32),
                lambda: y_hat
            )
            next_elements_finished = (time >= self.decode_len - 1)

            return (next_elements_finished, next_input, updated_queues)

        def condition(unused_time, elements_finished, *_):
            return tf.logical_not(tf.reduce_all(elements_finished))

        def body(time, elements_finished, emit_ta, *state_queues):
            (next_finished, emit_output, state_queues) = loop_fn(time, initial_input, state_queues)

            emit = tf.where(elements_finished, tf.zeros_like(emit_output), emit_output)
            emit_ta = emit_ta.write(time, emit)

            elements_finished = tf.logical_or(elements_finished, next_finished)
            return [time + 1, elements_finished, emit_ta] + list(state_queues)

        returned = tf.while_loop(
            cond=condition,
            body=body,
            loop_vars=[time, elements_finished, emit_ta] + state_queues
        )

        outputs_ta = returned[2]
        y_hat = tf.transpose(outputs_ta.stack(), (1, 0, 2))
        return y_hat

    def calculate_loss(self):
        x = self.get_input_sequences()

        y_hat_encode, conv_inputs = self.encode(x, features=self.encode_features)
        self.initialize_decode_params(x, features=self.decode_features)
        y_hat_decode = self.decode(y_hat_encode, conv_inputs, features=self.decode_features)
        y_hat_decode = self.inverse_transform(tf.squeeze(y_hat_decode, 2))
        y_hat_decode = tf.nn.relu(y_hat_decode)

        self.labels = self.y_decode
        self.preds = y_hat_decode
        self.loss = sequence_smape(self.labels, self.preds, self.decode_len, self.is_nan_decode)

        self.prediction_tensors = {
            'priors': self.x_encode,
            'labels': self.labels,
            'preds': self.preds,
            'page_id': self.page_id,
        }

        return self.loss

In [4]:
def get_nn(reader):
    return cnn(
        reader=reader,
        work_dir='./tf-data',
#         checkpoint_dir=os.path.join('./tf-data', 'checkpoints'),
#         prediction_dir=os.path.join('./tf-data', 'predictions'),
        optimizer='adam',
        learning_rate=.001,
        batch_size=128,
        num_training_steps=20000,#200000
        early_stopping_steps=500,#5000
        warm_start_init_step=0,
        regularization_constant=0.0,
        keep_prob=1.0,
        enable_parameter_averaging=False,
        num_restarts=2,
        min_steps_to_checkpoint=100,#500
        log_interval=10,
        num_validation_batches=1,
        grad_clip=20,
        residual_channels=32,
        skip_channels=32,
        dilations=[2**i for i in range(8)]*3,
        filter_widths=[2 for i in range(8)]*3,
        num_decode_steps=64,
        name='wave',
    )

reader = DataReader(
    data_dir=os.path.join(root, 'processed/')
)

nn = get_nn(reader)  

[[01/31/2018 07:00:19 PM]] 
Network hyper-parameters:
{'batch_size': 128,
 'checkpoint_dir': './tf-data/checkpoints',
 'dilations': [1,
               2,
               4,
               8,
               16,
               32,
               64,
               128,
               1,
               2,
               4,
               8,
               16,
               32,
               64,
               128,
               1,
               2,
               4,
               8,
               16,
               32,
               64,
               128],
 'early_stopping_steps': 500,
 'enable_parameter_averaging': False,
 'filter_widths': [2,
                   2,
                   2,
                   2,
                   2,
                   2,
                   2,
                   2,
                   2,
                   2,
                   2,
                   2,
                   2,
                   2,
                   2,
                   2,
              

--- Logging error ---
Traceback (most recent call last):
  File "/Users/junxie/anaconda/lib/python3.6/logging/__init__.py", line 987, in emit
    msg = self.format(record)
  File "/Users/junxie/anaconda/lib/python3.6/logging/__init__.py", line 833, in format
    return fmt.format(record)
  File "/Users/junxie/anaconda/lib/python3.6/logging/__init__.py", line 570, in format
    record.message = record.getMessage()
  File "/Users/junxie/anaconda/lib/python3.6/logging/__init__.py", line 333, in getMessage
    msg = msg % self.args
TypeError: not all arguments converted during string formatting
Call stack:
  File "/Users/junxie/anaconda/lib/python3.6/runpy.py", line 193, in _run_module_as_main
    "__main__", mod_spec)
  File "/Users/junxie/anaconda/lib/python3.6/runpy.py", line 85, in _run_code
    exec(code, run_globals)
  File "/Users/junxie/anaconda/lib/python3.6/site-packages/ipykernel/__main__.py", line 3, in <module>
    app.launch_new_instance()
  File "/Users/junxie/anaconda/lib/p

[[01/31/2018 07:00:48 PM]] 

All parameters:
[('Variable:0', []),
 ('Variable_1:0', []),
 ('x-proj-encode/weights:0', [18, 32]),
 ('x-proj-encode/biases:0', [32]),
 ('dilated-conv-encode-0/weights:0', [2, 32, 64]),
 ('dilated-conv-encode-0/biases:0', [64]),
 ('dilated-conv-proj-encode-0/weights:0', [32, 64]),
 ('dilated-conv-proj-encode-0/biases:0', [64]),
 ('dilated-conv-encode-1/weights:0', [2, 32, 64]),
 ('dilated-conv-encode-1/biases:0', [64]),
 ('dilated-conv-proj-encode-1/weights:0', [32, 64]),
 ('dilated-conv-proj-encode-1/biases:0', [64]),
 ('dilated-conv-encode-2/weights:0', [2, 32, 64]),
 ('dilated-conv-encode-2/biases:0', [64]),
 ('dilated-conv-proj-encode-2/weights:0', [32, 64]),
 ('dilated-conv-proj-encode-2/biases:0', [64]),
 ('dilated-conv-encode-3/weights:0', [2, 32, 64]),
 ('dilated-conv-encode-3/biases:0', [64]),
 ('dilated-conv-proj-encode-3/weights:0', [32, 64]),
 ('dilated-conv-proj-encode-3/biases:0', [64]),
 ('dilated-conv-encode-4/weights:0', [2, 32, 64]),
 ('di

In [17]:
print('training start at:', datetime.now().strftime('%Y-%m-%d.%H-%M-%S.%f'))
nn.fit()
print('training ends at:', datetime.now().strftime('%Y-%m-%d.%H-%M-%S.%f'))

training start at: 2018-01-23.21-38-53.486037


[[step        0]]     [[train]]     loss: 0.5801267        [[val]]     loss: 0.58279985       
[[step       10]]     [[train]]     loss: 0.59160571       [[val]]     loss: 0.58986149       
[[step       20]]     [[train]]     loss: 0.58218832       [[val]]     loss: 0.57497193       
[[step       30]]     [[train]]     loss: 0.57649951       [[val]]     loss: 0.56725828       
[[step       40]]     [[train]]     loss: 0.56500178       [[val]]     loss: 0.55407994       
[[step       50]]     [[train]]     loss: 0.54982256       [[val]]     loss: 0.54225223       
[[step       60]]     [[train]]     loss: 0.53550219       [[val]]     loss: 0.53244147       
[[step       70]]     [[train]]     loss: 0.52761102       [[val]]     loss: 0.52411054       
[[step       80]]     [[train]]     loss: 0.51864632       [[val]]     loss: 0.51614727       
[[step       90]]     [[train]]     loss: 0.51106744       [[val]]     loss: 0.50802393       
[[step      100]]     [[train]]     loss: 0.505300

INFO:tensorflow:Restoring parameters from ./tf-data/checkpoints/model-2120


halving learning rate
[[step     2130]]     [[train]]     loss: 0.38628852       [[val]]     loss: 0.38454723       
[[step     2140]]     [[train]]     loss: 0.38660409       [[val]]     loss: 0.38537622       
[[step     2150]]     [[train]]     loss: 0.38673066       [[val]]     loss: 0.38547195       
[[step     2160]]     [[train]]     loss: 0.38905879       [[val]]     loss: 0.38342705       
saving model to ./tf-data/checkpoints/model
[[step     2170]]     [[train]]     loss: 0.38856491       [[val]]     loss: 0.38355135       
[[step     2180]]     [[train]]     loss: 0.39001562       [[val]]     loss: 0.38267936       
saving model to ./tf-data/checkpoints/model
[[step     2190]]     [[train]]     loss: 0.38986086       [[val]]     loss: 0.38258647       
saving model to ./tf-data/checkpoints/model
[[step     2200]]     [[train]]     loss: 0.39010046       [[val]]     loss: 0.38383817       
[[step     2210]]     [[train]]     loss: 0.39246388       [[val]]     loss: 0.3846232

INFO:tensorflow:Restoring parameters from ./tf-data/checkpoints/model-2560


halving learning rate
[[step     2570]]     [[train]]     loss: 0.38462843       [[val]]     loss: 0.38407839       
[[step     2580]]     [[train]]     loss: 0.38385752       [[val]]     loss: 0.38355519       
[[step     2590]]     [[train]]     loss: 0.38443792       [[val]]     loss: 0.38309681       
[[step     2600]]     [[train]]     loss: 0.38667188       [[val]]     loss: 0.38248799       
[[step     2610]]     [[train]]     loss: 0.38647393       [[val]]     loss: 0.38213151       
[[step     2620]]     [[train]]     loss: 0.38481492       [[val]]     loss: 0.38107591       
[[step     2630]]     [[train]]     loss: 0.38515978       [[val]]     loss: 0.38200959       
[[step     2640]]     [[train]]     loss: 0.38589236       [[val]]     loss: 0.38168371       
[[step     2650]]     [[train]]     loss: 0.38636651       [[val]]     loss: 0.38185505       
[[step     2660]]     [[train]]     loss: 0.38578927       [[val]]     loss: 0.38083021       
[[step     2670]]     [[trai

training ends at: 2018-01-23.23-09-42.006529


In [7]:
reader = DataReader(
    data_dir=os.path.join(root, 'processed_full/')
)

nn = get_nn(reader)


new run with parameters:
{'batch_size': 128,
 'checkpoint_dir': './tf-data/checkpoints',
 'dilations': [1,
               2,
               4,
               8,
               16,
               32,
               64,
               128,
               1,
               2,
               4,
               8,
               16,
               32,
               64,
               128,
               1,
               2,
               4,
               8,
               16,
               32,
               64,
               128],
 'early_stopping_steps': 500,
 'enable_parameter_averaging': False,
 'filter_widths': [2,
                   2,
                   2,
                   2,
                   2,
                   2,
                   2,
                   2,
                   2,
                   2,
                   2,
                   2,
                   2,
                   2,
                   2,
                   2,
                   2,
                   2

train size 137809
val size 7254
test size 145063


all parameters:
[('Variable:0', []),
 ('Variable_1:0', []),
 ('x-proj-encode/weights:0', [18, 32]),
 ('x-proj-encode/biases:0', [32]),
 ('dilated-conv-encode-0/weights:0', [2, 32, 64]),
 ('dilated-conv-encode-0/biases:0', [64]),
 ('dilated-conv-proj-encode-0/weights:0', [32, 64]),
 ('dilated-conv-proj-encode-0/biases:0', [64]),
 ('dilated-conv-encode-1/weights:0', [2, 32, 64]),
 ('dilated-conv-encode-1/biases:0', [64]),
 ('dilated-conv-proj-encode-1/weights:0', [32, 64]),
 ('dilated-conv-proj-encode-1/biases:0', [64]),
 ('dilated-conv-encode-2/weights:0', [2, 32, 64]),
 ('dilated-conv-encode-2/biases:0', [64]),
 ('dilated-conv-proj-encode-2/weights:0', [32, 64]),
 ('dilated-conv-proj-encode-2/biases:0', [64]),
 ('dilated-conv-encode-3/weights:0', [2, 32, 64]),
 ('dilated-conv-encode-3/biases:0', [64]),
 ('dilated-conv-proj-encode-3/weights:0', [32, 64]),
 ('dilated-conv-proj-encode-3/biases:0', [64]),
 ('dilated-conv-encode-4/weights:0', [2, 32, 64]),
 ('dilated-conv-encode-4/biases:0'

built graph


In [8]:
nn.restore()

restoring model parameters from ./tf-data/checkpoints/model-2560


INFO:tensorflow:Restoring parameters from ./tf-data/checkpoints/model-2560


In [9]:
start_time = time.time()
print(start_time)
nn.predict()
print('inference time:', time.time()-start_time)

1516819933.2566597


saving priors with shape (145063, 803) to ./tf-data/predictions/priors.npy
saving labels with shape (145063, 64) to ./tf-data/predictions/labels.npy
saving preds with shape (145063, 64) to ./tf-data/predictions/preds.npy
saving page_id with shape (145063,) to ./tf-data/predictions/page_id.npy


inference time: 150.48361659049988


In [None]:
4 mins