In [1]:
import os

import numpy as np
import tensorflow as tf

from dataframe import DataFrame
from datareader import DataReader
from tf_basemodel import TFBaseModel
from tf_utils import (
    time_distributed_dense_layer, temporal_convolution_layer,
    sequence_mean, sequence_smape, shape
)

In [8]:
root_paths = [
    "/data/kaggle-wikipedia/data2/",
    "/Users/jiayou/Dropbox/JuanCode/Insight/Project/tf-data/",
    "/Users/jiayou/Dropbox/Documents/JuanCode/Kaggle/Wikipedia/data2/",
    '/mnt/WD Black/Dropbox/JuanCode/Insight/Project/tf-data/',
    '/home/paperspace/Documents/insight/insight/tf-data/',
]
root = None
for p in root_paths:
    if os.path.exists(p):
        root = p
        break
print(root)

/home/paperspace/Documents/insight/insight/tf-data/


In [9]:
class cnn(TFBaseModel):

    def __init__(
        self,
        residual_channels=32,
        skip_channels=32,
        dilations=[2**i for i in range(8)]*3,
        filter_widths=[2 for i in range(8)]*3,
        num_decode_steps=64,
        **kwargs
    ):
        self.residual_channels = residual_channels
        self.skip_channels = skip_channels
        self.dilations = dilations
        self.filter_widths = filter_widths
        self.num_decode_steps = num_decode_steps
        super(cnn, self).__init__(**kwargs)

    def transform(self, x):
        return tf.log(x + 1) - tf.expand_dims(self.log_x_encode_mean, 1)

    def inverse_transform(self, x):
        return tf.exp(x + tf.expand_dims(self.log_x_encode_mean, 1)) - 1

    def get_input_sequences(self):
        self.x_encode = tf.placeholder(tf.float32, [None, None])
        self.encode_len = tf.placeholder(tf.int32, [None])
        self.y_decode = tf.placeholder(tf.float32, [None, self.num_decode_steps])
        self.decode_len = tf.placeholder(tf.int32, [None])
        self.is_nan_encode = tf.placeholder(tf.float32, [None, None])
        self.is_nan_decode = tf.placeholder(tf.float32, [None, self.num_decode_steps])

        self.page_id = tf.placeholder(tf.int32, [None])
        self.project = tf.placeholder(tf.int32, [None])
        self.access = tf.placeholder(tf.int32, [None])
        self.agent = tf.placeholder(tf.int32, [None])

        self.keep_prob = tf.placeholder(tf.float32)
        self.is_training = tf.placeholder(tf.bool)

        self.log_x_encode_mean = sequence_mean(tf.log(self.x_encode + 1), self.encode_len)
        self.log_x_encode = self.transform(self.x_encode)
        self.x = tf.expand_dims(self.log_x_encode, 2)

        self.encode_features = tf.concat([
            tf.expand_dims(self.is_nan_encode, 2),
            tf.expand_dims(tf.cast(tf.equal(self.x_encode, 0.0), tf.float32), 2),
            tf.tile(tf.reshape(self.log_x_encode_mean, (-1, 1, 1)), (1, tf.shape(self.x_encode)[1], 1)),
            tf.tile(tf.expand_dims(tf.one_hot(self.project, 9), 1), (1, tf.shape(self.x_encode)[1], 1)),
            tf.tile(tf.expand_dims(tf.one_hot(self.access, 3), 1), (1, tf.shape(self.x_encode)[1], 1)),
            tf.tile(tf.expand_dims(tf.one_hot(self.agent, 2), 1), (1, tf.shape(self.x_encode)[1], 1)),
        ], axis=2)

        decode_idx = tf.tile(tf.expand_dims(tf.range(self.num_decode_steps), 0), (tf.shape(self.y_decode)[0], 1))
        self.decode_features = tf.concat([
            tf.one_hot(decode_idx, self.num_decode_steps),
            tf.tile(tf.reshape(self.log_x_encode_mean, (-1, 1, 1)), (1, self.num_decode_steps, 1)),
            tf.tile(tf.expand_dims(tf.one_hot(self.project, 9), 1), (1, self.num_decode_steps, 1)),
            tf.tile(tf.expand_dims(tf.one_hot(self.access, 3), 1), (1, self.num_decode_steps, 1)),
            tf.tile(tf.expand_dims(tf.one_hot(self.agent, 2), 1), (1, self.num_decode_steps, 1)),
        ], axis=2)

        return self.x

    def encode(self, x, features):
        x = tf.concat([x, features], axis=2)

        inputs = time_distributed_dense_layer(
            inputs=x,
            output_units=self.residual_channels,
            activation=tf.nn.tanh,
            scope='x-proj-encode'
        )

        skip_outputs = []
        conv_inputs = [inputs]
        for i, (dilation, filter_width) in enumerate(zip(self.dilations, self.filter_widths)):
            dilated_conv = temporal_convolution_layer(
                inputs=inputs,
                output_units=2*self.residual_channels,
                convolution_width=filter_width,
                causal=True,
                dilation_rate=[dilation],
                scope='dilated-conv-encode-{}'.format(i)
            )
            conv_filter, conv_gate = tf.split(dilated_conv, 2, axis=2)
            dilated_conv = tf.nn.tanh(conv_filter)*tf.nn.sigmoid(conv_gate)

            outputs = time_distributed_dense_layer(
                inputs=dilated_conv,
                output_units=self.skip_channels + self.residual_channels,
                scope='dilated-conv-proj-encode-{}'.format(i)
            )
            skips, residuals = tf.split(outputs, [self.skip_channels, self.residual_channels], axis=2)

            inputs += residuals
            conv_inputs.append(inputs)
            skip_outputs.append(skips)

        skip_outputs = tf.nn.relu(tf.concat(skip_outputs, axis=2))
        h = time_distributed_dense_layer(skip_outputs, 128, scope='dense-encode-1', activation=tf.nn.relu)
        y_hat = time_distributed_dense_layer(h, 1, scope='dense-encode-2')

        return y_hat, conv_inputs[:-1]

    def initialize_decode_params(self, x, features):
        x = tf.concat([x, features], axis=2)

        inputs = time_distributed_dense_layer(
            inputs=x,
            output_units=self.residual_channels,
            activation=tf.nn.tanh,
            scope='x-proj-decode'
        )

        skip_outputs = []
        conv_inputs = [inputs]
        for i, (dilation, filter_width) in enumerate(zip(self.dilations, self.filter_widths)):
            dilated_conv = temporal_convolution_layer(
                inputs=inputs,
                output_units=2*self.residual_channels,
                convolution_width=filter_width,
                causal=True,
                dilation_rate=[dilation],
                scope='dilated-conv-decode-{}'.format(i)
            )
            conv_filter, conv_gate = tf.split(dilated_conv, 2, axis=2)
            dilated_conv = tf.nn.tanh(conv_filter)*tf.nn.sigmoid(conv_gate)

            outputs = time_distributed_dense_layer(
                inputs=dilated_conv,
                output_units=self.skip_channels + self.residual_channels,
                scope='dilated-conv-proj-decode-{}'.format(i)
            )
            skips, residuals = tf.split(outputs, [self.skip_channels, self.residual_channels], axis=2)

            inputs += residuals
            conv_inputs.append(inputs)
            skip_outputs.append(skips)

        skip_outputs = tf.nn.relu(tf.concat(skip_outputs, axis=2))
        h = time_distributed_dense_layer(skip_outputs, 128, scope='dense-decode-1', activation=tf.nn.relu)
        y_hat = time_distributed_dense_layer(h, 1, scope='dense-decode-2')
        return y_hat

    def decode(self, x, conv_inputs, features):
        batch_size = tf.shape(x)[0]

        # initialize state tensor arrays
        state_queues = []
        for i, (conv_input, dilation) in enumerate(zip(conv_inputs, self.dilations)):
            batch_idx = tf.range(batch_size)
            batch_idx = tf.tile(tf.expand_dims(batch_idx, 1), (1, dilation))
            batch_idx = tf.reshape(batch_idx, [-1])

            queue_begin_time = self.encode_len - dilation - 1
            temporal_idx = tf.expand_dims(queue_begin_time, 1) + tf.expand_dims(tf.range(dilation), 0)
            temporal_idx = tf.reshape(temporal_idx, [-1])

            idx = tf.stack([batch_idx, temporal_idx], axis=1)
            slices = tf.reshape(tf.gather_nd(conv_input, idx), (batch_size, dilation, shape(conv_input, 2)))

            layer_ta = tf.TensorArray(dtype=tf.float32, size=dilation + self.num_decode_steps)
            layer_ta = layer_ta.unstack(tf.transpose(slices, (1, 0, 2)))
            state_queues.append(layer_ta)

        # initialize feature tensor array
        features_ta = tf.TensorArray(dtype=tf.float32, size=self.num_decode_steps)
        features_ta = features_ta.unstack(tf.transpose(features, (1, 0, 2)))

        # initialize output tensor array
        emit_ta = tf.TensorArray(size=self.num_decode_steps, dtype=tf.float32)

        # initialize other loop vars
        elements_finished = 0 >= self.decode_len
        time = tf.constant(0, dtype=tf.int32)

        # get initial x input
        current_idx = tf.stack([tf.range(tf.shape(self.encode_len)[0]), self.encode_len - 1], axis=1)
        initial_input = tf.gather_nd(x, current_idx)

        def loop_fn(time, current_input, queues):
            current_features = features_ta.read(time)
            current_input = tf.concat([current_input, current_features], axis=1)

            with tf.variable_scope('x-proj-decode', reuse=True):
                w_x_proj = tf.get_variable('weights')
                b_x_proj = tf.get_variable('biases')
                x_proj = tf.nn.tanh(tf.matmul(current_input, w_x_proj) + b_x_proj)

            skip_outputs, updated_queues = [], []
            for i, (conv_input, queue, dilation) in enumerate(zip(conv_inputs, queues, self.dilations)):

                state = queue.read(time)
                with tf.variable_scope('dilated-conv-decode-{}'.format(i), reuse=True):
                    w_conv = tf.get_variable('weights'.format(i))
                    b_conv = tf.get_variable('biases'.format(i))
                    dilated_conv = tf.matmul(state, w_conv[0, :, :]) + tf.matmul(x_proj, w_conv[1, :, :]) + b_conv
                conv_filter, conv_gate = tf.split(dilated_conv, 2, axis=1)
                dilated_conv = tf.nn.tanh(conv_filter)*tf.nn.sigmoid(conv_gate)

                with tf.variable_scope('dilated-conv-proj-decode-{}'.format(i), reuse=True):
                    w_proj = tf.get_variable('weights'.format(i))
                    b_proj = tf.get_variable('biases'.format(i))
                    concat_outputs = tf.matmul(dilated_conv, w_proj) + b_proj
                skips, residuals = tf.split(concat_outputs, [self.skip_channels, self.residual_channels], axis=1)

                x_proj += residuals
                skip_outputs.append(skips)
                updated_queues.append(queue.write(time + dilation, x_proj))

            skip_outputs = tf.nn.relu(tf.concat(skip_outputs, axis=1))
            with tf.variable_scope('dense-decode-1', reuse=True):
                w_h = tf.get_variable('weights')
                b_h = tf.get_variable('biases')
                h = tf.nn.relu(tf.matmul(skip_outputs, w_h) + b_h)

            with tf.variable_scope('dense-decode-2', reuse=True):
                w_y = tf.get_variable('weights')
                b_y = tf.get_variable('biases')
                y_hat = tf.matmul(h, w_y) + b_y

            elements_finished = (time >= self.decode_len)
            finished = tf.reduce_all(elements_finished)

            next_input = tf.cond(
                finished,
                lambda: tf.zeros([batch_size, 1], dtype=tf.float32),
                lambda: y_hat
            )
            next_elements_finished = (time >= self.decode_len - 1)

            return (next_elements_finished, next_input, updated_queues)

        def condition(unused_time, elements_finished, *_):
            return tf.logical_not(tf.reduce_all(elements_finished))

        def body(time, elements_finished, emit_ta, *state_queues):
            (next_finished, emit_output, state_queues) = loop_fn(time, initial_input, state_queues)

            emit = tf.where(elements_finished, tf.zeros_like(emit_output), emit_output)
            emit_ta = emit_ta.write(time, emit)

            elements_finished = tf.logical_or(elements_finished, next_finished)
            return [time + 1, elements_finished, emit_ta] + list(state_queues)

        returned = tf.while_loop(
            cond=condition,
            body=body,
            loop_vars=[time, elements_finished, emit_ta] + state_queues
        )

        outputs_ta = returned[2]
        y_hat = tf.transpose(outputs_ta.stack(), (1, 0, 2))
        return y_hat

    def calculate_loss(self):
        x = self.get_input_sequences()

        y_hat_encode, conv_inputs = self.encode(x, features=self.encode_features)
        self.initialize_decode_params(x, features=self.decode_features)
        y_hat_decode = self.decode(y_hat_encode, conv_inputs, features=self.decode_features)
        y_hat_decode = self.inverse_transform(tf.squeeze(y_hat_decode, 2))
        y_hat_decode = tf.nn.relu(y_hat_decode)

        self.labels = self.y_decode
        self.preds = y_hat_decode
        self.loss = sequence_smape(self.labels, self.preds, self.decode_len, self.is_nan_decode)

        self.prediction_tensors = {
            'priors': self.x_encode,
            'labels': self.labels,
            'preds': self.preds,
            'page_id': self.page_id,
        }

        return self.loss

In [10]:
def get_nn(reader):
    return cnn(
        reader=reader,
        log_dir=os.path.join('./tf-data', 'logs'),
        checkpoint_dir=os.path.join('./tf-data', 'checkpoints'),
        prediction_dir=os.path.join('./tf-data', 'predictions'),
        optimizer='adam',
        learning_rate=.001,
        batch_size=128,
        num_training_steps=200000,
        early_stopping_steps=5000,
        warm_start_init_step=0,
        regularization_constant=0.0,
        keep_prob=1.0,
        enable_parameter_averaging=False,
        num_restarts=2,
        min_steps_to_checkpoint=500,
        log_interval=10,
        num_validation_batches=1,
        grad_clip=20,
        residual_channels=32,
        skip_channels=32,
        dilations=[2**i for i in range(8)]*3,
        filter_widths=[2 for i in range(8)]*3,
        num_decode_steps=64,
    )

reader = DataReader(
    data_dir=os.path.join(root, 'processed/')
)

nn = get_nn(reader)  


new run with parameters:
{'batch_size': 128,
 'checkpoint_dir': './tf-data/checkpoints',
 'dilations': [1,
               2,
               4,
               8,
               16,
               32,
               64,
               128,
               1,
               2,
               4,
               8,
               16,
               32,
               64,
               128,
               1,
               2,
               4,
               8,
               16,
               32,
               64,
               128],
 'early_stopping_steps': 5000,
 'enable_parameter_averaging': False,
 'filter_widths': [2,
                   2,
                   2,
                   2,
                   2,
                   2,
                   2,
                   2,
                   2,
                   2,
                   2,
                   2,
                   2,
                   2,
                   2,
                   2,
                   2,
                   

train size 13781
val size 726
test size 14507


all parameters:
[('Variable:0', []),
 ('Variable_1:0', []),
 ('x-proj-encode/weights:0', [18, 32]),
 ('x-proj-encode/biases:0', [32]),
 ('dilated-conv-encode-0/weights:0', [2, 32, 64]),
 ('dilated-conv-encode-0/biases:0', [64]),
 ('dilated-conv-proj-encode-0/weights:0', [32, 64]),
 ('dilated-conv-proj-encode-0/biases:0', [64]),
 ('dilated-conv-encode-1/weights:0', [2, 32, 64]),
 ('dilated-conv-encode-1/biases:0', [64]),
 ('dilated-conv-proj-encode-1/weights:0', [32, 64]),
 ('dilated-conv-proj-encode-1/biases:0', [64]),
 ('dilated-conv-encode-2/weights:0', [2, 32, 64]),
 ('dilated-conv-encode-2/biases:0', [64]),
 ('dilated-conv-proj-encode-2/weights:0', [32, 64]),
 ('dilated-conv-proj-encode-2/biases:0', [64]),
 ('dilated-conv-encode-3/weights:0', [2, 32, 64]),
 ('dilated-conv-encode-3/biases:0', [64]),
 ('dilated-conv-proj-encode-3/weights:0', [32, 64]),
 ('dilated-conv-proj-encode-3/biases:0', [64]),
 ('dilated-conv-encode-4/weights:0', [2, 32, 64]),
 ('dilated-conv-encode-4/biases:0'

trainable parameters:
[('x-proj-encode/weights:0', [18, 32]),
 ('x-proj-encode/biases:0', [32]),
 ('dilated-conv-encode-0/weights:0', [2, 32, 64]),
 ('dilated-conv-encode-0/biases:0', [64]),
 ('dilated-conv-proj-encode-0/weights:0', [32, 64]),
 ('dilated-conv-proj-encode-0/biases:0', [64]),
 ('dilated-conv-encode-1/weights:0', [2, 32, 64]),
 ('dilated-conv-encode-1/biases:0', [64]),
 ('dilated-conv-proj-encode-1/weights:0', [32, 64]),
 ('dilated-conv-proj-encode-1/biases:0', [64]),
 ('dilated-conv-encode-2/weights:0', [2, 32, 64]),
 ('dilated-conv-encode-2/biases:0', [64]),
 ('dilated-conv-proj-encode-2/weights:0', [32, 64]),
 ('dilated-conv-proj-encode-2/biases:0', [64]),
 ('dilated-conv-encode-3/weights:0', [2, 32, 64]),
 ('dilated-conv-encode-3/biases:0', [64]),
 ('dilated-conv-proj-encode-3/weights:0', [32, 64]),
 ('dilated-conv-proj-encode-3/biases:0', [64]),
 ('dilated-conv-encode-4/weights:0', [2, 32, 64]),
 ('dilated-conv-encode-4/biases:0', [64]),
 ('dilated-conv-proj-encode-4

trainable parameter count:
501378


built graph


In [11]:
nn.fit()

[[step        0]]     [[train]]     loss: 0.60102844       [[val]]     loss: 0.61454183       
[[step       10]]     [[train]]     loss: 0.5875113        [[val]]     loss: 0.59680455       
[[step       20]]     [[train]]     loss: 0.57876272       [[val]]     loss: 0.59202011       
[[step       30]]     [[train]]     loss: 0.56607331       [[val]]     loss: 0.57376309       
[[step       40]]     [[train]]     loss: 0.5524757        [[val]]     loss: 0.55939981       
[[step       50]]     [[train]]     loss: 0.54075298       [[val]]     loss: 0.5477844        
[[step       60]]     [[train]]     loss: 0.53027397       [[val]]     loss: 0.53699961       
[[step       70]]     [[train]]     loss: 0.52077883       [[val]]     loss: 0.52768974       
[[step       80]]     [[train]]     loss: 0.51292654       [[val]]     loss: 0.51974425       
[[step       90]]     [[train]]     loss: 0.50753124       [[val]]     loss: 0.51416126       
[[step      100]]     [[train]]     loss: 0.501168

saving model to ./tf-data/checkpoints/model
[[step      790]]     [[train]]     loss: 0.40866867       [[val]]     loss: 0.41223429       
saving model to ./tf-data/checkpoints/model
[[step      800]]     [[train]]     loss: 0.4094444        [[val]]     loss: 0.41240065       
[[step      810]]     [[train]]     loss: 0.40732446       [[val]]     loss: 0.41207934       
saving model to ./tf-data/checkpoints/model
[[step      820]]     [[train]]     loss: 0.40552128       [[val]]     loss: 0.41150279       
saving model to ./tf-data/checkpoints/model
[[step      830]]     [[train]]     loss: 0.40589433       [[val]]     loss: 0.41176987       
[[step      840]]     [[train]]     loss: 0.4055922        [[val]]     loss: 0.41045441       
saving model to ./tf-data/checkpoints/model
[[step      850]]     [[train]]     loss: 0.40564817       [[val]]     loss: 0.41018335       
saving model to ./tf-data/checkpoints/model
[[step      860]]     [[train]]     loss: 0.4058136        [[val]]     

[[step     1560]]     [[train]]     loss: 0.39557225       [[val]]     loss: 0.4013886        
[[step     1570]]     [[train]]     loss: 0.39469284       [[val]]     loss: 0.39969207       
saving model to ./tf-data/checkpoints/model
[[step     1580]]     [[train]]     loss: 0.39202035       [[val]]     loss: 0.39970471       
[[step     1590]]     [[train]]     loss: 0.39134035       [[val]]     loss: 0.39866214       
saving model to ./tf-data/checkpoints/model
[[step     1600]]     [[train]]     loss: 0.39194732       [[val]]     loss: 0.39888875       
[[step     1610]]     [[train]]     loss: 0.39408801       [[val]]     loss: 0.39855539       
saving model to ./tf-data/checkpoints/model
[[step     1620]]     [[train]]     loss: 0.39391148       [[val]]     loss: 0.39872364       
[[step     1630]]     [[train]]     loss: 0.39608431       [[val]]     loss: 0.39904318       
[[step     1640]]     [[train]]     loss: 0.39590028       [[val]]     loss: 0.39899616       
[[step     16

[[step     2360]]     [[train]]     loss: 0.39136154       [[val]]     loss: 0.3963756        
[[step     2370]]     [[train]]     loss: 0.39029579       [[val]]     loss: 0.39660392       
[[step     2380]]     [[train]]     loss: 0.39056017       [[val]]     loss: 0.39679327       
[[step     2390]]     [[train]]     loss: 0.39048703       [[val]]     loss: 0.39611768       
[[step     2400]]     [[train]]     loss: 0.38949144       [[val]]     loss: 0.39646309       
[[step     2410]]     [[train]]     loss: 0.38934251       [[val]]     loss: 0.39688711       
[[step     2420]]     [[train]]     loss: 0.39104938       [[val]]     loss: 0.39530507       
[[step     2430]]     [[train]]     loss: 0.39312375       [[val]]     loss: 0.39565849       
[[step     2440]]     [[train]]     loss: 0.39232686       [[val]]     loss: 0.396686         
[[step     2450]]     [[train]]     loss: 0.3910587        [[val]]     loss: 0.39694649       
[[step     2460]]     [[train]]     loss: 0.392332

KeyboardInterrupt: 

In [15]:
reader = DataReader(
    data_dir=os.path.join(root, 'processed_full/')
)

nn = get_nn(reader)


new run with parameters:
{'batch_size': 128,
 'checkpoint_dir': './tf-data/checkpoints',
 'dilations': [1,
               2,
               4,
               8,
               16,
               32,
               64,
               128,
               1,
               2,
               4,
               8,
               16,
               32,
               64,
               128,
               1,
               2,
               4,
               8,
               16,
               32,
               64,
               128],
 'early_stopping_steps': 5000,
 'enable_parameter_averaging': False,
 'filter_widths': [2,
                   2,
                   2,
                   2,
                   2,
                   2,
                   2,
                   2,
                   2,
                   2,
                   2,
                   2,
                   2,
                   2,
                   2,
                   2,
                   2,
                   

train size 137809
val size 7254
test size 145063


all parameters:
[('Variable:0', []),
 ('Variable_1:0', []),
 ('x-proj-encode/weights:0', [18, 32]),
 ('x-proj-encode/biases:0', [32]),
 ('dilated-conv-encode-0/weights:0', [2, 32, 64]),
 ('dilated-conv-encode-0/biases:0', [64]),
 ('dilated-conv-proj-encode-0/weights:0', [32, 64]),
 ('dilated-conv-proj-encode-0/biases:0', [64]),
 ('dilated-conv-encode-1/weights:0', [2, 32, 64]),
 ('dilated-conv-encode-1/biases:0', [64]),
 ('dilated-conv-proj-encode-1/weights:0', [32, 64]),
 ('dilated-conv-proj-encode-1/biases:0', [64]),
 ('dilated-conv-encode-2/weights:0', [2, 32, 64]),
 ('dilated-conv-encode-2/biases:0', [64]),
 ('dilated-conv-proj-encode-2/weights:0', [32, 64]),
 ('dilated-conv-proj-encode-2/biases:0', [64]),
 ('dilated-conv-encode-3/weights:0', [2, 32, 64]),
 ('dilated-conv-encode-3/biases:0', [64]),
 ('dilated-conv-proj-encode-3/weights:0', [32, 64]),
 ('dilated-conv-proj-encode-3/biases:0', [64]),
 ('dilated-conv-encode-4/weights:0', [2, 32, 64]),
 ('dilated-conv-encode-4/biases:0'

trainable parameters:
[('x-proj-encode/weights:0', [18, 32]),
 ('x-proj-encode/biases:0', [32]),
 ('dilated-conv-encode-0/weights:0', [2, 32, 64]),
 ('dilated-conv-encode-0/biases:0', [64]),
 ('dilated-conv-proj-encode-0/weights:0', [32, 64]),
 ('dilated-conv-proj-encode-0/biases:0', [64]),
 ('dilated-conv-encode-1/weights:0', [2, 32, 64]),
 ('dilated-conv-encode-1/biases:0', [64]),
 ('dilated-conv-proj-encode-1/weights:0', [32, 64]),
 ('dilated-conv-proj-encode-1/biases:0', [64]),
 ('dilated-conv-encode-2/weights:0', [2, 32, 64]),
 ('dilated-conv-encode-2/biases:0', [64]),
 ('dilated-conv-proj-encode-2/weights:0', [32, 64]),
 ('dilated-conv-proj-encode-2/biases:0', [64]),
 ('dilated-conv-encode-3/weights:0', [2, 32, 64]),
 ('dilated-conv-encode-3/biases:0', [64]),
 ('dilated-conv-proj-encode-3/weights:0', [32, 64]),
 ('dilated-conv-proj-encode-3/biases:0', [64]),
 ('dilated-conv-encode-4/weights:0', [2, 32, 64]),
 ('dilated-conv-encode-4/biases:0', [64]),
 ('dilated-conv-proj-encode-4

trainable parameter count:
501378


built graph


In [16]:
nn.restore()

restoring model parameters from ./tf-data/checkpoints/model-2310


INFO:tensorflow:Restoring parameters from ./tf-data/checkpoints/model-2310


In [17]:
nn.predict()

saving priors with shape (145063, 803) to ./tf-data/predictions/priors.npy
saving labels with shape (145063, 64) to ./tf-data/predictions/labels.npy
saving preds with shape (145063, 64) to ./tf-data/predictions/preds.npy
saving page_id with shape (145063,) to ./tf-data/predictions/page_id.npy


In [None]:
4 mins