## User Data

In [2]:
import os
import pandas as pd
from IPython.display import clear_output # Count iteration times
# from datetime import datetime # Record time

In [2]:
def parse_order(x):
    """Collapse all rows of a single order into one object-dtype Series.

    Per-product columns are flattened into underscore-joined strings (in row
    order); order-level columns are taken from the first row of the group.
    Each call also bumps the module-level ``global_counter`` and reprints the
    running total in place of the previous cell output.
    """
    global global_counter

    def joined(column):
        # Underscore-separated string of the column's values, in row order.
        return '_'.join(x[column].values.astype(str).tolist())

    series = pd.Series(dtype='object')  # explicit dtype for the empty Series

    # Per-product fields: one token per row of the order.
    for target, source in (
        ('products', 'product_id'),
        ('reorders', 'reordered'),
        ('aisles', 'aisle_id'),
        ('departments', 'department_id'),
    ):
        series[target] = joined(source)

    # Order-level fields: read from the first row of the group.
    for target, source in (
        ('order_number', 'order_number'),
        ('order_dow', 'order_dow'),
        ('order_hour', 'order_hour_of_day'),
        ('days_since_prior_order', 'days_since_prior_order'),
    ):
        series[target] = x[source].iloc[0]

    # Progress display: count this call and overwrite the previous output.
    global_counter = global_counter + 1
    clear_output(wait=True)
    print(f"total loops run: {global_counter}")

    return series

In [3]:
def parse_user(x):
    """Collapse all rows of one user into a single object-dtype Series.

    Orders are grouped by ``order_id`` (first-seen order, no sorting) and each
    is summarised by ``parse_order``; the per-order values are then joined
    with single spaces. ``eval_set`` is taken from the user's last row.
    """
    parsed_orders = x.groupby('order_id', sort=False).apply(parse_order)

    def spaced(values):
        # Space-separated string of already-stringified values.
        return ' '.join(values.tolist())

    series = pd.Series(dtype='object')

    series['order_ids'] = spaced(parsed_orders.index.map(str))

    # Order-level scalars, stringified element by element.
    for target, source in (
        ('order_numbers', 'order_number'),
        ('order_dows', 'order_dow'),
        ('order_hours', 'order_hour'),
        ('days_since_prior_orders', 'days_since_prior_order'),
    ):
        series[target] = spaced(parsed_orders[source].map(str))

    # Underscore-joined per-order strings produced by parse_order.
    for target, source in (
        ('product_ids', 'products'),
        ('aisle_ids', 'aisles'),
        ('department_ids', 'departments'),
        ('reorders', 'reorders'),
    ):
        series[target] = spaced(parsed_orders[source].values.astype(str))

    series['eval_set'] = x['eval_set'].values[-1]

    return series

In [4]:
# The cells below use paths relative to this directory ('../data/...', '../models/...').
os.getcwd() # Check the current path

'/storage/work/z/zbh5185/Instacart_Market/preprocessing'

In [5]:
# Order headers and the product catalogue.
orders = pd.read_csv('../data/raw/orders.csv')
products = pd.read_csv('../data/raw/products.csv')

# Order lines come in two files; stack them into a single table.
prior_products = pd.read_csv('../data/raw/order_products__prior.csv')
train_products = pd.read_csv('../data/raw/order_products__train.csv')
order_products = pd.concat([prior_products, train_products], axis=0)

In [6]:
# Left joins keep every order, including those that have no order lines.
df = (
    orders
    .merge(order_products, how='left', on='order_id')
    .merge(products, how='left', on='product_id')
)

# Missing values become 0 so the columns can be stored as integers.
df['days_since_prior_order'] = df['days_since_prior_order'].fillna(0).astype(int)
null_cols = ['product_id', 'aisle_id', 'department_id', 'add_to_cart_order', 'reordered']
df[null_cols] = df[null_cols].fillna(0).astype(int)

In [7]:
# Sanity check: row/column counts of both order-line files, then the
# distinct eval_set values present after the merge.
print(prior_products.shape, train_products.shape, sep='\n')
print(df['eval_set'].unique())

(32434489, 4)
(1384617, 4)
['prior' 'train' 'test']


In [8]:
# Create the output directory if needed. exist_ok=True replaces the
# check-then-create pattern and keeps the cell safe to re-run.
os.makedirs('../data/processed', exist_ok=True)

In [9]:
### Goal: draw a small sample of df in which every aisle and every department
### is represented by at least ten rows.

SAMPLE_SEED = 42         # fixed seed so Restart & Run All reproduces the same sample
MIN_ROWS_PER_GROUP = 10  # guaranteed rows per aisle / per department
SAMPLE_FRACTION = 0.01   # target share of df used to size the random top-up


def _sample_rows(frame, n, seed=SAMPLE_SEED):
    """Sample ``n`` rows from ``frame``, capped to the range [0, len(frame)]."""
    return frame.sample(n=max(0, min(n, len(frame))), random_state=seed)


# Identify all unique aisles and departments
unique_aisles = df['aisle_id'].unique()
unique_departments = df['department_id'].unique()

# Collect the per-group samples in a list and concatenate once.
# DataFrame.append was removed in pandas 2.0, and growing a frame inside a
# loop is quadratic.
coverage_samples = [
    _sample_rows(df[df['aisle_id'] == aisle], MIN_ROWS_PER_GROUP)
    for aisle in unique_aisles
]
coverage_samples += [
    _sample_rows(df[df['department_id'] == department], MIN_ROWS_PER_GROUP)
    for department in unique_departments
]
sampled_df = pd.concat(coverage_samples)

# A row can be drawn once for its aisle and again for its department; keep it
# only once so no order line is duplicated in the sample.
sampled_df = sampled_df[~sampled_df.index.duplicated(keep='first')]

# Number of additional rows used to size the random top-up
remaining_rows = max(int(SAMPLE_FRACTION * len(df)) - len(sampled_df), 0)

# Sample the remaining rows randomly from the rows not selected yet
remaining_df = df.drop(sampled_df.index)
remaining_df_train = remaining_df[remaining_df['eval_set'] == 'train']  # training rows
remaining_sampled_df_train = _sample_rows(remaining_df_train, int(0.99 * remaining_rows))

remaining_df_test = remaining_df[remaining_df['eval_set'] != 'train']  # prior / test rows
# NOTE(review): both draws use 0.99 * remaining_rows, so the final sample is
# roughly twice the SAMPLE_FRACTION target. Left as is because the later
# cells need the prior-order rows; confirm the intended split.
remaining_sampled_df_test = _sample_rows(remaining_df_test, int(0.99 * remaining_rows))

# Combine everything and reset the index of the sampled dataframe
sampled_df = pd.concat(
    [sampled_df, remaining_sampled_df_train, remaining_sampled_df_test]
).reset_index(drop=True)


In [None]:
# Full-data variant (disabled; the sampled frame is used instead):
# user_data = df.groupby('user_id', sort=False).apply(parse_user).reset_index()
# Initialize a global counter
# parse_order increments this on every call, so it must be reset before the apply.
global_counter = 0
# One row per user, in first-seen order; reset_index turns user_id back into a column.
user_data = sampled_df.groupby('user_id', sort=False).apply(parse_user).reset_index()

total loops run: 76769


In [None]:
# Persist the per-user table; the Product Data section reads it back from this path.
user_data.to_csv('../data/processed/user_data.csv', index=False)

## Product Data

In [13]:
# Per-user table written by the User Data section.
df = pd.read_csv('../data/processed/user_data.csv')

# product_id -> aisle / department / name lookups.
products = pd.read_csv('../data/raw/products.csv')
product_to_aisle, product_to_department, product_to_name = (
    dict(zip(products['product_id'], products[column]))
    for column in ('aisle_id', 'department_id', 'product_name')
)

# Output columns: one entry is appended to every list per (user, product) row.
user_ids, product_ids, aisle_ids, department_ids = [], [], [], []
product_names, eval_sets = [], []

# Space-separated per-order histories.
is_ordered_histories, index_in_order_histories = [], []
order_size_histories, reorder_size_histories = [], []
order_dow_histories, order_hour_histories = [], []
days_since_prior_order_histories, order_number_histories = [], []

labels = []

longest = 0
    

In [14]:
# Build one output row per (user, product seen in a prior order), plus one
# "none" row per user (product_id 0).
#
# Fix: the previous version kept `map(...)` objects for the orders. In
# Python 3 these are one-shot iterators, so `product_id in order` consumed
# them. As a result the position lookup, order sizes and reorder sizes were
# computed on exhausted iterators, and every product after the first saw
# empty orders. All per-order data is now materialised as lists.
#
# Row-level variables no longer reuse the names `products`, `orders` and
# `prior_products`, so the DataFrames with those names are not overwritten.
longest = 0  # kept from the original cell; nothing below reads it
for row_idx, row in df.iterrows():
    if row_idx % 10000 == 0:
        print(row_idx)
        data = [
            user_ids,
            product_ids,
            aisle_ids,
            department_ids,
            product_names,
            is_ordered_histories,
            index_in_order_histories,
            order_size_histories,
            reorder_size_histories,
            order_dow_histories,
            order_hour_histories,
            days_since_prior_order_histories,
            order_number_histories,
            labels,
            eval_sets
        ]
        # Length Check
        print(list(map(len, data)))

    user_id = row['user_id']
    eval_set = row['eval_set']

    # Orders are space-separated; items inside an order are '_'-separated.
    # The last order is the prediction target, the rest is the history.
    order_tokens = str(row['product_ids']).split()
    reorder_tokens = str(row['reorders']).split()

    prior_orders = [[int(p) for p in token.split('_')] for token in order_tokens[:-1]]
    next_product_set = {int(p) for p in order_tokens[-1].split('_')}
    prior_reorders = [[int(r) for r in token.split('_')] for token in reorder_tokens[:-1]]
    next_reorders = [int(r) for r in reorder_tokens[-1].split('_')]

    product_set = {p for order in prior_orders for p in order}

    # Per-user values that do not depend on the product: computed once here
    # instead of once per product.
    first_positions = []  # per order: product_id -> 1-based position of first occurrence
    order_sizes = []
    reorder_sizes = []
    seen_products = set()
    for order in prior_orders:
        positions = {}
        for position, pid in enumerate(order, start=1):
            positions.setdefault(pid, position)
        first_positions.append(positions)
        order_sizes.append(str(len(order)))
        # Distinct products of this order that appeared in an earlier order.
        reorder_sizes.append(str(len(seen_products.intersection(positions))))
        seen_products.update(positions)
    order_size = ' '.join(order_sizes)
    reorder_size = ' '.join(reorder_sizes)

    for product_id in product_set:

        user_ids.append(user_id)
        product_ids.append(product_id)
        labels.append(int(product_id in next_product_set) if eval_set == 'train' else -1)
        eval_sets.append(eval_set)

        # Products missing from products.csv (e.g. the 0 placeholder) fall
        # back to 0. Integer defaults match the "none" row below; the name
        # keeps its '0' string default.
        aisle_ids.append(product_to_aisle.get(product_id, 0))
        department_ids.append(product_to_department.get(product_id, 0))
        product_names.append(product_to_name.get(product_id, '0'))

        is_ordered_histories.append(
            ' '.join(str(int(product_id in positions)) for positions in first_positions)
        )
        index_in_order_histories.append(
            ' '.join(str(positions.get(product_id, 0)) for positions in first_positions)
        )
        order_size_histories.append(order_size)
        reorder_size_histories.append(reorder_size)
        order_dow_histories.append(row['order_dows'])
        order_hour_histories.append(row['order_hours'])
        days_since_prior_order_histories.append(row['days_since_prior_orders'])
        order_number_histories.append(row['order_numbers'])

    # "None" row: label is 1 when the next order contains no reordered item.
    user_ids.append(user_id)
    product_ids.append(0)
    labels.append(int(max(next_reorders) == 0) if eval_set == 'train' else -1)

    aisle_ids.append(0)
    department_ids.append(0)
    product_names.append(0)
    eval_sets.append(eval_set)

    is_ordered = []
    index_in_order = []
    order_size = []
    reorder_size = []

    for reorder in prior_reorders:
        is_ordered.append(str(int(max(reorder) == 0)))
        index_in_order.append(str(0))
        order_size.append(str(len(reorder)))
        reorder_size.append(str(sum(reorder)))

    is_ordered_histories.append(' '.join(is_ordered))
    index_in_order_histories.append(' '.join(index_in_order))
    order_size_histories.append(' '.join(order_size))
    reorder_size_histories.append(' '.join(reorder_size))
    order_dow_histories.append(row['order_dows'])
    order_hour_histories.append(row['order_hours'])
    days_since_prior_order_histories.append(row['days_since_prior_orders'])
    order_number_histories.append(row['order_numbers'])

0
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
10000
[56753, 56753, 56753, 56753, 56753, 56753, 56753, 56753, 56753, 56753, 56753, 56753, 56753, 56753, 56753]
20000
[110400, 110400, 110400, 110400, 110400, 110400, 110400, 110400, 110400, 110400, 110400, 110400, 110400, 110400, 110400]
30000
[160340, 160340, 160340, 160340, 160340, 160340, 160340, 160340, 160340, 160340, 160340, 160340, 160340, 160340, 160340]
40000
[208372, 208372, 208372, 208372, 208372, 208372, 208372, 208372, 208372, 208372, 208372, 208372, 208372, 208372, 208372]
50000
[252955, 252955, 252955, 252955, 252955, 252955, 252955, 252955, 252955, 252955, 252955, 252955, 252955, 252955, 252955]
60000
[294419, 294419, 294419, 294419, 294419, 294419, 294419, 294419, 294419, 294419, 294419, 294419, 294419, 294419, 294419]
70000
[332499, 332499, 332499, 332499, 332499, 332499, 332499, 332499, 332499, 332499, 332499, 332499, 332499, 332499, 332499]
80000
[366897, 366897, 366897, 366897, 366897, 366897, 366897, 366897, 366897,

In [15]:
# Gather the column lists in the same order as the column names used when
# the DataFrame is built.
data = [
    user_ids,
    product_ids,
    aisle_ids,
    department_ids,
    product_names,
    is_ordered_histories,
    index_in_order_histories,
    order_size_histories,
    reorder_size_histories,
    order_dow_histories,
    order_hour_histories,
    days_since_prior_order_histories,
    order_number_histories,
    labels,
    eval_sets,
]

# Length check: every list must hold the same number of entries.
[len(column) for column in data]

[581809,
 581809,
 581809,
 581809,
 581809,
 581809,
 581809,
 581809,
 581809,
 581809,
 581809,
 581809,
 581809,
 581809,
 581809]

In [16]:
# Column names, in the same order as the lists collected in `data`.
columns = [
    'user_id',
    'product_id',
    'aisle_id',
    'department_id',
    'product_name',
    'is_ordered_history',
    'index_in_order_history',
    'order_size_history',
    'reorder_size_history',
    'order_dow_history',
    'order_hour_history',
    'days_since_prior_order_history',
    'order_number_history',
    'label',
    'eval_set'
]
# exist_ok=True replaces the check-then-create pattern and is safe on re-run.
os.makedirs('../data/processed', exist_ok=True)

# Note: this rebinds `df` from the per-user table to the per-product table.
df = pd.DataFrame(dict(zip(columns, data)))
df.to_csv('../data/processed/product_data.csv', index=False)


### Added for tonight's run; delete afterwards

In [None]:
from collections import Counter
import os
import numpy as np
import pandas as pd

def pad_1d(array, max_len):
    """Truncate ``array`` to ``max_len`` items and right-pad it with zeros.

    Returns a ``(padded_list, length)`` tuple, where ``length`` is the number
    of original items kept (before padding).
    """
    kept = list(array)[:max_len]
    length = len(kept)
    return kept + [0] * (max_len - length), length


def make_word_idx(product_names):
    """Map every whitespace-separated word in ``product_names`` to an integer id.

    Words seen fewer than 10 times share id 0. Every other word gets the next
    free id, starting at 1, in order of first appearance.
    """
    word_counts = Counter(word for name in product_names for word in name.split())

    word_idx = {}
    next_id = 1
    for word, count in word_counts.items():
        if count >= 10:
            word_idx[word] = next_id
            next_id += 1
        else:
            word_idx[word] = 0

    return word_idx


def encode_text(text, word_idx):
    """Encode ``text`` as a space-separated string of word ids.

    Words missing from ``word_idx`` are encoded as 0, the same bucket
    ``make_word_idx`` uses for rare words, instead of raising ``KeyError``.
    Empty or whitespace-only text yields ``'0'``.
    """
    tokens = text.split() if text else []
    if not tokens:
        return '0'
    return ' '.join(str(word_idx.get(token, 0)) for token in tokens)

# Embedding debugging
product_data = pd.read_csv('../data/processed/product_data.csv')

# Keep only rows whose product name and ordered-history are strings. Empty
# histories are read back from the CSV as float NaN.
valid_rows = (
    product_data['product_name'].map(lambda value: isinstance(value, str))
    & product_data['is_ordered_history'].map(lambda value: isinstance(value, str))
)
# .copy() so the column assignments below act on an independent frame rather
# than a slice of the original (avoids SettingWithCopyWarning).
product_data = product_data.loc[valid_rows].copy()

product_data['product_name'] = product_data['product_name'].str.lower()

# The vocabulary is built from the full product catalogue, lower-cased the same way.
product_df = pd.read_csv('../data/raw/products.csv')
product_df['product_name'] = product_df['product_name'].str.lower()

word_idx = make_word_idx(product_df['product_name'].tolist())
product_data['product_name_encoded'] = product_data['product_name'].map(lambda x: encode_text(x, word_idx))

# Check data shape. A bare expression in the middle of a cell displays nothing, so print it.
print(product_data.shape)

# Check label distribution
label_col = product_data['label']
label_count = label_col.value_counts().to_dict()
print(label_count)

# Check the value types present in product_name (only str is expected after the filter)
mixlist = product_data['product_name']
mixlist_type_dic = mixlist.map(type).value_counts().to_dict()
print(mixlist_type_dic)

num_rows = len(product_data)

# Allocate the output arrays.
user_id = np.zeros(shape=[num_rows], dtype=np.int32)
product_id = np.zeros(shape=[num_rows], dtype=np.int32)
aisle_id = np.zeros(shape=[num_rows], dtype=np.int16)
department_id = np.zeros(shape=[num_rows], dtype=np.int8)
eval_set = np.zeros(shape=[num_rows], dtype='S5')
label = np.zeros(shape=[num_rows], dtype=np.int8)

is_ordered_history = np.zeros(shape=[num_rows, 100], dtype=np.int8)
index_in_order_history = np.zeros(shape=[num_rows, 100], dtype=np.int8)
order_dow_history = np.zeros(shape=[num_rows, 100], dtype=np.int8)
order_hour_history = np.zeros(shape=[num_rows, 100], dtype=np.int8)
days_since_prior_order_history = np.zeros(shape=[num_rows, 100], dtype=np.int8)
order_size_history = np.zeros(shape=[num_rows, 100], dtype=np.int8)
reorder_size_history = np.zeros(shape=[num_rows, 100], dtype=np.int8)
order_number_history = np.zeros(shape=[num_rows, 100], dtype=np.int8)
product_name = np.zeros(shape=[num_rows, 30], dtype=np.int32)
product_name_length = np.zeros(shape=[num_rows], dtype=np.int8)
history_length = np.zeros(shape=[num_rows], dtype=np.int8)


def _fill_padded(target, series, max_len, lengths=None):
    """Write each space-separated integer string of ``series`` into a row of ``target``.

    Rows are truncated / zero-padded to ``max_len`` with ``pad_1d``. Values are
    clipped to the range of ``target``'s dtype so large counts cannot wrap
    around. When ``lengths`` is given, the unpadded length of each row is
    stored in it.
    """
    bounds = np.iinfo(target.dtype)
    for i, text in enumerate(series):
        values = [int(float(token)) for token in str(text).split()]
        padded, length = pad_1d(values, max_len)
        target[i, :] = np.clip(padded, bounds.min, bounds.max)
        if lengths is not None:
            lengths[i] = length


# Fix: the arrays were previously left at their initial zeros, so all-zero
# data was saved. Fill them from product_data, indexing by position because
# the filtered frame's index is no longer contiguous.
user_id[:] = product_data['user_id'].to_numpy()
product_id[:] = product_data['product_id'].to_numpy()
aisle_id[:] = product_data['aisle_id'].to_numpy()
department_id[:] = product_data['department_id'].to_numpy()
eval_set[:] = product_data['eval_set'].to_numpy().astype('S5')
label[:] = product_data['label'].to_numpy()

_fill_padded(is_ordered_history, product_data['is_ordered_history'], 100, lengths=history_length)
_fill_padded(index_in_order_history, product_data['index_in_order_history'], 100)
_fill_padded(order_dow_history, product_data['order_dow_history'], 100)
_fill_padded(order_hour_history, product_data['order_hour_history'], 100)
_fill_padded(days_since_prior_order_history, product_data['days_since_prior_order_history'], 100)
_fill_padded(order_size_history, product_data['order_size_history'], 100)
_fill_padded(reorder_size_history, product_data['reorder_size_history'], 100)
_fill_padded(order_number_history, product_data['order_number_history'], 100)
_fill_padded(product_name, product_data['product_name_encoded'], 30, lengths=product_name_length)

# Length check
print(user_id.shape, is_ordered_history.shape, order_dow_history.shape)

# np.save does not create missing directories, so make sure the target exists.
output_dir = '../models/rnn_product/data'
os.makedirs(output_dir, exist_ok=True)

# One '<name>.npy' file per array; the names match the columns DataReader loads.
arrays_to_save = {
    'user_id': user_id,
    'product_id': product_id,
    'aisle_id': aisle_id,
    'department_id': department_id,
    'eval_set': eval_set,
    'label': label,
    'is_ordered_history': is_ordered_history,
    'index_in_order_history': index_in_order_history,
    'order_dow_history': order_dow_history,
    'order_hour_history': order_hour_history,
    'days_since_prior_order_history': days_since_prior_order_history,
    'order_size_history': order_size_history,
    'reorder_size_history': reorder_size_history,
    'order_number_history': order_number_history,
    'product_name': product_name,
    'product_name_length': product_name_length,
    'history_length': history_length,
}
for array_name, array in arrays_to_save.items():
    np.save(os.path.join(output_dir, '{}.npy'.format(array_name)), array)

import os
import numpy as np
import sys

import tensorflow as tf

# Record the TensorFlow version in the cell output.
# NOTE(review): the model code below calls tf.placeholder / tf.get_variable,
# which are TensorFlow 1.x graph-mode APIs — confirm the installed version provides them.
print(tf.__version__)

# Personalized Function
# Project-local modules are imported from '../models' relative to the working
# directory, so that directory is put on sys.path first.
sys.path.append(os.path.join(os.getcwd(), '../models'))
from data_frame import DataFrame
from tf_utils import lstm_layer, time_distributed_dense_layer, dense_layer, sequence_log_loss, wavenet
from tf_base_model import TFBaseModel

# Additional packages for python 2 functions
# (`reload` was a builtin in Python 2; in Python 3 it lives in importlib.)
from importlib import reload

class DataReader(object):
    """Loads the saved ``.npy`` feature arrays and serves train / validation / test batches."""

    def __init__(self, data_dir):
        """Memory-map one ``<column>.npy`` file per column from ``data_dir`` and split the rows.

        The complete table is kept as ``test_df``; ``train_df`` and ``val_df``
        come from ``train_test_split(train_size=0.9)`` on that same table.
        """
        data_cols = [
            'user_id',
            'product_id',
            'aisle_id',
            'department_id',
            'is_ordered_history',
            'index_in_order_history',
            'order_dow_history',
            'order_hour_history',
            'days_since_prior_order_history',
            'order_size_history',
            'reorder_size_history',
            'order_number_history',
            'history_length',
            'product_name',
            'product_name_length',
            'eval_set',
            'label'
        ]
        data = []
        for col in data_cols:
            array_path = os.path.join(data_dir, '{}.npy'.format(col))
            data.append(np.load(array_path, mmap_mode='r'))
        self.test_df = DataFrame(columns=data_cols, data=data)

        print(self.test_df.shapes())
        print("loaded data")

        # Split the data into training and validation sets
        self.train_df, self.val_df = self.test_df.train_test_split(train_size=0.9)

        # Output set information
        print('train size', len(self.train_df))
        print('validation size', len(self.val_df))
        print('test size', len(self.test_df))

    def train_batch_generator(self, batch_size):
        """Shuffled batches from the training split, for up to 10000 epochs."""
        return self.batch_generator(
            batch_size, self.train_df, shuffle=True, num_epochs=10000, is_test=False
        )

    def val_batch_generator(self, batch_size):
        """Shuffled batches from the validation split, for up to 10000 epochs."""
        return self.batch_generator(
            batch_size, self.val_df, shuffle=True, num_epochs=10000, is_test=False
        )

    def test_batch_generator(self, batch_size):
        """A single unshuffled pass over the full table, in test mode."""
        return self.batch_generator(
            batch_size, self.test_df, shuffle=False, num_epochs=1, is_test=True
        )

    def batch_generator(self, batch_size, df, shuffle=True, num_epochs=10000, is_test=False):
        """Yield batches from ``df`` with shifted context columns and derived fields.

        The four order-context histories are rolled one step left along axis 1;
        ``next_is_ordered`` is ``is_ordered_history`` rolled the same way;
        ``is_none`` flags rows whose ``product_id`` is 0. Outside test mode
        ``history_length`` is reduced by one. A smaller final batch is allowed
        only in test mode.
        """
        shifted_columns = (
            'order_dow_history',
            'order_hour_history',
            'days_since_prior_order_history',
            'order_number_history',
        )
        batches = df.batch_generator(
            batch_size,
            shuffle=shuffle,
            num_epochs=num_epochs,
            allow_smaller_final_batch=is_test,
        )
        for batch in batches:
            for column in shifted_columns:
                batch[column] = np.roll(batch[column], -1, axis=1)
            batch['next_is_ordered'] = np.roll(batch['is_ordered_history'], -1, axis=1)
            batch['is_none'] = batch['product_id'] == 0
            if not is_test:
                batch['history_length'] = batch['history_length'] - 1
            yield batch
            

class rnn(TFBaseModel):
    """Sequence model that combines an LSTM branch and a wavenet branch over per-order features.

    For every (user, product) row it outputs one sigmoid value per history
    step; the loss compares those against ``next_is_ordered``.
    """

    def __init__(self, lstm_size, dilations, filter_widths, skip_channels, residual_channels, **kwargs):
        """Store the architecture settings, then hand the remaining kwargs to TFBaseModel.

        The attributes are assigned before ``super().__init__`` is called.
        NOTE(review): presumably the base constructor builds the graph and so
        needs them already set — confirm against TFBaseModel.
        """
        self.lstm_size = lstm_size
        self.dilations = dilations
        self.filter_widths = filter_widths
        self.skip_channels = skip_channels
        self.residual_channels = residual_channels
        super(rnn, self).__init__(**kwargs)

    def calculate_loss(self):
        """Build inputs and outputs and return the sequence log loss against ``next_is_ordered``."""
        x = self.get_input_sequences()
        preds = self.calculate_outputs(x)
        # 100 is the fixed history width used by every [None, 100] placeholder below.
        loss = sequence_log_loss(self.next_is_ordered, preds, self.history_length, 100)
        return loss

    def get_input_sequences(self):
        """Create the input placeholders and return the concatenated feature tensor.

        The result concatenates, along the last axis, the per-step history
        features, the product features and the user embedding; the latter two
        are tiled across the 100 history steps.
        """
        # Per-row scalar inputs.
        self.user_id = tf.placeholder(tf.int32, [None])
        self.product_id = tf.placeholder(tf.int32, [None])
        self.aisle_id = tf.placeholder(tf.int32, [None])
        self.department_id = tf.placeholder(tf.int32, [None])
        # NOTE(review): DataReader produces is_none as `product_id == 0` (boolean)
        # while this placeholder is int32 — confirm the feed converts it as intended.
        self.is_none = tf.placeholder(tf.int32, [None])
        self.history_length = tf.placeholder(tf.int32, [None])

        # Per-row sequences, 100 steps wide (product_name: 30 word ids).
        self.is_ordered_history = tf.placeholder(tf.int32, [None, 100])
        self.index_in_order_history = tf.placeholder(tf.int32, [None, 100])
        self.order_dow_history = tf.placeholder(tf.int32, [None, 100])
        self.order_hour_history = tf.placeholder(tf.int32, [None, 100])
        self.days_since_prior_order_history = tf.placeholder(tf.int32, [None, 100])
        self.order_size_history = tf.placeholder(tf.int32, [None, 100])
        self.reorder_size_history = tf.placeholder(tf.int32, [None, 100])
        self.order_number_history = tf.placeholder(tf.int32, [None, 100])
        self.product_name = tf.placeholder(tf.int32, [None, 30])
        self.product_name_length = tf.placeholder(tf.int32, [None])
        self.next_is_ordered = tf.placeholder(tf.int32, [None, 100])

        self.keep_prob = tf.placeholder(tf.float32)
        self.is_training = tf.placeholder(tf.bool)

        # product data
        # Embedding tables have fixed row counts; ids must stay below them.
        product_embeddings = tf.get_variable(
            name='product_embeddings',
            shape=[50000, self.lstm_size],
            dtype=tf.float32
        )
        aisle_embeddings = tf.get_variable(
            name='aisle_embeddings',
            shape=[250, 50],
            dtype=tf.float32
        )
        department_embeddings = tf.get_variable(
            name='department_embeddings',
            shape=[50, 10],
            dtype=tf.float32
        )
        # Bag-of-words over the name: one-hot each word id, then max over the word axis.
        # NOTE(review): 2532 is presumably the word-id vocabulary size — verify against make_word_idx output.
        product_names = tf.one_hot(self.product_name, 2532)
        product_names = tf.reduce_max(product_names, 1)
        product_names = dense_layer(product_names, 100, activation=tf.nn.relu)

        is_none = tf.cast(tf.expand_dims(self.is_none, 1), tf.float32)

        x_product = tf.concat([
            tf.nn.embedding_lookup(product_embeddings, self.product_id),
            tf.nn.embedding_lookup(aisle_embeddings, self.aisle_id),
            tf.nn.embedding_lookup(department_embeddings, self.department_id),
            is_none,
            product_names
        ], axis=1)
        # Repeat the per-row product vector at each of the 100 steps.
        x_product = tf.tile(tf.expand_dims(x_product, 1), (1, 100, 1))

        # user data
        user_embeddings = tf.get_variable(
            name='user_embeddings',
            shape=[207000, self.lstm_size],
            dtype=tf.float32
        )
        x_user = tf.nn.embedding_lookup(user_embeddings, self.user_id)
        x_user = tf.tile(tf.expand_dims(x_user, 1), (1, 100, 1))

        # sequence data
        # Each history is fed twice: one-hot encoded here, and as a scaled scalar below.
        is_ordered_history = tf.one_hot(self.is_ordered_history, 2)
        index_in_order_history = tf.one_hot(self.index_in_order_history, 20)
        order_dow_history = tf.one_hot(self.order_dow_history, 8)
        order_hour_history = tf.one_hot(self.order_hour_history, 25)
        days_since_prior_order_history = tf.one_hot(self.days_since_prior_order_history, 31)
        order_size_history = tf.one_hot(self.order_size_history, 60)
        reorder_size_history = tf.one_hot(self.reorder_size_history, 50)
        order_number_history = tf.one_hot(self.order_number_history, 101)

        # Scalar versions, divided by a fixed constant and given a trailing feature axis.
        index_in_order_history_scalar = tf.expand_dims(tf.cast(self.index_in_order_history, tf.float32) / 20.0, 2)
        order_dow_history_scalar = tf.expand_dims(tf.cast(self.order_dow_history, tf.float32) / 8.0, 2)
        order_hour_history_scalar = tf.expand_dims(tf.cast(self.order_hour_history, tf.float32) / 25.0, 2)
        days_since_prior_order_history_scalar = tf.expand_dims(tf.cast(self.days_since_prior_order_history, tf.float32) / 31.0, 2)
        order_size_history_scalar = tf.expand_dims(tf.cast(self.order_size_history, tf.float32) / 60.0, 2)
        reorder_size_history_scalar = tf.expand_dims(tf.cast(self.reorder_size_history, tf.float32) / 50.0, 2)
        order_number_history_scalar = tf.expand_dims(tf.cast(self.order_number_history, tf.float32) / 100.0, 2)

        x_history = tf.concat([
            is_ordered_history,
            index_in_order_history,
            order_dow_history,
            order_hour_history,
            days_since_prior_order_history,
            order_size_history,
            reorder_size_history,
            order_number_history,
            index_in_order_history_scalar,
            order_dow_history_scalar,
            order_hour_history_scalar,
            days_since_prior_order_history_scalar,
            order_size_history_scalar,
            reorder_size_history_scalar,
            order_number_history_scalar,
        ], axis=2)

        x = tf.concat([x_history, x_product, x_user], axis=2)

        return x

    def calculate_outputs(self, x):
        """Return per-step sigmoid predictions and register the final-step tensors.

        Also sets ``h_final``, ``final_states``, ``final_predictions`` and
        ``prediction_tensors`` on the instance.
        """
        # Two sequence branches over the same input, concatenated with the raw input.
        h = lstm_layer(x, self.history_length, self.lstm_size)
        c = wavenet(x, self.dilations, self.filter_widths, self.skip_channels, self.residual_channels)
        h = tf.concat([h, c, x], axis=2)

        self.h_final = time_distributed_dense_layer(h, 50, activation=tf.nn.relu, scope='dense-1')
        y_hat = time_distributed_dense_layer(self.h_final, 1, activation=tf.nn.sigmoid, scope='dense-2')
        y_hat = tf.squeeze(y_hat, 2)

        # (row, last valid step) index pairs; the step index is floored at 0.
        final_temporal_idx = tf.stack([tf.range(tf.shape(self.history_length)[0]), tf.maximum(self.history_length - 1, 0)], axis=1)
        self.final_states = tf.gather_nd(self.h_final, final_temporal_idx)
        self.final_predictions = tf.gather_nd(y_hat, final_temporal_idx)

        self.prediction_tensors = {
            'user_ids': self.user_id,
            'product_ids': self.product_id,
            'final_states': self.final_states,
            'predictions': self.final_predictions
        }

        return y_hat


# Logs, checkpoints and predictions are written under base_dir; the input
# arrays are read from '../models/rnn_product/data' relative to it.
base_dir = './'

dr = DataReader(data_dir=os.path.join(base_dir, '../models/rnn_product/data'))

# Architecture settings go to rnn.__init__; every other keyword is forwarded to TFBaseModel.
# NOTE(review): the meaning of the training keywords is defined by TFBaseModel, which is not visible here.
nn = rnn(
    reader=dr,
    log_dir=os.path.join(base_dir, 'logs'),
    checkpoint_dir=os.path.join(base_dir, 'checkpoints'),
    prediction_dir=os.path.join(base_dir, 'predictions'),
    optimizer='adam',
    learning_rate=.001,
    lstm_size=300,
    dilations=[2**i for i in range(6)],
    filter_widths=[2]*6,
    skip_channels=64,
    residual_channels=128,
    batch_size=128,
    num_training_steps=1000,
    early_stopping_steps=100,
    warm_start_init_step=0,
    regularization_constant=0.0,
    keep_prob=1.0,
    enable_parameter_averaging=False,
    num_restarts=2,
    min_steps_to_checkpoint=100,
    log_interval=20,
    num_validation_batches=4,
)
# Train, reload the saved checkpoint, then write predictions.
nn.fit() # Training finished, start prediction
nn.restore()
nn.predict()


