# Wide and Deep Learning - TensorFlow Tutorial

### Getting started

In [1]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import time

import numpy as np
import tensorflow as tf
import pandas as pd

tf.logging.set_verbosity(tf.logging.INFO) 

print("Using TensorFlow version %s" % (tf.__version__)) 

  from ._conv import register_converters as _register_converters


Using TensorFlow version 1.5.0


## Generating input

In [3]:
BATCH_SIZE = 70

def generate_input_fn(filename, num_epochs=3, shuffle=True, batch_size=BATCH_SIZE):
    df = pd.read_csv(filename, header=None, names=COLUMNS)
    labels = df["reordered"].astype(int) 
    del df["reordered"] # Labels column, already saved to labels variable
    
    return tf.estimator.inputs.pandas_input_fn(
        x=df,
        y=labels,
        batch_size=batch_size,
        num_epochs=num_epochs,
        shuffle=shuffle)

print('input function configured')

input function configured


## Defining features

#### // Catergorical base columns:

In [4]:
aisle_id = tf.feature_column.categorical_column_with_hash_bucket(
  "aisle_id", hash_bucket_size=100)
department_id = tf.feature_column.categorical_column_with_hash_bucket(
  "department_id", hash_bucket_size=100)
order_id = tf.feature_column.categorical_column_with_hash_bucket(
  "order_id", hash_bucket_size=500000)
user_id = tf.feature_column.categorical_column_with_hash_bucket(
  "user_id", hash_bucket_size=50000)
product_id = tf.feature_column.categorical_column_with_hash_bucket(
  "product_id", hash_bucket_size=50000)

print('Categorical columns configured')

Categorical columns configured


#### // Continuous base columns 

In [5]:
user_orders = tf.feature_column.numeric_column("user_orders")
order_number = tf.feature_column.numeric_column("order_number")
add_to_cart_order = tf.feature_column.numeric_column("add_to_cart_order")
days_since_prior_order = tf.feature_column.numeric_column("days_since_prior_order")
order_hour_of_day = tf.feature_column.numeric_column("order_hour_of_day")
order_dow = tf.feature_column.numeric_column("order_dow")

print('Continuous columns configured')

Continuous columns configured


#### // Transformations

In [6]:
order_hour_of_day_buckets = tf.feature_column.bucketized_column(
    order_hour_of_day, boundaries=[ 0, 6, 12, 18 ])
order_day_hour = tf.feature_column.crossed_column(
    ["order_hour_of_day", "order_dow"], hash_bucket_size=int(1e4))

print('Transformations complete')

Transformations complete


#### // Wide columns and deep columns

In [14]:
wide_columns = [order_dow, order_hour_of_day, aisle_id, order_number,
      user_id, department_id, order_id, add_to_cart_order, user_orders,
      product_id, days_since_prior_order, order_day_hour, order_hour_of_day_buckets]

deep_columns = [
    tf.feature_column.embedding_column(aisle_id, dimension=30),
    tf.feature_column.embedding_column(department_id, dimension=30),
    tf.feature_column.embedding_column(order_id, dimension=30),
    tf.feature_column.embedding_column(user_id, dimension=30),
    tf.feature_column.embedding_column(product_id, dimension=30),
]

print('wide and deep columns configured')

wide and deep columns configured


## Building the model

In [15]:
def create_model_dir(model_type):
    return 'models/model_' + model_type + '_' + str(int(time.time()))

# If new_model=False, pass in the desired model_dir 
def get_model(model_type, new_model=False, model_dir=None):
    if new_model or model_dir is None:
        model_dir = create_model_dir(model_type) # Comment out this line to continue training a existing model
    print("Model directory = %s" % model_dir)
    
    m = None
    
    # Linear Classifier
    if model_type == 'WIDE':
        m = tf.estimator.LinearClassifier(
            model_dir=model_dir, 
            feature_columns=wide_columns)

    # Deep Neural Net Classifier
    if model_type == 'DEEP':
        m = tf.estimator.DNNClassifier(
            model_dir=model_dir,
            feature_columns=deep_columns,
            hidden_units=[200, 150])

    # Combined Linear and Deep Classifier
    if model_type == 'WIDE_AND_DEEP':
        m = tf.estimator.DNNLinearCombinedClassifier(
                model_dir=model_dir,
                linear_feature_columns=wide_columns,
                dnn_feature_columns=deep_columns,
                dnn_hidden_units=[200, 150])
        
    print('estimator built')
    
    return m, model_dir
    
MODEL_TYPE = 'WIDE_AND_DEEP'
model_dir = create_model_dir(model_type=MODEL_TYPE)
m, model_dir = get_model(model_type = MODEL_TYPE, model_dir=model_dir)

Model directory = models/model_WIDE_AND_DEEP_1518649039
INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_model_dir': 'models/model_WIDE_AND_DEEP_1518649039', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': None, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x000000001C1C4710>, '_task_type': 'worker', '_task_id': 0, '_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}
estimator built


#### Input variables

In [12]:
CATEGORICAL_COLUMNS = ["user_id", "order_id", "product_id", "aisle_id", "department_id"]


COLUMNS = ["user_id", "user_orders", "order_id", "order_number", "order_dow", "order_hour_of_day", 
           "days_since_prior_order", "product_id", "add_to_cart_order", "reordered", "aisle_id", "department_id"]

FEATURE_COLUMNS = ["user_id", "user_orders", "order_id", "order_number", "order_dow", "order_hour_of_day", 
           "days_since_prior_order", "product_id", "add_to_cart_order", "aisle_id", "department_id"]

## Train the model

In [18]:
%%time 

train_file = "train53w6.csv"

m.train(input_fn=generate_input_fn(train_file), steps=200)

print('training done')

INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Restoring parameters from models/model_WIDE_AND_DEEP_1518649039\model.ckpt-200
INFO:tensorflow:Saving checkpoints for 201 into models/model_WIDE_AND_DEEP_1518649039\model.ckpt.
INFO:tensorflow:loss = 25.852764, step = 201
INFO:tensorflow:global_step/sec: 104.597
INFO:tensorflow:loss = 26.685793, step = 301 (0.960 sec)
INFO:tensorflow:Saving checkpoints for 400 into models/model_WIDE_AND_DEEP_1518649039\model.ckpt.
INFO:tensorflow:Loss for final step: 18.131187.
training done
Wall time: 21.2 s


## Model evaluation

In [19]:
%%time
test_file  = "test53w6.csv"

results = m.evaluate(input_fn=generate_input_fn(test_file, num_epochs=3, shuffle=True), steps=100)

print('evaluate done')
print('\nAccuracy: %s' % results['accuracy'])

INFO:tensorflow:Starting evaluation at 2018-02-14-23:01:01
INFO:tensorflow:Restoring parameters from models/model_WIDE_AND_DEEP_1518649039\model.ckpt-400
INFO:tensorflow:Evaluation [10/100]
INFO:tensorflow:Evaluation [20/100]
INFO:tensorflow:Evaluation [30/100]
INFO:tensorflow:Evaluation [40/100]
INFO:tensorflow:Evaluation [50/100]
INFO:tensorflow:Evaluation [60/100]
INFO:tensorflow:Evaluation [70/100]
INFO:tensorflow:Evaluation [80/100]
INFO:tensorflow:Evaluation [90/100]
INFO:tensorflow:Evaluation [100/100]
INFO:tensorflow:Finished evaluation at 2018-02-14-23:01:04
INFO:tensorflow:Saving dict for global step 400: accuracy = 0.8387143, accuracy_baseline = 0.8387143, auc = 0.57192904, auc_precision_recall = 0.8716856, average_loss = 0.49119553, global_step = 400, label/mean = 0.8387143, loss = 34.383686, prediction/mean = 0.92375326
evaluate done

Accuracy: 0.8387143
Wall time: 5.49 s


## Save the model

In [17]:
def column_to_dtype(column):
    if column in CATEGORICAL_COLUMNS:
        return tf.string
    else:
        return tf.float32
    
feature_spec = {
    column: tf.FixedLenFeature(shape=[1], dtype=column_to_dtype(column))
        for column in FEATURE_COLUMNS
}
serving_fn = tf.estimator.export.build_parsing_serving_input_receiver_fn(feature_spec)
m.export_savedmodel(export_dir_base=model_dir,# + '/export/', 
                            serving_input_receiver_fn=serving_fn)

INFO:tensorflow:Signatures INCLUDED in export for Classify: ['serving_default', 'classification']
INFO:tensorflow:Signatures INCLUDED in export for Regress: ['regression']
INFO:tensorflow:Signatures INCLUDED in export for Predict: ['predict']
INFO:tensorflow:Restoring parameters from models/model_WIDE_AND_DEEP_1518615108\model.ckpt-200
INFO:tensorflow:Assets added to graph.
INFO:tensorflow:No assets to write.
INFO:tensorflow:SavedModel written to: b"models/model_WIDE_AND_DEEP_1518615108\\temp-b'1518615455'\\saved_model.pb"


b'models/model_WIDE_AND_DEEP_1518615108\\1518615455'