# Task: Predict User Item response under uniform exposure while learning from biased training data

Many current applications use recommendations in order to modify the natural user behavior, such as to increase the number of sales or the time spent on a website. This results in a gap between the final recommendation objective and the classical setup where recommendation candidates are evaluated by their coherence with past user behavior, by predicting either the missing entries in the user-item matrix, or the most likely next event. To bridge this gap, we optimize a recommendation policy for the task of increasing the desired outcome versus the organic user behavior. We show this is equivalent to learning to predict recommendation outcomes under a fully random recommendation policy. To this end, we propose a new domain adaptation algorithm that learns from logged data containing outcomes from a biased recommendation policy and predicts recommendation outcomes according to random exposure. We compare our method against state-of-the-art factorization methods and new approaches of causal recommendation and show significant improvements.


# Dataset

**MovieLens 100k dataset** was collected by the GroupLens Research Project at the University of Minnesota.
 
This data set consists of:
	* 100,000 ratings (1-5) from 943 users on 1682 movies. 
	* Each user has rated at least 20 movies. 

The data was collected through the MovieLens web site (movielens.umn.edu) during the seven-month period from September 19th, 1997 through April 22nd, 1998.



# Solution:

**Causal Matrix Factorization** - for more details see: https://arxiv.org/abs/1706.07639

![Causal Matrix Factorization](./images/causalMF.png)


# Metrics:

### * MSE - Mean Squared Error
### * NLL - Negative Log Likelihood
### * AUC - Area Under the Curve


-----------------------------
-----------------------------



# Questions:


### Q1: Add the definition of the create_counterfactual_regularizer() method
### Q2: Compare the results of using different values for the cf_pen hyperparameter (0 vs. larger values)
### Q3: Compare different types of optimizers
### Q4: Push the performance as high as possible!

In [None]:
%%javascript
// Override the output area's scroll check so that it always answers "no"
// (presumably to keep long training logs from being folded into a scroll box).
IPython.OutputArea.prototype._should_scroll = function(lines) {
    return false;
}

In [None]:
import os
import string
import tempfile
import time
import numpy as np
import matplotlib.pyplot as plt
import csv
import random

import tensorflow as tf
from tensorflow.contrib.tensorboard.plugins import projector
from tensorboard import summary as summary_lib

from __future__ import absolute_import
from __future__ import print_function

tf.set_random_seed(42)

tf.logging.set_verbosity(tf.logging.INFO)
print(tf.__version__)

In [None]:
# Hyper-parameters, registered as TensorFlow command-line flags
flags = tf.app.flags

# Placeholder flag named 'f' (described as 'kernel'); presumably it absorbs the
# `-f` argument a notebook kernel is started with -- confirm before removing.
flags.DEFINE_string('f', '', 'kernel')

# Data selection and model naming
flags.DEFINE_string('data_set', 'user_prod_dict.skew.', 'Dataset string.')  # Reg Skew
flags.DEFINE_string('adapt_stat', 'adapt_2i', 'Adapt String.')  # Adaptation strategy
flags.DEFINE_string('model_name', 'cp2v', 'Name of the model for saving.')

# Optimisation schedule
flags.DEFINE_float('learning_rate', 1.0, 'Initial learning rate.')
flags.DEFINE_integer('num_epochs', 1, 'Number of epochs to train.')
flags.DEFINE_integer('num_steps', 100, 'Number of steps after which to test.')

# Model size and batching
flags.DEFINE_integer('embedding_size', 100, 'Size of each embedding vector.')
flags.DEFINE_integer('batch_size', 512, 'How big is a batch of training.')

# Regularisation
flags.DEFINE_float('cf_pen', 10.0, 'Counterfactual regularizer hyperparam.')
flags.DEFINE_float('l2_pen', 0.0, 'L2 regularizer hyperparam.')
flags.DEFINE_string('cf_loss', 'l1', 'Use L1 or L2 for the loss .')

FLAGS = flags.FLAGS



In [None]:
_DATA_PATH = "./data/"


def _split_location(split):
    """Return the CSV path of one dataset split.

    The file name is <data_set flag><split>.<adapt_stat flag>.csv inside
    _DATA_PATH.
    """
    return "".join([_DATA_PATH, FLAGS.data_set, split, ".", FLAGS.adapt_stat, ".csv"])


train_data_set_location = _split_location("train")  # Location of train dataset
test_data_set_location = _split_location("test")  # Location of the test dataset
validation_test_set_location = _split_location("valid_test")  # Validation split named "valid_test"
validation_train_set_location = _split_location("valid_train")  # Validation split named "valid_train"
model_name = "".join([FLAGS.model_name, ".ckpt"])

print(train_data_set_location)


def calculate_vocab_size(file_location):
    """Count the distinct user IDs and product IDs in a comma-separated file.

    The first column of every row is treated as the user ID and the second
    column as the product ID; values are compared as the raw strings read
    from the file.

    Args:
        file_location: Path of the CSV file to scan.

    Returns:
        A tuple (number of distinct users, number of distinct products).
    """
    distinct_users = set()
    distinct_products = set()

    with open(file_location, 'r') as handle:
        for record in csv.reader(handle, delimiter=','):
            distinct_users.add(record[0])
            distinct_products.add(record[1])

    return len(distinct_users), len(distinct_products)


# Number of distinct users / products observed in the training split
userid_size, productid_size = calculate_vocab_size(train_data_set_location)

print(userid_size, productid_size, sep="\n")

# Notebook-wide state used by later cells
plot_gradients = False  # Plot the gradients
cost_val = []  # Collects the validation loss recorded during training
tf.set_random_seed(42)

In [None]:
def load_train_dataset(dataset_location, batch_size, num_epochs):
    """Build the training input pipeline with the TF Dataset API.

    Each line of the file is parsed as `UserID,ProductID,Rating` (two
    integers and one float).

    The parsed records are cached *before* shuffling and batching. Caching
    after them (as was done previously) stores the first epoch's shuffled
    batches, so every later epoch would replay exactly the same batches in
    the same order.

    Args:
        dataset_location: Path of the training CSV file.
        batch_size: Number of records per batch.
        num_epochs: Number of passes over the data before the iterator
            raises tf.errors.OutOfRangeError.

    Returns:
        A tuple (user_batch, product_batch, label_batch) of tensors, where
        label_batch has an extra trailing dimension of size 1.
    """

    with tf.name_scope('train_dataset_loading'):

        record_defaults = [[1], [1], [0.]] # Sets the type of the resulting tensors and default values
        # Dataset is in the format - UserID ProductID Rating
        dataset = tf.data.TextLineDataset(dataset_location)
        dataset = dataset.map(lambda line: tf.decode_csv(line, record_defaults=record_defaults))
        dataset = dataset.cache()  # cache parsed rows only, so shuffling stays per-epoch
        dataset = dataset.shuffle(buffer_size=10000)
        dataset = dataset.batch(batch_size)
        dataset = dataset.repeat(num_epochs)
        iterator = dataset.make_one_shot_iterator()
        user_batch, product_batch, label_batch = iterator.get_next()
        label_batch = tf.expand_dims(label_batch, 1)

    return user_batch, product_batch, label_batch


def load_test_dataset(dataset_location):
    """Read a test or validation CSV file fully into memory.

    The first three columns of every row are taken as user ID, product ID
    and label. Values are kept exactly as read from the file (strings).

    Args:
        dataset_location: Path of the CSV file.

    Returns:
        A tuple (user_list, product_list, labels, cr) where labels is a
        NumPy array with one column and cr is the value returned by
        compute_empirical_cr for those labels.
    """
    with open(dataset_location, 'r') as f:
        rows = list(csv.reader(f))

    user_list = [row[0] for row in rows]
    product_list = [row[1] for row in rows]
    labels = np.reshape([row[2] for row in rows], [-1, 1])

    return user_list, product_list, labels, compute_empirical_cr(labels)


def compute_2i_regularization_id(prods, num_products):
    """Map each product ID to its counterpart ID for the 2i regularization.

    IDs below `num_products` are shifted up by `num_products`; IDs that are
    already greater than or equal to `num_products` are returned unchanged.
    The computation is vectorised, so an empty batch yields an empty array
    instead of raising.

    Args:
        prods: Array-like of product IDs (any shape; it is flattened).
        num_products: Offset between the two halves of the ID space.

    Returns:
        A 1-D NumPy array with one regularization ID per input ID.
    """
    # order='K' keeps the element order np.nditer would have produced.
    ids = np.ravel(np.asarray(prods), order='K')

    return np.where(ids >= num_products, ids, ids + num_products)


def generate_bootstrap_batch(seed, data_set_size, fraction=0.8):
    """Generate the row indices of one bootstrap sample.

    Indices are drawn uniformly with replacement from
    [0, data_set_size - 1]. A private generator seeded with `seed` is used,
    so the result is reproducible and the state of the global `random`
    module is left untouched.

    Args:
        seed: Seed of the private random generator.
        data_set_size: Number of rows in the data set being resampled.
        fraction: Size of the sample relative to `data_set_size`.

    Returns:
        A list of int(data_set_size * fraction) indices.
    """
    rng = random.Random(seed)
    sample_size = int(data_set_size * fraction)

    return [rng.randint(0, data_set_size - 1) for _ in range(sample_size)]


def compute_empirical_cr(labels):
    """Compute the empirical click rate: the fraction of non-zero labels.

    NOTE(review): the previous version divided the number of non-zero
    labels by the number of *zero* labels, which is the odds rather than
    the rate; it could exceed 1 and raised ZeroDivisionError when every
    label was non-zero. The denominator is now the total number of labels.

    Args:
        labels: Array-like of labels; numeric strings are accepted and
            converted to floats.

    Returns:
        The rate as a Python float in [0, 1].

    Raises:
        ValueError: If `labels` is empty.
    """
    # np.float was only an alias of the builtin float and has been removed
    # from NumPy; np.float64 is the same dtype.
    labels = np.asarray(labels).astype(np.float64)
    views = labels.size
    if views == 0:
        raise ValueError("Cannot compute a click rate from an empty label set")

    clicks = np.count_nonzero(labels)

    return float(clicks) / float(views)


def create_average_predictor_tensors(label_list_placeholder, logits_placeholder):
    """Create the loss tensors of the average predictor used by the bootstraps.

    Despite its name, `logits_placeholder` is used directly as the
    prediction: it is reshaped to one column and compared against the
    labels without any sigmoid.

    Args:
        label_list_placeholder: Placeholder holding the labels, one per row.
        logits_placeholder: Placeholder holding one prediction per label.

    Returns:
        A tuple (ap_mse_loss, ap_log_loss): mean squared error and log loss
        of the predictions against the labels.
    """

    with tf.device('/cpu:0'):

        with tf.variable_scope('ap_logits'):
            # Give the predictions the same [rows, 1] layout as the labels
            ap_logits = tf.reshape(logits_placeholder, [tf.shape(label_list_placeholder)[0], 1])

        with tf.name_scope('ap_losses'):

            ap_mse_loss = tf.losses.mean_squared_error(labels=label_list_placeholder, predictions=ap_logits)
            ap_log_loss = tf.losses.log_loss(labels=label_list_placeholder, predictions=ap_logits)

    return ap_mse_loss, ap_log_loss


def compute_bootstraps_2i(sess, model, test_user_batch, test_product_batch, test_label_batch, test_logits, running_vars_initializer, ap_mse_loss, ap_log_loss, num_bootstraps=30):
    """Evaluate the 2i model on bootstrap resamples of the test set and print the results.

    For every resample the model's MSE, log loss and AUC are computed, along
    with the MSE and log loss of the average predictor on the same rows.
    The printed "lift" is the relative improvement of the model over the
    average predictor, (ap - model) / ap.

    Args:
        sess: Session in which the graph is run.
        model: Model object exposing the placeholders, `mse_loss`,
            `log_loss`, `auc` and `auc_update_op` used below.
        test_user_batch: User IDs of the test set.
        test_product_batch: Product IDs of the test set.
        test_label_batch: Labels of the test set.
        test_logits: Array with the average predictor's prediction per row.
        running_vars_initializer: Op that resets the AUC running variables.
        ap_mse_loss: MSE tensor of the average predictor.
        ap_log_loss: Log-loss tensor of the average predictor.
        num_bootstraps: Number of resamples to evaluate.

    Returns:
        None. The aggregated scores are printed.
    """

    data_set_size = len(test_user_batch)

    # The conversions do not depend on the resample, so do them once.
    test_user_batch = np.asarray(test_user_batch)
    test_product_batch = np.asarray(test_product_batch)
    test_label_batch = np.asarray(test_label_batch)

    mse = []
    llh = []
    ap_mse = []
    ap_llh = []
    auc_list = []

    # Compute the bootstrap values for the test split - this computes the empirical CR as well for comparison
    for i in range(num_bootstraps):

        ids = generate_bootstrap_batch(i*2, data_set_size)

        # Reset the running variables used for the AUC
        sess.run(running_vars_initializer)

        # Construct the feed-dict for the model and the average predictor
        feed_dict = {model.user_list_placeholder : test_user_batch[ids], model.product_list_placeholder: test_product_batch[ids], model.label_list_placeholder: test_label_batch[ids], model.logits_placeholder: test_logits[ids], model.reg_list_placeholder: test_product_batch[ids]}

        # Run the model test step updating the AUC object
        _, mse_loss_val, log_loss_val = sess.run([model.auc_update_op, model.mse_loss, model.log_loss], feed_dict=feed_dict)
        auc_score = sess.run(model.auc, feed_dict=feed_dict)

        # Run the Average Predictor graph
        ap_mse_val, ap_log_val = sess.run([ap_mse_loss, ap_log_loss], feed_dict=feed_dict)

        mse.append(mse_loss_val)
        llh.append(log_loss_val)
        ap_mse.append(ap_mse_val)
        ap_llh.append(ap_log_val)
        auc_list.append(auc_score)

    # Relative improvement over the average predictor, per resample
    mse_diff = [(ap - own) / ap for ap, own in zip(ap_mse, mse)]
    llh_diff = [(ap - own) / ap for ap, own in zip(ap_llh, llh)]

    print("MSE Mean Score On The Bootstrap = ", np.mean(mse))
    print("MSE Mean Lift Over Average Predictor (%) = ", np.round(np.mean(mse_diff)*100, decimals=2))
    print("MSE STD (%) =" , np.round(np.std(mse_diff)*100, decimals=2))

    print("LLH Mean Over Average Predictor (%) =", np.round(np.mean(llh_diff)*100, decimals=2))
    print("LLH STD (%) = ", np.round(np.std(llh_diff)*100, decimals=2))

    print("Mean AUC Score On The Bootstrap = ",  np.round(np.mean(auc_list), decimals=4), "+/-", np.round(np.std(auc_list), decimals=4))

### About Supervised Prod2vec 

- Class to define MF of the implicit feedback matrix (1/0/unk) of Users x Products

- When called it creates the TF graph for the associated NN:

Step1: self.create_placeholders() => Creates the input placeholders

Step2: self.build_graph() => Creates the 3 layers: 
    - the user embedding layer
    - the product embedding layer 
    - the output prediction layer

Step3: self.create_losses() => Defines the loss function for prediction

Step4: self.add_optimizer() => Defines the optimizer

Step5: self.add_performance_metrics() => Defines the performance metrics (accuracy and AUC)

Step6: self.add_summaries() => Defines the TensorBoard summaries (loss, embedding weights, logits and biases)

 

In [None]:
class SupervisedProd2vec(object):
    """Matrix-factorization click model over users and products, built as a TF graph.

    The predicted probability is
    sigmoid(alpha * <user_vec, product_vec> + user_bias + product_bias + b).
    Creating an instance adds the placeholders, variables, losses, optimizer
    step, metrics and summaries to the current default graph.
    """

    def __init__(self, userid_size, productid_size, embedding_size, l2_pen, learning_rate):
        """Store the hyper-parameters and build the graph.

        Args:
            userid_size: Number of rows of the user embedding table.
            productid_size: Number of rows of the product embedding table.
            embedding_size: Width of each embedding vector.
            l2_pen: Weight of the L2 penalty on embeddings and bias vectors.
            learning_rate: Step size given to the gradient descent optimizer.
        """

        self.userid_size = userid_size
        self.productid_size = productid_size
        self.embedding_size = embedding_size
        self.l2_pen = l2_pen
        self.learning_rate = learning_rate

        # Build the graph
        self.create_placeholders()
        self.build_graph()
        self.create_losses()
        self.add_optimizer()
        self.add_performance_metrics()
        self.add_summaries()

    def create_placeholders(self):
        """Create the input placeholders (user IDs, product IDs, labels)."""

        self.user_list_placeholder = tf.placeholder(tf.int32, [None], name="user_list_placeholder")
        self.product_list_placeholder = tf.placeholder(tf.int32, [None], name="product_list_placeholder")
        self.label_list_placeholder = tf.placeholder(tf.float32, [None, 1], name="label_list_placeholder")

        # logits placeholder used to store the test CR for the bootstrapping process
        self.logits_placeholder = tf.placeholder(tf.float32, [None], name="logits_placeholder")

    def build_graph(self):
        """Build the embedding tables, bias vectors, logits and prediction."""

        with tf.name_scope('embedding_layer'):

            # User matrix and current batch
            self.user_embeddings = tf.get_variable("user_embeddings", shape=[self.userid_size, self.embedding_size], initializer=tf.contrib.layers.xavier_initializer(), trainable=True)
            self.user_embed = tf.nn.embedding_lookup(self.user_embeddings, self.user_list_placeholder) # Lookup the Users for the given batch
            self.user_b = tf.Variable(tf.zeros([self.userid_size]), name='user_b', trainable=True)
            self.user_bias_embed = tf.nn.embedding_lookup(self.user_b, self.user_list_placeholder)

            # Product embedding
            self.product_embeddings = tf.get_variable("product_embeddings", shape=[self.productid_size, self.embedding_size], initializer=tf.contrib.layers.xavier_initializer(), trainable=True)
            self.product_embed = tf.nn.embedding_lookup(self.product_embeddings, self.product_list_placeholder) # Lookup the products for the given batch
            self.prod_b = tf.Variable(tf.zeros([self.productid_size]), name='prod_b', trainable=True)
            self.prod_bias_embed = tf.nn.embedding_lookup(self.prod_b, self.product_list_placeholder)

        with tf.variable_scope('logits'):

            self.b = tf.get_variable('b', [1], initializer=tf.constant_initializer(0.0, dtype=tf.float32), trainable=True)
            self.alpha = tf.get_variable('alpha', [], initializer=tf.constant_initializer(0.00000001, dtype=tf.float32), trainable=True)

            # alpha * <user_i, prod_j>, as one column per batch row
            self.emb_logits = self.alpha * tf.reshape(tf.reduce_sum(tf.multiply(self.user_embed, self.product_embed), 1), [tf.shape(self.user_list_placeholder)[0], 1])

            # prod_bias + user_bias + global_bias
            self.logits = tf.reshape(tf.add(self.prod_bias_embed, self.user_bias_embed), [tf.shape(self.user_list_placeholder)[0], 1]) + self.b

            self.logits = self.emb_logits + self.logits

            self.prediction = tf.sigmoid(self.logits, name='sigmoid_prediction')

    def create_losses(self):
        """Create the training loss plus the MSE and log-loss tensors.

        `log_loss` is the mean sigmoid cross-entropy; `loss` adds the L2
        penalty on both embedding tables and both bias vectors to it.
        """

        with tf.name_scope('losses'):
            # Sigmoid cross-entropy between the logits and labels, built once
            # and shared by `loss` and `log_loss`
            self.log_loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=self.logits, labels=self.label_list_placeholder))

            # Regularizer term on the user/product vectors and their biases
            l2_term = self.l2_pen * (tf.nn.l2_loss(self.user_embeddings) + tf.nn.l2_loss(self.product_embeddings) + tf.nn.l2_loss(self.prod_b) + tf.nn.l2_loss(self.user_b))
            self.loss = self.log_loss + l2_term

            # MSE between the predicted probabilities and the labels
            self.mse_loss = tf.losses.mean_squared_error(labels=self.label_list_placeholder, predictions=self.prediction)

    def add_optimizer(self):
        """Add the gradient descent training step to the graph."""

        with tf.name_scope('optimizer'):
            # Global step variable to keep track of the number of training steps
            self.global_step = tf.Variable(0, dtype=tf.int32, trainable=False, name='global_step')
            self.apply_grads = tf.train.GradientDescentOptimizer(self.learning_rate).minimize(self.loss, global_step=self.global_step)

    def add_performance_metrics(self):
        """Add accuracy and streaming AUC to the graph."""

        with tf.name_scope('performance_metrics'):
            # A prediction counts as correct when its rounded value equals the label
            correct_predictions = tf.equal(tf.round(self.prediction), self.label_list_placeholder)
            self.accuracy = tf.reduce_mean(tf.cast(correct_predictions, tf.float32), name="accuracy")
            self.auc, self.auc_update_op = tf.metrics.auc(labels=self.label_list_placeholder, predictions=self.prediction, num_thresholds=1000, name="auc_metric")

    def add_summaries(self):
        """Add the loss, weight and bias summaries to the graph."""

        with tf.name_scope('summaries'):
            # Add loss to the summaries
            tf.summary.scalar('total_loss', self.loss)
            tf.summary.histogram('histogram_total_loss', self.loss)

            # Add weights to the summaries
            tf.summary.histogram('user_embedding_weights', self.user_embeddings)
            tf.summary.histogram('product_embedding_weights', self.product_embeddings)
            tf.summary.histogram('logits', self.logits)
            tf.summary.histogram('prod_b', self.prod_b)
            tf.summary.histogram('user_b', self.user_b)
            tf.summary.histogram('global_bias', self.b)

            tf.summary.scalar('alpha', self.alpha)


### CausalProd2Vec2i - inherits from SupervisedProd2vec

- Class to define the causal version of MF of the implicit feedback matrix (1/0/unk) of Users x Products

- When called it creates the TF graph for the associated NN:

**Step1: Changed: +regularizer placeholder** self.create_placeholders() => Creates the input placeholders 

**Step2:** self.build_graph() => Creates the 3 layers: 
    - the user embedding layer
    - the product embedding layer 
    - the output prediction layer

**New:**

        self.create_control_embeddings()
        self.create_counterfactual_regularizer()


**Step3: Changed: +add regularizer between embeddings** self.create_losses() => Defines the loss function for prediction

**Step4:** self.add_optimizer() => Defines the optimizer

**Step5:** self.add_performance_metrics() => Defines the performance metrics (accuracy and AUC)

**Step6:** self.add_summaries() => Defines the TensorBoard summaries (loss, embedding weights, logits and biases)



In [None]:
class CausalProd2Vec2i(SupervisedProd2vec):
    """Variant of SupervisedProd2vec with a doubled product embedding table.

    Compared with the parent class it adds a `reg_list_placeholder`, a
    gradient-stopped embedding lookup for those IDs (`control_embed`) and its
    own `create_losses`. The counterfactual regularizer itself is left as an
    exercise: the hooks for it are present but commented out, so `cf_pen` and
    `cf` are stored without being used yet.
    """

    def __init__(self, userid_size, productid_size, embedding_size, l2_pen, learning_rate, cf_pen, cf='l1'):
        """Store the hyper-parameters and build the graph.

        Args:
            userid_size: Number of rows of the user embedding table.
            productid_size: Number of products; the product embedding table
                gets twice this many rows.
            embedding_size: Width of each embedding vector.
            l2_pen: Weight of the L2 penalty on embeddings and bias vectors.
            learning_rate: Step size given to the optimizer.
            cf_pen: Weight intended for the counterfactual regularizer.
            cf: Value of the cf_loss setting ('l1' by default); not read by
                the code currently enabled in this class.
        """

        self.userid_size = userid_size
        self.productid_size = productid_size * 2 # Doubled to accommodate the treatment embeddings 
        self.embedding_size = embedding_size
        self.l2_pen = l2_pen
        self.learning_rate = learning_rate
        self.cf_pen = cf_pen
        self.cf = cf

        # Build the graph
        self.create_placeholders()
        self.build_graph()
        self.create_control_embeddings()
        #self.create_counterfactual_regularizer()
        self.create_losses()
        self.add_optimizer()
        self.add_performance_metrics()
        self.add_summaries()
        
    def create_placeholders(self):
        """Create the placeholders, including the extra one for the regularization IDs."""
        
        self.user_list_placeholder = tf.placeholder(tf.int32, [None], name="user_list_placeholder")
        self.product_list_placeholder = tf.placeholder(tf.int32, [None], name="product_list_placeholder")
        self.label_list_placeholder = tf.placeholder(tf.float32, [None, 1], name="label_list_placeholder")
        # Product IDs whose embeddings are looked up in create_control_embeddings
        self.reg_list_placeholder = tf.placeholder(tf.int32, [None], name="reg_list_placeholder")

        # logits placeholder used to store the test CR for the bootstrapping process
        self.logits_placeholder = tf.placeholder(tf.float32, [None], name="logits_placeholder")

        
    def create_control_embeddings(self):
        """Look up the embeddings of the regularization IDs with gradients blocked."""

        with tf.name_scope('control_embedding'):
            # Embeddings of the IDs fed through reg_list_placeholder; stop_gradient
            # keeps gradients from flowing back through this lookup
            self.control_embed = tf.stop_gradient(tf.nn.embedding_lookup(self.product_embeddings, self.reg_list_placeholder))
  

    #################################
    ##  SOLUTION TO Q1 GOES HERE!  ##
    #################################
    # The method below is expected to set self.cf_reg, which the commented
    # lines in create_losses scale by self.cf_pen and add to self.loss.
    #def create_counterfactual_regularizer(self):
    
    # self.cf_reg
    
                
            
    def create_losses(self):
        """Create the training loss plus the MSE and log-loss tensors.

        `log_loss` is the mean sigmoid cross-entropy; `loss` adds the L2
        penalty on the embedding tables and bias vectors. The counterfactual
        term is not added while the lines for it remain commented out.
        """

        with tf.name_scope('losses'):
            #Sigmoid loss between the logits and labels
            self.log_loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=self.logits, labels=self.label_list_placeholder))
            
            #Adding the regularizer term on user vct and prod vct and their bias terms
            reg_term = self.l2_pen * ( tf.nn.l2_loss(self.user_embeddings) + tf.nn.l2_loss(self.product_embeddings) )
            reg_term_biases = self.l2_pen * ( tf.nn.l2_loss(self.prod_b) + tf.nn.l2_loss(self.user_b) )
            self.loss = self.log_loss + reg_term + reg_term_biases
            
            #Adding the counterfactual regularizer term
            # Q1: Write the method that computes the counterfactual regularizer
            #self.create_counterfactual_regularizer()
            #self.loss = self.loss + (self.cf_pen * self.cf_reg)

            #Compute additionally the MSE loss
            self.mse_loss = tf.losses.mean_squared_error(labels=self.label_list_placeholder, predictions=tf.sigmoid(self.logits))
          
          


### Create the TF Graph

In [None]:
# Build everything in a dedicated graph, pinned to the CPU
graph = tf.Graph()
with graph.as_default(), tf.device('/cpu:0'):

    # Number of products and users.
    # NOTE: these literals replace the sizes computed from the training file earlier.
    productid_size = 1683
    userid_size = 944

    model = CausalProd2Vec2i(
        userid_size,
        productid_size+1,
        FLAGS.embedding_size,
        FLAGS.l2_pen,
        FLAGS.learning_rate,
        FLAGS.cf_pen,
        cf=FLAGS.cf_loss,
    )

    ap_mse_loss, ap_log_loss = create_average_predictor_tensors(model.label_list_placeholder, model.logits_placeholder)

    # Initializer used to initialize/reset the AUC running variables
    running_vars = tf.get_collection(tf.GraphKeys.LOCAL_VARIABLES, scope="performance_metrics/auc_metric")
    running_vars_initializer = tf.variables_initializer(var_list=running_vars)

    # Training batches come from the dataset iterator; the other splits are loaded in full
    next_batch = load_train_dataset(train_data_set_location, FLAGS.batch_size, FLAGS.num_epochs)
    test_user_batch, test_product_batch, test_label_batch, test_cr = load_test_dataset(test_data_set_location)
    val_test_user_batch, val_test_product_batch, val_test_label_batch, val_cr = load_test_dataset(validation_test_set_location)
    val_train_user_batch, val_train_product_batch, val_train_label_batch, val_cr = load_test_dataset(validation_train_set_location)

    # Constant prediction for the average predictor: the empirical CR of the test set, once per test row
    test_logits = np.full(len(test_label_batch), test_cr)



### Launch the Session: Train the model

In [None]:
# Launch the Session: train the model, report validation losses every
# FLAGS.num_steps steps, save a checkpoint and run the bootstrap evaluation.
with tf.Session(graph=graph, config=tf.ConfigProto(allow_soft_placement=True, log_device_placement=False)) as sess:

    # initialise all the TF variables
    init_op = tf.global_variables_initializer()
    sess.run(init_op)

    # Setup tensorboard: tensorboard --logdir=/tmp/tensorboard
    time_tb = str(time.ctime(int(time.time())))
    train_writer = tf.summary.FileWriter('/tmp/tensorboard' + '/train' + time_tb, sess.graph)
    test_writer = tf.summary.FileWriter('/tmp/tensorboard' + '/test' + time_tb, sess.graph)
    merged = tf.summary.merge_all()

    # Embeddings viz (Possible to add labels for embeddings later)
    saver = tf.train.Saver()
    config = projector.ProjectorConfig()
    embedding = config.embeddings.add()
    embedding.tensor_name = model.product_embeddings.name
    projector.visualize_embeddings(train_writer, config)

    # The validation inputs are the same at every report, so build them once
    val_train_product_batch = np.asarray(val_train_product_batch, dtype=np.float32)
    val_test_product_batch = np.asarray(val_test_product_batch, dtype=np.float32)
    val_train_reg_ids = compute_2i_regularization_id(val_train_product_batch, productid_size) # Compute the product ID's for regularization
    val_test_reg_ids = compute_2i_regularization_id(val_test_product_batch, productid_size) # Compute the product ID's for regularization
    feed_dict_test = {model.user_list_placeholder : val_test_user_batch, model.product_list_placeholder: val_test_product_batch, model.reg_list_placeholder: val_test_reg_ids,  model.label_list_placeholder: val_test_label_batch}
    feed_dict_train = {model.user_list_placeholder : val_train_user_batch, model.product_list_placeholder: val_train_product_batch, model.reg_list_placeholder: val_train_reg_ids, model.label_list_placeholder: val_train_label_batch}

    # Variables used in the training loop
    t = time.time()
    step = 0
    average_loss = 0
    average_mse_loss = 0
    average_log_loss = 0

    # Start the training loop---------------------------------------------------------------------------------------------
    print("Starting Training On Causal Prod2Vec")
    print(FLAGS.cf_loss)
    print("Num Epochs = ", FLAGS.num_epochs)
    print("Learning Rate = ", FLAGS.learning_rate)
    print("L2 Reg = ", FLAGS.l2_pen)
    print("CF Reg = ", FLAGS.cf_pen)

    try:
        # The loop ends when the training iterator raises OutOfRangeError
        while True:
            # Run the TRAIN for this step batch ---------------------------------------------------------------------
            # Construct the feed_dict
            user_batch, product_batch, label_batch = sess.run(next_batch)
            # Treatment is the small set of samples from St, Control is the larger set of samples from Sc
            reg_ids = compute_2i_regularization_id(product_batch, productid_size) # Compute the product ID's for regularization
            feed_dict = {model.user_list_placeholder : user_batch, model.product_list_placeholder: product_batch, model.reg_list_placeholder: reg_ids, model.label_list_placeholder: label_batch}

            # Run the graph
            _, sum_str, loss_val, mse_loss_val, log_loss_val = sess.run([model.apply_grads, merged, model.loss, model.mse_loss, model.log_loss], feed_dict=feed_dict)

            step +=1
            average_loss += loss_val
            average_mse_loss += mse_loss_val
            average_log_loss += log_loss_val

            # Every num_steps print average loss
            if step % FLAGS.num_steps == 0:
                # `step` starts counting at 1, so every report (including the
                # first) covers exactly num_steps batches.
                average_loss /= FLAGS.num_steps
                average_mse_loss /= FLAGS.num_steps
                average_log_loss /= FLAGS.num_steps
                print("Average Training Loss on S_c (FULL, MSE, NLL) at step ", step, ": ", average_loss, ": ", average_mse_loss, ": ", average_log_loss, "Time taken (S) = " + str(round(time.time() - t, 1)))

                # Start a fresh window for all three running sums
                average_loss = 0
                average_mse_loss = 0
                average_log_loss = 0
                t = time.time() # reset the time
                train_writer.add_summary(sum_str, step) # Write the summary

                # Run the VALIDATION for this step batch ---------------------------------------------------------------------
                sum_str, loss_val, mse_loss_val, log_loss_val = sess.run([merged, model.loss, model.mse_loss, model.log_loss], feed_dict=feed_dict_train)
                print("Validation loss on S_c (FULL, MSE, NLL) at step ", step, ": ", loss_val, ": ", mse_loss_val, ": ", log_loss_val)

                sum_str, loss_val, mse_loss_val, log_loss_val = sess.run([merged, model.loss, model.mse_loss, model.log_loss], feed_dict=feed_dict_test)
                cost_val.append(loss_val)
                print("Validation loss on S_t(FULL, MSE, NLL) at step ", step, ": ", loss_val, ": ", mse_loss_val, ": ", log_loss_val)

                print("####################################################################################################################")

                test_writer.add_summary(sum_str, step) # Write the summary

    except tf.errors.OutOfRangeError:
        print("Reached the number of epochs")

    finally:
        saver.save(sess, os.path.join('/tmp/tensorboard', model_name), model.global_step) # Save model
        # Close both writers even when training stops with an error
        train_writer.close()
        test_writer.close()

    print("Training Complete")

    # Run the bootstrap for this model ---------------------------------------------------------------------------------------------------------------
    print("Begin Bootstrap process...")
    print("Running BootStrap On The Control Representations")
    compute_bootstraps_2i(sess, model, test_user_batch, test_product_batch, test_label_batch, test_logits, running_vars_initializer, ap_mse_loss, ap_log_loss)

    print("Running BootStrap On The Treatment Representations")
    # Shift every product ID by productid_size before the second evaluation
    test_product_batch = [int(x) + productid_size for x in test_product_batch]
    compute_bootstraps_2i(sess, model, test_user_batch, test_product_batch, test_label_batch, test_logits, running_vars_initializer, ap_mse_loss, ap_log_loss)
