# Assignment 3 - Nested Logit Model
Yupeng Wu

Compared with the MNL model, the nested logit model is more flexible and can capture the correlation between alternatives. In terms of log-likelihood, we have the following results:
- MNL-nonlinear: -5223.841 (in class)
- Nested Logit-nonlinear: -5046.323


In [1]:
import os
import json
os.environ['KMP_DUPLICATE_LIB_OK']='True'
import tensorflow as tf
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from tensorflow.keras.layers import Dense
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.model_selection import train_test_split
import random 

In [2]:
from utils_ import format_data_neural_network_general

# Load the model inputs and the labels. X is iterated below as a collection
# of input arrays; Y holds the labels.
X, Y = format_data_neural_network_general()

# Seed Python's and NumPy's global random number generators.
random.seed(1234)
np.random.seed(1234)

# Split every input array with the same test_size and random_state.
# NOTE(review): presumably all arrays in X (and Y) have the same number of
# rows, so the identical random_state keeps train/test rows aligned - confirm.
per_input_splits = [
    train_test_split(d, test_size=0.25, random_state=1234) for d in X
]

# Transpose [(train_0, test_0), (train_1, test_1), ...] into
# [(train_0, train_1, ...), (test_0, test_1, ...)].
split = list(zip(*per_input_splits))
Xtrain = split[0]
Xtest = split[1]

# Labels are split with the same settings as the inputs.
Ytrain, Ytest = train_test_split(Y, test_size=0.25, random_state=1234)

RAW DATA | The number of observations is 10,728.
RAW DATA | The number of columns per observations is 29.
DROPPING NO CHOICE | The number of observations is 10,719.
83 customer features.
12 product features.


In [3]:
# Nested logit model classes: a nonlinear (two hidden layers) variant and a linear variant

class NestedLogit(tf.keras.Model):
    """Three-alternative nested choice model with a shared nonlinear utility network.

    Alternatives 0 and 1 form a nest; alternative 2 stands alone. The same
    stack of two ReLU layers plus a bias-free linear output layer scores every
    alternative, so the weights are shared across alternatives.

    Expected ``inputs`` layout (taken from the indexing in ``call``):
        inputs[0:3]  one feature tensor per alternative
        inputs[3]    tensor multiplied element-wise into the softmax scores
                     (presumably a 0/1 availability mask of shape (batch, 3)
                     - TODO confirm against the data formatter)
        inputs[4:]   tensors concatenated and appended to every alternative's
                     features (presumably decision-maker features - confirm)
    """

    def __init__(self):
        super().__init__()
        # Hidden layers and output layer are reused for all three alternatives.
        self.intermediate = [Dense(10, activation="relu") for _ in range(2)]
        self.last = Dense(1, activation="linear", use_bias=False)
        self.nl_name = "NestedLogit_nonlinear"

    def call(self, inputs):
        """Return a (batch, 3) tensor of choice probabilities.

        Columns 0 and 1 are the nest members, column 2 is the stand-alone
        alternative.
        """
        alt_features = inputs[:3]
        avail = inputs[3]
        # tf.concat (rather than a freshly built Concatenate layer) avoids
        # creating layer objects on every forward pass and also accepts a
        # single shared-feature tensor.
        shared = tf.concat(list(inputs[4:]), axis=-1)

        hidden = [tf.concat([alt, shared], axis=-1) for alt in alt_features]
        for layer in self.intermediate:
            hidden = [layer(h) for h in hidden]
        utilities = tf.concat([self.last(h) for h in hidden], axis=-1)

        # Softmax over the three utilities, then scale by `avail`.
        scores = tf.nn.softmax(utilities, axis=-1) * avail

        nest_scores = scores[:, :2]
        outside_score = scores[:, 2:3]
        # The nest is represented at the upper level by its largest member score.
        nest_value = tf.reduce_max(nest_scores, axis=1, keepdims=True)

        # Upper level: nest vs. stand-alone alternative.
        upper = tf.concat([nest_value, outside_score], axis=1)
        # divide_no_nan yields 0 instead of NaN when every score in a row is 0
        # (e.g. all alternatives masked out), which would otherwise poison the
        # loss and gradients.
        upper_prob = tf.math.divide_no_nan(
            upper, tf.reduce_sum(upper, axis=1, keepdims=True)
        )

        # Lower level: share of each member inside the nest. Same 0/0 guard for
        # rows where both nest members have a zero score.
        within_nest = tf.math.divide_no_nan(
            nest_scores, tf.reduce_sum(nest_scores, axis=1, keepdims=True)
        )
        prob_nest = within_nest * upper_prob[:, 0:1]

        return tf.concat([prob_nest, upper_prob[:, 1:2]], axis=1)
    
# Linear variant of the nested logit model (no hidden layers)

class NestedLogit_linear(tf.keras.Model):
    """Three-alternative nested choice model with a linear utility function.

    Alternatives 0 and 1 form a nest; alternative 2 stands alone. A single
    bias-free linear layer, shared across alternatives, maps each
    alternative's features to its utility.

    Expected ``inputs`` layout (taken from the indexing in ``call``):
        inputs[0:3]  one feature tensor per alternative
        inputs[3]    tensor multiplied element-wise into the softmax scores
                     (presumably a 0/1 availability mask of shape (batch, 3)
                     - TODO confirm against the data formatter)
        inputs[4:]   tensors concatenated and appended to every alternative's
                     features (presumably decision-maker features - confirm)
    """

    def __init__(self):
        super().__init__()
        # One linear output layer reused for all three alternatives.
        self.last = Dense(1, activation="linear", use_bias=False)
        self.nl_name = 'NestedLogit_linear'

    def call(self, inputs):
        """Return a (batch, 3) tensor of choice probabilities.

        Columns 0 and 1 are the nest members, column 2 is the stand-alone
        alternative.
        """
        alt_features = inputs[:3]
        avail = inputs[3]
        # tf.concat (rather than a freshly built Concatenate layer) avoids
        # creating layer objects on every forward pass and also accepts a
        # single shared-feature tensor.
        shared = tf.concat(list(inputs[4:]), axis=-1)

        utilities = tf.concat(
            [self.last(tf.concat([alt, shared], axis=-1)) for alt in alt_features],
            axis=-1,
        )

        # Softmax over the three utilities, then scale by `avail`.
        scores = tf.nn.softmax(utilities, axis=-1) * avail

        nest_scores = scores[:, :2]
        outside_score = scores[:, 2:3]
        # The nest is represented at the upper level by its largest member score.
        nest_value = tf.reduce_max(nest_scores, axis=1, keepdims=True)

        # Upper level: nest vs. stand-alone alternative.
        upper = tf.concat([nest_value, outside_score], axis=1)
        # divide_no_nan yields 0 instead of NaN when every score in a row is 0
        # (e.g. all alternatives masked out), which would otherwise poison the
        # loss and gradients.
        upper_prob = tf.math.divide_no_nan(
            upper, tf.reduce_sum(upper, axis=1, keepdims=True)
        )

        # Lower level: share of each member inside the nest. Same 0/0 guard for
        # rows where both nest members have a zero score.
        within_nest = tf.math.divide_no_nan(
            nest_scores, tf.reduce_sum(nest_scores, axis=1, keepdims=True)
        )
        prob_nest = within_nest * upper_prob[:, 0:1]

        return tf.concat([prob_nest, upper_prob[:, 1:2]], axis=1)
    
  

In [4]:
##### Create model
model_nl = NestedLogit()
# The model outputs probabilities, so the loss is told not to expect logits.
loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False)
optimizer = tf.keras.optimizers.legacy.Adam(learning_rate=0.001)
model_nl.compile(optimizer=optimizer,
                 loss=loss,
                 metrics=["accuracy"])

# Stop when val_loss has not improved for 20 epochs and restore the best weights.
es = EarlyStopping(monitor='val_loss',
                   patience=20,
                   verbose=0,
                   restore_best_weights=True)

callbacks = [es]

###### Fitting model
# Trains on the full data set; 10% of it is held out for validation.
history = model_nl.fit(X, Y,
                       validation_split=0.1,
                       batch_size=32,
                       epochs=200,
                       callbacks=callbacks,
                       verbose=2,  # 0
                       shuffle=True)

# Log-likelihood over all of X (training and validation rows together):
# sum of the log predicted probability of each observed choice.
y = model_nl.predict(X)
chosen = np.asarray(Y).reshape(-1)
# One fancy-indexing lookup picks y[i, chosen[i]] for every row at once,
# replacing a per-row Python loop.
log_likelihood = np.sum(np.log(y[np.arange(len(chosen)), chosen]))
print('{} - Log-likelihood'.format(model_nl.nl_name), log_likelihood)



Epoch 1/200
302/302 - 2s - loss: 0.8534 - accuracy: 0.5812 - val_loss: 0.7629 - val_accuracy: 0.6642 - 2s/epoch - 5ms/step
Epoch 2/200
302/302 - 1s - loss: 0.7236 - accuracy: 0.6745 - val_loss: 0.7064 - val_accuracy: 0.6884 - 619ms/epoch - 2ms/step
Epoch 3/200
302/302 - 0s - loss: 0.6819 - accuracy: 0.6901 - val_loss: 0.6715 - val_accuracy: 0.7034 - 467ms/epoch - 2ms/step
Epoch 4/200
302/302 - 1s - loss: 0.6577 - accuracy: 0.7012 - val_loss: 0.6517 - val_accuracy: 0.7248 - 749ms/epoch - 2ms/step
Epoch 5/200
302/302 - 1s - loss: 0.6407 - accuracy: 0.7121 - val_loss: 0.6365 - val_accuracy: 0.7276 - 508ms/epoch - 2ms/step
Epoch 6/200
302/302 - 1s - loss: 0.6277 - accuracy: 0.7175 - val_loss: 0.6321 - val_accuracy: 0.7220 - 771ms/epoch - 3ms/step
Epoch 7/200
302/302 - 1s - loss: 0.6176 - accuracy: 0.7299 - val_loss: 0.6239 - val_accuracy: 0.7155 - 616ms/epoch - 2ms/step
Epoch 8/200
302/302 - 1s - loss: 0.6087 - accuracy: 0.7329 - val_loss: 0.6245 - val_accuracy: 0.7192 - 528ms/epoch - 2ms/