In [112]:
# -*- coding: utf-8 -*-
"""
Created on Fri Jan 20 13:40:18 2023
@author: jiay
"""
import os
import random
import multiprocessing as mp
import numpy as np
import pandas as pd
import scipy.optimize as opt

class ChoiceModels(object):

    '''
    Shared helpers used when specifying and estimating choice models:
    data loading, panel expansion, attribute/interaction construction,
    a BFGS wrapper and a Halton sequence generator.
    '''

    def load_data(self, path, file):
        '''
        Read a whitespace-delimited text file with a header row.

        Parameters
        ----------
        path : str
            Directory containing the file.
        file : str
            File name inside ``path``.

        Returns
        -------
        A pandas data frame with an extra constant column ``cons`` equal to 1.
        '''
        # Raw string: '\s' inside a plain literal is an invalid escape sequence.
        df = pd.read_csv(os.path.join(path, file), sep=r'\s+', header=0)
        df['cons'] = 1.
        return df

    def expand_data(self, df, n):
        '''
        Repeat every row of ``df`` n times, labelling the copies '0'..'n-1'.

        Parameters
        ----------
        df : a pandas data frame
            Left unmodified; the expansion is done on a copy.
        n : Integer
            Number of times to expand the data

        Returns
        -------
        An expanded pandas data frame with a panel structure. The new column
        ``Alt`` holds the string labels and the original index values are
        repeated (the index is not reset).
        '''
        labels = [str(i) for i in range(n)]
        # assign() works on a copy, so the caller's frame does not gain 'Alt'.
        expanded = df.assign(Alt=[list(labels) for _ in range(len(df))])
        return expanded.explode('Alt')

    def create_choice_attributes(self, df, config):
        '''
        Build the panel (one row per person and alternative) used to estimate
        the multinomial choice model described by ``config``.

        Parameters
        ----------
        df : a pandas data frame with one row per person (not modified)
        config : dict
            'Alternatives' maps an alternative label to {variable: value};
            the labels must be '0'..'J-1' because they are matched against
            the ``Alt`` column produced by ``expand_data``.
            'Attributes' maps a new column name to
            {'variable': source column, 'rule': {alternative label: weight}}.
            'Interactions' (optional) is a list of column-name pairs.

        Returns
        -------
        dict
            {'data': expanded data frame, 'xz': list of interaction columns}
        '''
        alternatives = config['Alternatives']
        if not alternatives:
            raise ValueError("config['Alternatives'] must not be empty")

        # create dependent variable
        df_copy = df.copy()
        y_namelist = list(next(iter(alternatives.values())).keys())
        df_copy['choice'] = list(zip(*[df_copy[v] for v in y_namelist]))
        df_copy = self.expand_data(df_copy, len(alternatives))

        df_copy['y'] = 0.
        for alt_key, spec in alternatives.items():
            # Compare variable by variable (looked up by name) so the result
            # does not depend on the key order inside each alternative.
            chosen = df_copy['Alt'] == alt_key
            for name in y_namelist:
                chosen = chosen & (df_copy[name] == spec[name])
            df_copy.loc[chosen, 'y'] = 1

        # create alternative specific attributes
        for var, info in config['Attributes'].items():
            df_copy[var] = 0
            for alt, w in info['rule'].items():
                # 0/1 indicator of the rows belonging to this alternative
                is_alt = (df_copy['Alt'] == alt).astype(int)
                df_copy[var] = df_copy[var] + w * df_copy[info['variable']] * is_alt

        # create interactions (a missing 'Interactions' entry means none)
        df_copy, xz_list = self.create_interactions(df_copy, config.get('Interactions'))
        return {'data': df_copy, "xz": xz_list}

    def create_interactions(self, df, interact_list):
        '''
        Add product columns for pairs of existing columns.

        Parameters
        ----------
        df : pandas data frame
            Modified in place: one column ``<a>_<b>`` is added per pair.
        interact_list : a List or None
            The list contains pairs of variable names as tuples

        Returns
        -------
        df : pandas data frame after adding interactions

        xz_list : A list of created interactions
        '''
        xz_list = []
        if interact_list is None:
            return df, xz_list
        for item in interact_list:
            vname = item[0] + "_" + item[1]
            df[vname] = df[item[0]] * df[item[1]]
            xz_list.append(vname)
        return df, xz_list

    def optimization(self, objfun, para):
        '''
        Minimize ``objfun`` with BFGS, starting from ``para``.

        Parameters
        ----------
        objfun : a user defined objective function of para

        para : a 1-D array with the shape (k,), where k is the number of parameters.

        Returns
        -------
        dict
            'log_likelihood' : the minimized objective value with its sign flipped
            'Coefficients'   : the minimizer
            'Var_Cov'        : the optimizer's inverse-Hessian approximation
            'success'        : whether the optimizer reported convergence
            'message'        : the optimizer's termination message
        '''
        v = opt.minimize(objfun, x0=para, jac=None, method='BFGS',
                         options={'maxiter': 1000, 'disp': True})
        return {'log_likelihood': -1 * v.fun, "Coefficients": v.x,
                "Var_Cov": v.hess_inv, "success": v.success,
                "message": v.message}

    def halton_sequence(self, ndraws, seed, randomize=True, shuffle=True, cut=100):
        '''
        Generate a Halton sequence for random drawing.

        Parameters
        ----------
        ndraws : Integer
            Length of the generated sequence.
        seed : Integer
            Base of the sequence (a prime number), at least 2.
        randomize : bool
            If true, add one common uniform shift to every point and wrap
            values back into [0, 1).
        shuffle : bool
            If true, shuffle the points in place before returning.
        cut : Integer
            A random number of leading points between 0 and ``cut``
            (inclusive) is discarded.

        Returns
        -------
        A 1-D array with the shape (ndraws,)

        Raises
        ------
        ValueError
            If ``seed`` is smaller than 2 or ``ndraws`` is negative.
        '''
        if seed < 2:
            # log(1) == 0 made the original level count infinite.
            raise ValueError("seed must be an integer base of at least 2")
        if ndraws < 0:
            raise ValueError("ndraws must be non-negative")

        discard = random.randint(0, cut)
        n = ndraws + discard

        # Each pass appends the next base-`seed` digit: the current block is
        # repeated `seed` times, shifted by j / seed**i. The first block is
        # always the previous sequence, so stopping as soon as n points exist
        # yields the same leading points as building further levels.
        phi = np.zeros(1)
        i = 1
        while len(phi) < n:
            scale = seed ** i
            phi = np.concatenate([phi + (j / scale) for j in range(seed)])
            i += 1
        # copy() so the returned array does not keep the full level alive
        phi = phi[discard:n].copy()

        if randomize:
            phi = phi + np.random.rand()
            phi[phi >= 1] -= 1

        if shuffle:
            random.shuffle(phi)
        return phi
    
class BinaryLogit(ChoiceModels):
    '''
    This class is to estimate a binary logit model by MLE.

    Attributes set by the constructor
    ---------------------------------
    df     : the loaded data frame (including any interaction columns)
    X_list : regressor names, starting with the constant 'cons'
    Xmat   : regressor matrix with columns ordered as in X_list
    y      : the dependent variable as a 1-D array
    '''
    def __init__(self, path, file, yname, x=None, z=None, interactions=None):
        '''
        Parameters
        ----------
        path, file : str
            Location of the whitespace-delimited data file.
        yname : str
            Column holding the binary outcome.
        x, z : list of column names or None
            Regressors; both lists are simply concatenated.
        interactions : list of (name, name) pairs or None
            Products to add as extra regressors.
        '''
        df = self.load_data(path, file)
        # Accept any sequence of names (e.g. a tuple) by converting to lists.
        x = [] if x is None else list(x)
        z = [] if z is None else list(z)
        # create_interactions returns the frame unchanged and an empty list
        # when `interactions` is None.
        self.df, xz = self.create_interactions(df, interactions)

        self.X_list = ['cons'] + x + z + xz
        self.Xmat = self.df[self.X_list].to_numpy()
        self.y = self.df[yname].to_numpy()

    def log_likelihood(self, para):
        '''
        Objective passed to the minimizer: the NEGATIVE of the average
        log-likelihood of the binary logit.

        Parameters
        ----------
        para : array
            a 1-D array with the shape(k,), where k is the number of model parameters.

        Returns
        -------
        res : scalar
            minus the mean log-likelihood over observations
        '''
        xb = np.matmul(self.Xmat, para)
        # log P(y=1) = -log(1 + exp(-xb)) and log P(y=0) = -log(1 + exp(xb)).
        # logaddexp evaluates these without overflowing, whereas
        # exp(xb) / (1 + exp(xb)) turns into inf/inf (nan) or log(0) for
        # large |xb|.
        log_p1 = -np.logaddexp(0., -xb)
        log_p0 = -np.logaddexp(0., xb)
        return (-1 / len(xb)) * np.sum(self.y * log_p1 + (1 - self.y) * log_p0)

    def estimation(self, para):
        '''
        Fit the model by minimizing ``log_likelihood`` from the start values.

        Parameters
        ----------
        para : array
            a 1-D array with the shape(k,), where k is the number of model parameters.

        Returns
        -------
        A dictionary of estimation results
        '''
        return self.optimization(self.log_likelihood, para)

class MultinomialLogit(ChoiceModels):
    '''
    Multinomial logit and mixed logit over the alternatives described in
    ``model_config``.

    Attributes set by the constructor
    ---------------------------------
    halton_draws : number of draws per person for simulation
    npersons     : number of rows in the input file
    nalts        : number of alternatives in the configuration
    y_groupby    : list with one 0/1 choice array (length nalts) per person
    X_list       : names of the columns entering with fixed coefficients
    Xmat         : matrix of those columns, one row per person-alternative
    random_components : (model="mixed" only) one dict per person mapping each
                   random-coefficient name to {'x', 'draws', 'z'}
    '''

    # Specify model here    
    model_config = {"Alternatives":
                    {"0": {"trans": 1, "occupanc": 1, "route": 1},
                     "1": {"trans": 1, "occupanc": 1, "route": 0},
                     "2": {"trans": 1, "occupanc": 2, "route": 1},
                     "3": {"trans": 1, "occupanc": 2, "route": 0},
                     "4": {"trans": 1, "occupanc": 3, "route": 1},
                     "5": {"trans": 1, "occupanc": 3, "route": 0},
                     "6": {"trans": 0, "occupanc": 1, "route": 0},
                     "7": {"trans": 0, "occupanc": 2, "route": 0},
                     "8": {"trans": 0, "occupanc": 3, "route": 0}},
                    "Nests": {"0":{"0": ["0", "1"], "1": ["2", "3"], 
                                   "2": ["4", "5"]},"1":["6", "7", "8"]},
                    "Attributes":{'trans_dummy':{'variable': 'cons', 
                                                 'rule':{"0":1,"1":1,
                                                         "2":1,"3":1,"4":1,"5":1}},
                                  'express_dummy':{'variable':'cons', 
                                                   'rule':{"0":1,"2":1,"4":1}},
                                  'hov2_dummy':{'variable':'cons', 
                                               'rule':{"2":1,"3":1,"7":1}},
                                  "hov3_dummy":{'variable':'cons', 
                                                'rule':{"4":1,"5":1,"8":1}},
                                  "price":{"variable": 'toll', 
                                           "rule": {"0":1,"2":1/2,"4":1/6}},
                                  "time": {"variable":"median", 
                                           "rule":{"0":1,"2":1,"4":1}}},
                    "Interactions":[('express_dummy', "high_income"), ('express_dummy', "med_income")],
                    "Mixedlogit":{'fixed_coeffs':['express_dummy', 'express_dummy_high_income',
                                                 'express_dummy_med_income'],
                                  'random_coeffs':{'price':{'conditionals':['high_income', 'med_income'],
                                                            'distribution':'-lognormal'},
                                                   'time':{'conditionals':['distance'],
                                                           'distribution':'normal'},
                                                   'trans_dummy':{'conditionals':None, 
                                                                  'distribution':'normal'},
                                                   'hov2_dummy':{'conditionals':['householdsize'],
                                                                 'distribution':'normal'},
                                                   'hov3_dummy':{'conditionals':['householdsize'],
                                                                 'distribution':'normal'}},
                                 'prime_numbers':[2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 
                                                  43, 47, 53, 59, 61, 67, 71, 73, 79, 83, 89, 97]}}

    def __init__(self, path, file, model = "mnl", halton_draws=300):
        '''
        Parameters
        ----------
        path, file : str
            Location of the whitespace-delimited data file.
        model : "mnl" or "mixed"
            "mnl" uses every attribute and interaction as a fixed coefficient;
            "mixed" uses the 'Mixedlogit' section of the configuration.
        halton_draws : Integer
            Number of Halton draws per person (used when model="mixed").

        Raises
        ------
        ValueError
            If ``model`` is neither "mnl" nor "mixed".
        '''
        if model not in ("mnl", "mixed"):
            # Previously an unknown model left X_list undefined and failed
            # later with an AttributeError.
            raise ValueError("model must be 'mnl' or 'mixed', got %r" % (model,))

        config = MultinomialLogit.model_config
        self.halton_draws = halton_draws
        df = self.load_data(path, file)
        self.npersons = len(df)
        self.nalts = len(config['Alternatives'])
        res = self.create_choice_attributes(df, config)
        data = res['data']
        y = data['y'].to_numpy()
        self.y_groupby = [y[i:i+self.nalts] for i in range(0, len(y), self.nalts)]

        if model == "mixed":
            # Copy so the instance never aliases the class-level configuration.
            self.X_list = list(config['Mixedlogit']['fixed_coeffs'])
            self.random_components = self._build_random_components(df, data)
        else:
            self.X_list = list(config['Attributes']) + res['xz']

        self.Xmat = data[self.X_list].to_numpy()

    def _build_random_components(self, df, data):
        '''
        Collect, per person, the inputs needed for every random coefficient:
        'x' (attribute values over alternatives), 'draws' (that person's
        slice of a Halton sequence) and 'z' (1 followed by the conditioning
        variables).
        '''
        mixed_cfg = MultinomialLogit.model_config['Mixedlogit']
        random_cfg = mixed_cfg['random_coeffs']
        # One distinct base per random coefficient. random.sample leaves the
        # class-level list intact; removing the chosen primes from it shrank
        # the shared configuration with every new instance.
        bases = random.sample(mixed_cfg['prime_numbers'], len(random_cfg))
        ndraws = self.npersons * self.halton_draws
        components = [{} for _ in range(self.npersons)]

        for (k, info), base in zip(random_cfg.items(), bases):
            # slicing data used in identifying random coefficients to individual level
            column = data[k].to_numpy()
            x_k = [column[i:i+self.nalts] for i in range(0, len(column), self.nalts)]
            conditionals = info['conditionals']
            z_k = None if conditionals is None else df[conditionals].to_numpy()

            # NOTE(review): these are uniform Halton points in [0, 1); the
            # configured 'distribution' is 'normal' / '-lognormal', which
            # would normally call for an inverse-CDF transform -- confirm.
            sequence = self.halton_sequence(ndraws, base)
            for idx in range(self.npersons):
                lo = idx * self.halton_draws
                if z_k is None:
                    z = np.ones(1)
                else:
                    z = np.concatenate(([1], z_k[idx]))
                components[idx][k] = {'x': x_k[idx],
                                      'draws': sequence[lo:lo + self.halton_draws],
                                      'z': z}
        return components

    @staticmethod
    def mnl_groupby(groupby_pair):
        '''
        Log-likelihood contribution of one person.

        groupby_pair: (y, expxb) where y is the 0/1 choice array and expxb
        the exponentiated utilities, both with one entry per alternative.
        '''
        return np.sum(groupby_pair[0] * np.log((groupby_pair[1] / np.sum(groupby_pair[1]))))

    def mnl_log_likelihood(self, para):
        '''
        This method defines the data log-likelihood from a Multinomial Logit.

        Returns minus the summed log-likelihood divided by the number of
        person-alternative rows (the quantity handed to the minimizer).
        '''
        xb = np.matmul(self.Xmat, para).reshape(-1, self.nalts)
        # Subtracting each person's largest utility leaves the probabilities
        # unchanged and keeps exp() from overflowing.
        xb = xb - xb.max(axis=1, keepdims=True)
        log_prob = xb - np.log(np.exp(xb).sum(axis=1, keepdims=True))
        y = np.concatenate(self.y_groupby).reshape(-1, self.nalts)
        return (-1 / xb.size) * np.sum(y * log_prob)

    def nl_log_likelihood(self, para):
        '''
        Placeholder for the nested-logit likelihood (not implemented).
        '''
        raise NotImplementedError("nested logit likelihood is not implemented")

    @staticmethod
    def mixed_utils(xb_g):
        '''
        xb_g: an 1-D array with shape (self.nalts, ) of exponentiated utilities

        Returns the choice probabilities xb_g / sum(xb_g).
        '''
        return xb_g / np.sum(xb_g)

    def mixed_groupby(self, groupby_pair):
        '''
        This method implements monte-carlo integration to compute choice probabilities of one person. 

        groupby_pair: (y, xb, random_components, brandom) where
            y       : 0/1 choice array over alternatives
            xb      : fixed part of the utilities, one entry per alternative
            random_components : {name: {'x', 'draws', 'z'}} for this person
            brandom : parameters of the random coefficients; for each
                      coefficient, len(z) mean parameters followed by one
                      parameter multiplying the draws

        Returns the person's simulated log-likelihood contribution.
        '''
        brandom = groupby_pair[-1]
        rp = groupby_pair[-2]
        y = groupby_pair[0]
        random_cfg = MultinomialLogit.model_config['Mixedlogit']['random_coeffs']

        # utilities[r, j]: utility of alternative j under draw r
        utilities = np.tile(np.asarray(groupby_pair[1], dtype=float),
                            (self.halton_draws, 1))
        start = 0
        for k, v in rp.items():
            info = random_cfg[k]
            end = start + len(v['z']) + 1
            # The block [start, end) belongs to this coefficient: means first,
            # then the multiplier of the draws at end - 1. Indexing `end`
            # instead would read the next coefficient's first parameter and
            # run past the vector for the last coefficient.
            coef = np.matmul(v['z'], brandom[start:(end-1)]) + brandom[end-1] * v['draws']
            if info.get('distribution', None) == "-lognormal":
                coef = -1 * np.exp(coef)

            utilities = utilities + np.outer(coef, v['x'])
            start = end

        # Shift by the row maximum before exponentiating to avoid overflow.
        utilities = utilities - utilities.max(axis=1, keepdims=True)
        expu = np.exp(utilities)
        probs = expu / expu.sum(axis=1, keepdims=True)
        # Average the probabilities over draws, then take the log.
        res = y * np.log(probs.mean(axis=0))
        return np.sum(res)

    def mixed_log_likelihood(self, para):
        '''
        Define log-likelihood function for mixed-logit

        para: the first len(self.X_list) entries are the fixed coefficients;
        the remaining entries are the random-coefficient parameters in the
        order of the 'random_coeffs' configuration. Extra trailing entries
        are ignored.

        Returns minus the summed simulated log-likelihood divided by the
        number of person-alternative rows.
        '''
        para = np.asarray(para, dtype=float)
        nfixed = len(self.X_list)
        bfixed = para[:nfixed]
        brandom = para[nfixed:]

        if self.random_components:
            needed = sum(len(v['z']) + 1 for v in self.random_components[0].values())
            if len(brandom) < needed:
                raise ValueError(
                    "para needs at least %d entries (%d fixed + %d random), got %d"
                    % (nfixed + needed, nfixed, needed, len(para)))

        xb = np.matmul(self.Xmat, bfixed)
        total = 0.
        for idx, (y_i, rp_i) in enumerate(zip(self.y_groupby, self.random_components)):
            xb_i = xb[idx * self.nalts:(idx + 1) * self.nalts]
            total += self.mixed_groupby((y_i, xb_i, rp_i, brandom))
        return (-1 / len(xb)) * total

    def estimation(self, para, model="mnl"):
        '''
        Parameters
        ----------
        para : array
            a 1-D array with the shape(k,), where k is the number of model parameters.
        model : "mnl" or "mixed"
            Which likelihood to minimize. The default keeps the previous
            behaviour (multinomial logit); "mixed" requires an instance
            built with model="mixed".

        Returns
        -------
        A dictionary of estimation results
        '''
        objectives = {"mnl": self.mnl_log_likelihood,
                      "mixed": self.mixed_log_likelihood}
        if model not in objectives:
            raise ValueError("model must be 'mnl' or 'mixed', got %r" % (model,))
        return self.optimization(objectives[model], para)
    
if __name__ == '__main__':
    # Driver: fits a binary logit and a multinomial logit, then evaluates the
    # mixed-logit objective once at a zero parameter vector.
    import time

    # location of the whitespace-delimited survey file
    p = r"/kaggle/input/travel-choice"
    f = "assignment 1.txt"

    ## estimating binary models
    # regressors of the route-choice model
    x = ['toll', 'median']
    z = ['female', 'age3050', 'distance', 'householdsize']
    interactions = [('toll', 'high_income'), ('toll', 'med_income')]
    route = BinaryLogit(p, f, yname="route", x=x, z=z, interactions=interactions)
    bini = np.zeros((len(route.X_list),))
    res_binary = route.estimation(bini)

    ## estimating a MNL model
    # wall-clock time covers both construction and estimation
    start = time.time()
    mnl = MultinomialLogit(p, f, "mnl")
    bini = np.zeros((len(mnl.X_list),))
    res_mnl = mnl.estimation(bini)
    end = time.time()
    print(end - start)

    ## estimating a mixed-logit model
    mixed = MultinomialLogit(p, f, "mixed")
    # NOTE(review): the vector is sized from mnl.X_list, not mixed.X_list;
    # mixed_log_likelihood treats the first len(mixed.X_list) entries as the
    # fixed coefficients and the rest as random-coefficient parameters --
    # confirm the intended length.
    nparameters = 15 + len(mnl.X_list)
    bini = np.zeros((nparameters,))
    # single evaluation of the objective (no optimization is run here)
    es_mixed = mixed.mixed_log_likelihood(bini)

Optimization terminated successfully.
         Current function value: 0.528810
         Iterations: 34
         Function evaluations: 370
         Gradient evaluations: 37
Optimization terminated successfully.
         Current function value: 0.204334
         Iterations: 60
         Function evaluations: 558
         Gradient evaluations: 62
5.6868736743927


In [108]:
mixed.mixed_groupby(es_mixed[0])


-21.110070867537214

In [18]:
np.kron([1,2,3,4,5,6,7], [5,6,7])

array([ 5,  6,  7, 10, 12, 14, 15, 18, 21, 20, 24, 28, 25, 30, 35, 30, 36,
       42, 35, 42, 49])

In [32]:
l = mnl.mnl_log_likelihood(bini, mp=False)
l

0.2441360641484688

In [2]:
list(zip(np.array([1,2,3]), [4,5,6], [7,8,9]))

[(1, 4, 7), (2, 5, 8), (3, 6, 9)]

In [24]:
p = route.halton_sequence(10, 2)
p

array([0.0533552, 0.1783552, 0.0064802, 0.4283552, 0.9283552, 0.5064802,
       0.3033552, 0.6783552, 0.8033552, 0.5533552])

In [9]:
len(mnl.halton['price'])


131400

In [11]:
mnl.Xmat.shape

(3942, 3)

In [8]:
l = [1,2,3,4,5,6]
l[0:2]
l[2:len(l)]

[3, 4, 5, 6]

In [38]:
np.log(sum([1,2]))

1.0986122886681098

In [45]:
l = [np.array([1,2,3]), np.array([4,5,6])]
sum(l)

array([5, 7, 9])