In [20]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [21]:
# Show every column whenever a dataframe is rendered
pd.set_option('display.max_columns', None)
# Read the comma-separated restaurant file into `data`
data = pd.read_csv('../data/restaurants.dat', sep=',')
data

Unnamed: 0,ID,rating,price,category_Chinese,category_Japanese,category_Korean,category_Indian,category_French,category_Mexican,category_Lebanese,category_Ethiopian,Asian,rest_lat,rest_lon,distance,downtown
0,0,1,4,0,1,0,0,0,0,0,0,1.0,57.994604,42.220972,71.735518,1.0
1,1,2,2,0,0,1,0,0,0,0,0,1.0,93.474454,50.549434,106.267205,0.0
2,2,3,3,0,0,0,0,0,0,0,1,0.0,94.902294,97.830520,136.298409,0.0
3,3,4,1,1,0,0,0,0,0,0,0,1.0,51.027966,69.152206,85.941147,0.0
4,4,4,3,0,0,0,0,0,0,0,0,0.0,37.657349,89.145620,96.773021,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,95,4,3,0,0,0,0,0,0,0,0,0.0,83.627288,9.511387,84.166441,0.0
96,96,1,1,0,0,0,0,0,0,0,0,0.0,25.475207,92.144641,95.601366,0.0
97,97,4,2,0,0,0,0,0,0,0,1,0.0,12.715702,27.657518,30.440555,1.0
98,98,4,4,1,0,0,0,0,0,0,0,1.0,31.367914,32.303213,45.027143,1.0


In [22]:
# Keep all columns visible when a dataframe is rendered
pd.set_option('display.max_columns', None)
# Read the comma-separated observed-choice file into `data2`
data2 = pd.read_csv('../data/obs_choice.dat', sep=',')
data2

Unnamed: 0,user_lat,user_lon,logit_0,logit_1,logit_2,logit_3,logit_4,nested_0,nested_1,nested_2,nested_3,nested_4,cnl_0,cnl_1,cnl_2,cnl_3,cnl_4
0,29.658623,94.939628,10,37,54,44,75,28,34,10,48,94,44,37,16,70,34
1,11.143086,44.648300,54,87,53,74,70,30,88,93,39,82,54,11,10,39,63
2,59.937774,79.235164,35,42,40,76,21,12,12,99,16,44,35,30,44,82,26
3,89.859393,20.279733,82,94,51,54,34,9,50,26,35,39,10,76,82,44,78
4,22.618975,25.913081,40,43,37,30,23,40,82,35,35,97,2,35,42,91,10
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9995,47.243040,51.278399,10,0,34,30,27,90,40,30,94,23,29,44,30,53,30
9996,72.957844,86.053678,89,82,79,39,12,94,82,32,35,94,4,76,16,19,25
9997,5.057515,36.973306,6,3,20,81,54,16,79,88,32,24,35,94,34,54,11
9998,54.747056,93.820073,32,52,37,40,40,25,35,32,35,30,34,16,69,29,94


In [23]:
# Smallest and largest value found in the logit_0 column
min_logit_0, max_logit_0 = data2['logit_0'].min(), data2['logit_0'].max()
for label, value in (('min_logit_0:', min_logit_0), ('max_logit_0:', max_logit_0)):
    print(label, value)

min_logit_0: 0
max_logit_0: 99


In [24]:
# Coefficients carrying the `_0` suffix, grouped by the attribute they multiply.

# Cuisine-category indicator columns
beta_chinese_0 = 0.849
beta_japanese_0 = 1.28
beta_korean_0 = 0.753
beta_indian_0 = 1.03
beta_french_0 = 0.629
beta_mexican_0 = 1.27
beta_lebanese_0 = 0.853
beta_ethiopian_0 = 0.489

# Restaurant price and rating
beta_price_0 = -0.4
beta_rating_0 = 0.743

# Multiplies the logarithm of the user-restaurant distance
beta_log_dist_0 = -0.602

In [25]:
import math

def haversine(lat1, lon1, lat2, lon2):
    """Return the great-circle distance in kilometers between two points.

    Parameters
    ----------
    lat1, lon1 : float
        Latitude and longitude of the first point, in degrees.
    lat2, lon2 : float
        Latitude and longitude of the second point, in degrees.

    Returns
    -------
    float
        Distance along a sphere of radius 6371 km, computed with the
        Haversine formula. Identical points give 0.0.

    Notes
    -----
    Inputs are not range-checked: latitudes outside [-90, 90] are accepted
    and simply fed through the formula.
    NOTE(review): the restaurant table displayed in this notebook contains
    latitude values above 90, so the coordinates may not be geographic
    degrees -- confirm that a great-circle distance is the intended metric.
    """
    R = 6371.0  # Earth radius in kilometers

    # Convert latitude and longitude from degrees to radians
    lat1 = math.radians(lat1)
    lon1 = math.radians(lon1)
    lat2 = math.radians(lat2)
    lon2 = math.radians(lon2)

    # Compute differences in latitude and longitude
    dlat = lat2 - lat1
    dlon = lon2 - lon1

    # Apply Haversine formula
    a = math.sin(dlat / 2)**2 + math.cos(lat1) * math.cos(lat2) * math.sin(dlon / 2)**2
    # Mathematically 0 <= a <= 1, but rounding can land marginally outside
    # that interval (e.g. for antipodal points), which would make one of the
    # square roots below raise ValueError. Clamp before taking the roots.
    a = min(1.0, max(0.0, a))
    c = 2 * math.atan2(math.sqrt(a), math.sqrt(1 - a))

    return R * c


In [26]:
# Compute the logit_0 utility of every restaurant for the first individual
# (row label 0 of data2).
user_0_lat = data2['user_lat'][0]
user_0_lon = data2['user_lon'][0]
rest_0_lat = data['rest_lat']
rest_0_lon = data['rest_lon']

# Part of the utility that depends only on restaurant attributes, evaluated
# for all restaurants at once. The terms are added in the same order as the
# original per-restaurant expression, so the values are unchanged.
fixed_utility = (
    beta_chinese_0 * data['category_Chinese']
    + beta_ethiopian_0 * data['category_Ethiopian']
    + beta_french_0 * data['category_French']
    + beta_indian_0 * data['category_Indian']
    + beta_japanese_0 * data['category_Japanese']
    + beta_korean_0 * data['category_Korean']
    + beta_lebanese_0 * data['category_Lebanese']
    + beta_mexican_0 * data['category_Mexican']
    + beta_price_0 * data['price']
    + beta_rating_0 * data['rating']
)

# Initialize a list to store utilities for each restaurant
utilities = []

# The restaurant count comes from the table itself rather than a hard-coded
# 100. Rows are looked up by label i, which assumes the default 0..n-1 index.
for i in range(len(data)):
    distance = haversine(user_0_lat, user_0_lon, rest_0_lat[i], rest_0_lon[i])
    # NOTE(review): a distance of exactly 0 makes np.log return -inf (and the
    # utility +inf) -- confirm a user can never share a restaurant's location.
    U_i = fixed_utility[i] + beta_log_dist_0 * np.log(distance)

    # Append the utility to the list
    utilities.append(U_i)
print(utilities)

[-4.713909341846072, -3.8899458618135796, -3.8333720935711186, -1.4368530602349918, -2.4089291404238034, -5.7440537887643055, -4.235683132151159, -2.4782238989007235, -2.4511280173914107, -1.4112839931568177, -1.5094239043151294, -1.9661516327625916, -2.244601296950728, -3.753146247283814, -2.713486912386337, -3.8146331271242904, -0.5104967727744341, -2.150088883894811, -1.6901790601198434, -2.4990020644382094, -5.550616969170044, -1.4978097558924248, -3.5389815414033245, -3.199186526068381, -1.9744690966996812, -0.6905812077581985, -1.9650728303491998, -1.9717615874222645, -4.1467809089715155, -1.3463710338359376, -0.4986061794436507, -1.4847197155765808, -1.8809477447756948, -3.1317403260489596, -0.0428781282034576, -1.3461639645650245, -2.0319964276149864, -0.9100552106501114, -3.9923149346166564, -1.7750023426767694, -0.6240055788962726, -3.254935156456535, -1.4750156926573053, -4.7765043033412855, -0.3135021837345171, -2.753205827869306, -2.5477387656825075, -3.1480266410509152, -

In [27]:
# Initialize a dictionary to store utilities for all users
all_utilities = {}

rest_lat = data['rest_lat']
rest_lon = data['rest_lon']

# The restaurant-attribute part of the utility is the same for every user, so
# it is computed once here instead of inside the double loop. The terms are
# added in the same order as the original per-restaurant expression, which
# keeps the resulting values unchanged.
restaurant_utility = (
    beta_chinese_0 * data['category_Chinese']
    + beta_ethiopian_0 * data['category_Ethiopian']
    + beta_french_0 * data['category_French']
    + beta_indian_0 * data['category_Indian']
    + beta_japanese_0 * data['category_Japanese']
    + beta_korean_0 * data['category_Korean']
    + beta_lebanese_0 * data['category_Lebanese']
    + beta_mexican_0 * data['category_Mexican']
    + beta_price_0 * data['price']
    + beta_rating_0 * data['rating']
).to_numpy()

# Plain arrays avoid a pandas label lookup on every inner-loop iteration.
# Positions are used as user/restaurant indices, which matches the original
# label lookups as long as both tables keep their default 0..n-1 index.
rest_lat_values = rest_lat.to_numpy()
rest_lon_values = rest_lon.to_numpy()
user_lat_values = data2['user_lat'].to_numpy()
user_lon_values = data2['user_lon'].to_numpy()

# Sizes come from the tables rather than hard-coded 10000 / 100
n_users = len(data2)
n_restaurants = len(data)

# Iterate over each user
for user_idx in range(n_users):
    # Get the latitude and longitude of the current user
    user_lat = user_lat_values[user_idx]
    user_lon = user_lon_values[user_idx]

    # Initialize a list to store utilities for the current user
    user_utilities = []

    # Iterate over each restaurant
    for i in range(n_restaurants):
        # Compute the distance between the user and the restaurant
        distance = haversine(user_lat, user_lon, rest_lat_values[i], rest_lon_values[i])

        # Add the user-specific distance term to the precomputed part.
        # NOTE(review): a distance of exactly 0 makes np.log return -inf (and
        # the utility +inf) -- confirm this cannot occur in the data.
        U_i = restaurant_utility[i] + beta_log_dist_0 * np.log(distance)

        # Append the utility to the list for the current user
        user_utilities.append(U_i)

    # Store the list of utilities for the current user in the dictionary
    all_utilities[user_idx] = user_utilities

# Now 'all_utilities' is a dictionary where keys are user indices and values are lists of utilities
# You can access the utilities associated with any specific user by using their index as the key, for example:
# utilities_for_user_3000 = all_utilities[3000]


In [28]:
# Initialize a dictionary to store choice probabilities for all users
all_probabilities = {}

# Iterate over each user present in 'all_utilities' (no hard-coded user count)
for user_idx, user_utilities in all_utilities.items():
    # A user without any utilities gets an empty probability list
    if not user_utilities:
        all_probabilities[user_idx] = []
        continue

    # Shift by the largest utility before exponentiating: the probabilities
    # are mathematically unchanged, but every exponent is <= 0, so math.exp
    # cannot raise OverflowError on large utilities.
    max_utility = max(user_utilities)
    exp_utilities = [math.exp(U_i - max_utility) for U_i in user_utilities]

    # Compute the sum of exponentials of (shifted) utilities
    sum_exp_utilities = sum(exp_utilities)

    # Compute the probability of choosing each restaurant
    user_probabilities = [exp_U_i / sum_exp_utilities for exp_U_i in exp_utilities]

    # Store the list of probabilities for the current user in the dictionary
    all_probabilities[user_idx] = user_probabilities