## Markov Chain for Lunch Preferences at Duke

In [121]:
import pandas as pd
import numpy as np

In [122]:
# Load the raw responses from data.csv (read relative to the working directory).
# The long question-text column headers are renamed in the cleaning cell.
df = pd.read_csv("data.csv")

In [123]:
### data cleaning
# Shorten the survey-question column headers to short working names.
col_mapper = {'What is your relationship to Duke?':'affinity', 'Where did you eat lunch today?':'curr', 'Where do you think you will eat lunch tomorrow?':'next'}
df = df.rename(columns=col_mapper)

# Every label rewrite in one place, applied identically to both lunch columns.
name_fixes = {
    ## fix spelling
    "Chef's Kitchen (Taste of San Fransico)": "Chef's Kitchen",
    "JB's Roast and Chops": "JB's",
    "McDonalds": "McDonald's",
    ## map absorbing states to other
    'Freeman Center Cafe': 'Other',
    'Saladelia at Perkins': 'Other',
}

# Assign the result back instead of calling `.mask(..., inplace=True)` on
# `df[col]`: mutating a column selection in place is chained assignment, which
# warns on pandas 2.x and leaves `df` unchanged under Copy-on-Write.
for col in ('curr', 'next'):
    df[col] = df[col].replace(name_fixes)

In [124]:
# Show the distinct labels left in 'curr' after cleaning (quick check of the fixes above).
print(df['curr'].unique())

['Il Forno' "Chef's Kitchen" 'Farmstead or Sprout' 'The Commons' 'Tandoor'
 "Zweli's" 'Gothic Grill' 'Pitchforks' 'Sazon' "JB's" 'Other'
 'Ginger and Soy' 'Panera' "McDonald's" 'Skillet' "Devil's Krafthouse"
 'Thyme' 'Red Mango' "Twinnie's" 'Marketplace' 'Cafe']


In [125]:
### group by type of restaurant

# Restaurant category -> the cleaned restaurant names that belong to it.
d = {
    'fast_casual': [
        "Chef's Kitchen",
        "Farmstead or Sprout",
        "Ginger and Soy",
        "Il Forno",
        "JB's",
        "Skillet",
        "Sazon",
        "Tandoor",
        "Thyme",
    ],
    "sit_down": [
        "The Commons",
        "Gothic Grill",
        "Devil's Krafthouse",
        "Nasher Cafe",
        "Marketplace",
        "Pitchforks",
        "Zweli's",
    ],
    "cafes": ["Cafe", "Twinnie's"],
    "fast_food": ["McDonald's", "Panera", "Red Mango"],
    "other": ["Other"],
}


def map_restaurant_to_type(restaurant):
    """Return the category key of `d` whose list contains `restaurant`.

    Categories are checked in the dict's insertion order and the first match
    wins. A value that is not listed under any category is returned unchanged,
    so unmapped names pass through as-is rather than raising.
    """
    matching_types = (
        category for category, members in d.items() if restaurant in members
    )
    # Fall back to the input itself when nothing matched -- check for errors here.
    return next(matching_types, restaurant)

# Derive the restaurant category for today's and tomorrow's lunch.
# Names that are not listed in `d` are passed through unchanged by the mapper.
df['curr_type'] = df['curr'].map(map_restaurant_to_type)
df['next_type'] = df['next'].map(map_restaurant_to_type)

In [126]:
# Frequency table for each column of interest, printed in a fixed order:
# respondent affiliation, raw lunch choices, then grouped lunch choices.
for column in ('affinity', 'curr', 'next', 'curr_type', 'next_type'):
    print(df[column].value_counts())

Sophomore           46
Junior              23
Senior              22
Freshmen            13
Graduate Student     6
Faculty/Staff        4
Name: affinity, dtype: int64
Other                  17
Tandoor                12
Ginger and Soy          9
Sazon                   9
Il Forno                8
Panera                  6
Devil's Krafthouse      6
Zweli's                 5
Farmstead or Sprout     5
Gothic Grill            4
Pitchforks              4
JB's                    4
McDonald's              4
Skillet                 4
Thyme                   4
Chef's Kitchen          3
Red Mango               3
The Commons             2
Twinnie's               2
Marketplace             2
Cafe                    1
Name: curr, dtype: int64
Ginger and Soy         25
Other                  12
Tandoor                11
Il Forno                7
Thyme                   6
Farmstead or Sprout     6
Sazon                   6
JB's                    5
Gothic Grill            5
Panera                  5
Ch

In [127]:
### create the transition matrix for whole dataset

# Build the row-normalised transition matrix with pd.crosstab:
# row = today's lunch ('curr'), column = tomorrow's lunch ('next').
temp = pd.concat([df['curr'], df['next']], axis=1)
t = pd.crosstab(temp['curr'], temp['next'], normalize='index')

# crosstab only creates a row for labels seen in 'curr' and a column for labels
# seen in 'next'. Matrix powers need a square matrix whose rows and columns are
# the same states in the same order, so put both axes on the union of states
# and fill the transitions that were never observed with 0.
states = t.index.union(t.columns)
t = (
    t.reindex(index=states, columns=states, fill_value=0)
    .fillna(0)
    .rename_axis(index='curr', columns='next')
)

# Per-row totals: 1 for every state with observed outgoing transitions, 0 for a
# state that only ever appeared as a 'next' answer.
t_check = t.sum(axis=1)
no_outgoing = t_check[np.isclose(t_check, 0)].index.tolist()

print("\nTransition Matrix:")
print(t)

print("\n Check rows sum to 1")
print(t_check)
if no_outgoing:
    # These rows are all zero, so probability mass leaks out of the chain there;
    # consider folding these labels into 'Other' during cleaning.
    print("\nWARNING: no observed outgoing transitions for:", no_outgoing)


Transition Matrix:
next                     Cafe  Chef's Kitchen  Devil's Krafthouse  \
curr                                                                
Cafe                 0.000000        1.000000                 0.0   
Chef's Kitchen       0.000000        0.000000                 0.0   
Devil's Krafthouse   0.000000        0.333333                 0.0   
Farmstead or Sprout  0.000000        0.000000                 0.2   
Ginger and Soy       0.000000        0.111111                 0.0   
Gothic Grill         0.000000        0.000000                 0.0   
Il Forno             0.000000        0.000000                 0.0   
JB's                 0.000000        0.000000                 0.0   
Marketplace          0.000000        0.000000                 0.5   
McDonald's           0.000000        0.250000                 0.0   
Other                0.000000        0.000000                 0.0   
Panera               0.166667        0.000000                 0.0   
Pitchforks    

In [128]:
### save transition matrix as csv
# Writes `t` to transition.csv in the working directory.
t.to_csv("transition.csv")

In [129]:
### create the transition matrix for grouping by restaurant type

# Build the row-normalised transition matrix between restaurant types with
# pd.crosstab: row = today's type ('curr_type'), column = tomorrow's type
# ('next_type').
temp_group = pd.concat([df['curr_type'], df['next_type']], axis=1)
t_group = pd.crosstab(temp_group['curr_type'], temp_group['next_type'], normalize='index')

# crosstab builds rows and columns from different columns of the data, so put
# both axes on the same ordered set of states before any matrix power is taken;
# transitions that were never observed become 0.
group_states = t_group.index.union(t_group.columns)
t_group = (
    t_group.reindex(index=group_states, columns=group_states, fill_value=0)
    .fillna(0)
    .rename_axis(index='curr_type', columns='next_type')
)

print("\nTransition Matrix:")
print(t_group)


Transition Matrix:
next_type       cafes  fast_casual  fast_food     other  sit_down
curr_type                                                        
cafes        0.333333     0.333333   0.333333  0.000000  0.000000
fast_casual  0.000000     0.724138   0.068966  0.068966  0.137931
fast_food    0.076923     0.538462   0.153846  0.153846  0.076923
other        0.000000     0.529412   0.058824  0.294118  0.117647
sit_down     0.043478     0.565217   0.130435  0.043478  0.217391


In [130]:
### save grouped transition matrix as csv
# Writes `t_group` to transition_grouped_by_type.csv in the working directory.
t_group.to_csv("transition_grouped_by_type.csv")

In [131]:
### define functions

def _n_step_matrix(transition_matrix, num_steps):
    """Return `transition_matrix` raised to `num_steps` as a labelled DataFrame.

    If the row labels and column labels are not identical (different order or
    different members, as pd.crosstab can produce), both axes are first
    re-indexed onto the union of the labels with missing transitions set to 0.
    Without this, the matrix product would combine columns and rows that refer
    to different states.

    Raises:
        ValueError: if `num_steps` is negative (np.linalg.matrix_power would
            otherwise invert the matrix, which is not a step into the future).
    """
    if num_steps < 0:
        raise ValueError("num_steps must be a non-negative integer")

    rows, cols = transition_matrix.index, transition_matrix.columns
    if not rows.equals(cols):
        states = rows.union(cols)
        transition_matrix = transition_matrix.reindex(
            index=states, columns=states, fill_value=0.0
        )
        # Keep the original axis names; `rename` returns new Index objects so
        # the two axes never share one mutable name.
        transition_matrix.index = states.rename(rows.name)
        transition_matrix.columns = states.rename(cols.name)

    powered = np.linalg.matrix_power(transition_matrix.to_numpy(), num_steps)
    return pd.DataFrame(
        powered, index=transition_matrix.index, columns=transition_matrix.columns
    )


def predict_future_lunch(curr_lunch, num_steps, transition_matrix):
    """Return the most likely lunch spot `num_steps` days after `curr_lunch`.

    Args:
        curr_lunch: label of today's lunch; must be a row label of the matrix.
        num_steps: non-negative number of days to look ahead.
        transition_matrix: DataFrame of one-step transition probabilities
            (rows = current state, columns = next state).

    Returns:
        The column label with the highest `num_steps`-step probability in the
        `curr_lunch` row (the first such label on ties, per `idxmax`).

    Raises:
        ValueError: if `num_steps` is negative.
        KeyError: if `curr_lunch` is not a known state.
    """
    return _n_step_matrix(transition_matrix, num_steps).loc[curr_lunch].idxmax()


def find_prob_of_future_lunch(curr_lunch, next_lunch, num_steps, transition_matrix):
    """Return the probability of eating at `next_lunch` in `num_steps` days.

    Args:
        curr_lunch: label of today's lunch (row label).
        next_lunch: label of the lunch of interest (column label).
        num_steps: non-negative number of days to look ahead.
        transition_matrix: DataFrame of one-step transition probabilities.

    Returns:
        The (`curr_lunch`, `next_lunch`) entry of the matrix raised to
        `num_steps`.

    Raises:
        ValueError: if `num_steps` is negative.
        KeyError: if either label is not a known state.
    """
    return _n_step_matrix(transition_matrix, num_steps).loc[curr_lunch, next_lunch]


def limit_to_infinity(transition_matrix, num_steps=1000):
    """Approximate the long-run behaviour by taking a large matrix power.

    Args:
        transition_matrix: DataFrame of one-step transition probabilities.
        num_steps: power used as a stand-in for the limit (default 1000, the
            value that was previously hard-coded).

    Returns:
        DataFrame holding the matrix raised to `num_steps`. This is a finite
        power, not a true limit: the rows only agree with each other (and with
        a stationary distribution) if the chain actually converges.

    Raises:
        ValueError: if `num_steps` is negative.
    """
    return _n_step_matrix(transition_matrix, num_steps)

In [132]:
### test functions 

# Full matrix: most likely lunch one day after Panera, then the 100-step
# probability of going from Tandoor to Ginger and Soy.
print(predict_future_lunch('Panera', 1, t))
print(find_prob_of_future_lunch("Tandoor", "Ginger and Soy", 100, t))

# Grouped matrix: most likely type one day after a sit_down lunch, then the
# one-step probability of fast_casual -> sit_down.
print(predict_future_lunch('sit_down', 1, t_group))
print(find_prob_of_future_lunch("fast_casual", "sit_down", 1, t_group))


Cafe
0.20673671622908316
fast_casual
0.13793103448275862


In [133]:
### get limits to infinity
# Pass copies of both matrices to limit_to_infinity, which raises each one to a
# large power as a stand-in for the limit.
t_temp = t.copy()
t_group_temp = t_group.copy()
limit_to_infinity_t = limit_to_infinity(t_temp)
limit_to_infinity_t_group = limit_to_infinity(t_group_temp)
# print(limit_to_infinity_t.iloc[1,:])
# Show the second row (position 1) of the grouped result.
print(limit_to_infinity_t_group.iloc[1,:])

next_type
cafes          0.019364
fast_casual    0.659899
fast_food      0.089267
other          0.092489
sit_down       0.138982
Name: fast_casual, dtype: float64


In [134]:
# Save both long-run matrices as CSV files in the working directory.
limit_to_infinity_t.to_csv("limit_to_inf_full_data.csv")
limit_to_infinity_t_group.to_csv("limit_to_inf_grouped_by_type.csv")

**Equilibrium vector for FULL dataset** (one entry per column of the full transition matrix, in the matrix's column order):

[0.009119, 0.056344, 0.036886, 0.091115, 0.206737, 0.057687, 0.035231, 0.035130, 0.036829, 0.047838, 0.000000, 0.054711, 0.022407, 0.000000, 0.056263, 0.047660, 0.011915, 0.083682, 0.025526, 0.083682, 0.001241]

**Equilibrium vector for GROUPED dataset** (order: cafes, fast_casual, fast_food, other, sit_down):

[0.019364, 0.659899, 0.089267, 0.092489, 0.138982]

