## Markov Chain for Lunch Preferences at Duke

In [49]:
import pandas as pd
import numpy as np

In [50]:
# Read the raw responses from data.csv into a DataFrame.
df = pd.read_csv(filepath_or_buffer="data.csv")

In [51]:
### data cleaning
# Map the verbose survey-question headers to the short column names used
# by the rest of the notebook.
col_mapper = {
    'What is your relationship to Duke?': 'affinity',
    'Where did you eat lunch today?': 'curr',
    'Where do you think you will eat lunch tomorrow?': 'next',
}
# Rebind the result instead of using inplace=True: rename() returns a new
# frame, so the object produced by read_csv is never mutated in place.
df = df.rename(columns=col_mapper)

In [52]:
# Print the frequency of each distinct answer for the three renamed columns.
for column in ('affinity', 'curr', 'next'):
    print(df[column].value_counts())

Sophomore           41
Junior              16
Senior              15
Freshmen            13
Graduate Student     5
Faculty/Staff        2
Name: affinity, dtype: int64
Other                                     10
Tandoor                                    9
Sazon                                      9
Il Forno                                   8
Ginger and Soy                             8
Zweli's                                    5
Pitchforks                                 4
JB's Roast and Chops                       4
Farmstead or Sprout                        4
Gothic Grill                               4
Devil's Krafthouse                         4
Thyme                                      4
Chef's Kitchen (Taste of San Fransico)     3
Panera                                     3
Skillet                                    3
Marketplace                                2
The Commons                                2
Red Mango                                  2
Cafe                   

In [57]:
### create the transition matrix for all affinities

# Cross-tabulate 'curr' against 'next' with pd.crosstab, normalising each
# row so it holds relative frequencies, then replace any NaNs with 0s.
# Rows are labelled by the values seen in 'curr' and columns by the values
# seen in 'next'; nothing here forces the two label sets to match.
temp = df.loc[:, ['curr', 'next']]
t = pd.crosstab(
    index=temp['curr'],
    columns=temp['next'],
    normalize='index',
).fillna(0)

print("\nTransition Matrix:", t, sep="\n")


Transition Matrix:
next                                    Chef's Kitchen (Taste of San Fransico)  \
curr                                                                             
Cafe                                                                       1.0   
Chef's Kitchen (Taste of San Fransico)                                     0.0   
Devil's Krafthouse                                                         0.5   
Farmstead or Sprout                                                        0.0   
Ginger and Soy                                                             0.0   
Gothic Grill                                                               0.0   
Il Forno                                                                   0.0   
JB's Roast and Chops                                                       0.0   
Marketplace                                                                0.0   
McDonalds                                                                  0.0

In [None]:
### save transition matrix as csv
# Write the transition matrix `t` to transition.csv (relative path).
t.to_csv(path_or_buf="transition.csv")

In [69]:
### define functions

def _n_step_distribution(curr_lunch, num_steps, transition_matrix=None):
    # Private helper: probability of each state `num_steps` days after
    # starting from `curr_lunch`, returned as a Series indexed by state.
    if transition_matrix is None:
        # Resolve the default at call time so a rebuilt notebook-level `t`
        # is picked up instead of a stale object captured at definition time.
        transition_matrix = t
    if num_steps < 0:
        raise ValueError("num_steps must be a non-negative integer")
    if curr_lunch not in transition_matrix.index:
        # Same failure mode as a direct .loc lookup on the supplied matrix.
        raise KeyError(curr_lunch)

    # A crosstab of 'curr' vs 'next' need not be square (a place can appear
    # only as a row or only as a column). Align both axes to the union of
    # labels; combinations that were never observed get probability 0.
    states = transition_matrix.index.union(transition_matrix.columns)
    square = transition_matrix.reindex(index=states, columns=states, fill_value=0.0)

    # n-step transition probabilities are the n-th *matrix* power of the
    # one-step matrix. `df ** n` would only raise each entry to the n-th
    # power element-wise, which is not a Markov-chain step.
    powered = np.linalg.matrix_power(square.to_numpy(dtype=float), num_steps)
    return pd.Series(powered[states.get_loc(curr_lunch)], index=states)


def predict_future_lunch(curr_lunch, num_steps, transition_matrix=None):
    """Return the most likely lunch spot `num_steps` days from now.

    Parameters
    ----------
    curr_lunch : label
        Today's lunch spot; must be a row label of `transition_matrix`.
    num_steps : int
        Number of days to look ahead (0 returns `curr_lunch` itself).
    transition_matrix : pandas.DataFrame, optional
        One-step transition probabilities (rows = current state,
        columns = next state). When omitted, the notebook-level matrix `t`
        is looked up at call time.

    Returns
    -------
    label
        The state with the highest `num_steps`-step probability. Ties go to
        the first state in sorted label order.

    Raises
    ------
    KeyError
        If `curr_lunch` is not a row label of the matrix.
    ValueError
        If `num_steps` is negative.

    Notes
    -----
    States that never occur as a row have no outgoing probabilities, so any
    probability mass that reaches them is lost on the following step.
    """
    return _n_step_distribution(curr_lunch, num_steps, transition_matrix).idxmax()

def find_prob_of_future_lunch(curr_lunch, next_lunch, num_steps, transition_matrix=None):
    """Return the probability of eating at `next_lunch` in `num_steps` days.

    Parameters
    ----------
    curr_lunch : label
        Today's lunch spot; must be a row label of `transition_matrix`.
    next_lunch : label
        Lunch spot of interest; must be a row or column label of the matrix.
    num_steps : int
        Number of days to look ahead.
    transition_matrix : pandas.DataFrame, optional
        One-step transition probabilities (rows = current state,
        columns = next state). When omitted, the notebook-level matrix `t`
        is looked up at call time.

    Returns
    -------
    float
        Entry (`curr_lunch`, `next_lunch`) of the `num_steps`-th matrix power
        of the transition matrix.

    Raises
    ------
    KeyError
        If `curr_lunch` is not a row label, or `next_lunch` is not a label
        on either axis.
    ValueError
        If `num_steps` is negative.
    """
    return _n_step_distribution(curr_lunch, num_steps, transition_matrix).loc[next_lunch]

In [75]:
### test functions 

# One-step checks against the matrix `t`, passed explicitly.
most_likely_after_sazon = predict_future_lunch('Sazon', 1, t)
print(most_likely_after_sazon)

tandoor_to_zwelis = find_prob_of_future_lunch("Tandoor", "Zweli's", 1, t)
print(tandoor_to_zwelis)


Ginger and Soy
0.1111111111111111
