## Markov Chain for Lunch Preferences at Duke

In [1]:
import pandas as pd
import numpy as np

In [2]:
df = pd.read_csv("data.csv")

In [3]:
### data cleaning
col_mapper = {'What is your relationship to Duke?':'affinity', 'Where did you eat lunch today?':'curr', 'Where do you think you will eat lunch tomorrow?':'next'}
df.rename(columns=col_mapper, inplace=True)

In [4]:
print(df['affinity'].value_counts())
print(df['curr'].value_counts())
print(df['next'].value_counts())

Sophomore           46
Junior              23
Senior              22
Freshmen            13
Graduate Student     6
Faculty/Staff        4
Name: affinity, dtype: int64
Other                                     16
Tandoor                                   12
Ginger and Soy                             9
Sazon                                      9
Il Forno                                   8
Panera                                     6
Devil's Krafthouse                         6
Zweli's                                    5
Farmstead or Sprout                        5
Thyme                                      4
Skillet                                    4
Gothic Grill                               4
McDonalds                                  4
JB's Roast and Chops                       4
Pitchforks                                 4
Chef's Kitchen (Taste of San Fransico)     3
Red Mango                                  3
The Commons                                2
Twinnie's              

In [5]:
### create the transition matrix for all affinities

"""
create transition matrix using pd.crosstab function,
fill NaNs with 0s 
"""
# curr_freqs = df['curr'].value_counts(normalize=True)
# next_freqs = df['next'].value_counts(normalize=True)

temp = pd.concat([df['curr'], df['next']], axis=1)
t = pd.crosstab(temp['curr'], temp['next'], normalize='index')
t = t.fillna(0)

print("\nTransition Matrix:")
print(t)


Transition Matrix:
next                                        Cafe  \
curr                                               
Cafe                                    0.000000   
Chef's Kitchen (Taste of San Fransico)  0.000000   
Devil's Krafthouse                      0.000000   
Farmstead or Sprout                     0.000000   
Ginger and Soy                          0.000000   
Gothic Grill                            0.000000   
Il Forno                                0.000000   
JB's Roast and Chops                    0.000000   
Marketplace                             0.000000   
McDonalds                               0.000000   
Other                                   0.000000   
Panera                                  0.166667   
Pitchforks                              0.000000   
Red Mango                               0.000000   
Saladelia at Perkins                    0.000000   
Sazon                                   0.000000   
Skillet                                 0.00

In [6]:
### save transition matrix as csv
t.to_csv("transition.csv")

In [7]:
### define functions

def predict_future_lunch(curr_lunch, num_steps, transition_matrix=t):
    # params = today's lunch, x days in the future
    # ret = most likely lunch in x days
    t = transition_matrix**num_steps
    return t.loc[curr_lunch].idxmax()

def find_prob_of_future_lunch(curr_lunch, next_lunch, num_steps, transition_matrix=t):
    # params = today's lunch, next lunch, x days in the future, transition matrix
    # ret = prob of eating at next_lunch in x days 
    t = transition_matrix**num_steps
    curr_index = transition_matrix.index.get_loc(curr_lunch)
    next_index = transition_matrix.columns.get_loc(next_lunch)
    return t.iloc[curr_index, next_index]

In [8]:
### test functions 

print(predict_future_lunch('Sazon', 1, t))
print(find_prob_of_future_lunch("Tandoor", "Zweli's", 2, t))


Ginger and Soy
0.006944444444444444
