### Import necessary libraries

In [1]:
import pickle
import numpy as np
import pandas as pd

### Load data of **Pizza Next Door**:

In [4]:
# Read the raw order export for Pizza Next Door into a DataFrame.
data = pd.read_csv('../data/raw/pizzanextdoor_final.csv')
# DataFrame.info() writes its summary to stdout itself and returns None, so it
# is called directly; wrapping it in print() would emit a stray "None" line.
data.info()
data.head()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 23237 entries, 0 to 23236
Data columns (total 5 columns):
 #   Column         Non-Null Count  Dtype 
---  ------         --------------  ----- 
 0   orderDate      23237 non-null  object
 1   categoryTitle  23158 non-null  object
 2   dishTitle      23237 non-null  object
 3   orderQuantity  23237 non-null  int64 
 4   orderPrice     23237 non-null  int64 
dtypes: int64(2), object(3)
memory usage: 907.8+ KB
None


Unnamed: 0,orderDate,categoryTitle,dishTitle,orderQuantity,orderPrice
0,2021-07-04,Thin-Crust Pizza,Beef Pepperoni Pizza,1,340
1,2021-07-04,Thin-Crust Pizza,Chicken Supreme Pizza,2,1285
2,2021-07-04,Thin-Crust Pizza,BBQ Chicken Pizza,1,940
3,2021-07-04,Thin-Crust Pizza,Classic Margherita Pizza,1,285
4,2021-07-05,Thin-Crust Pizza,Chicken Supreme Pizza,2,1170


### Remove unnecessary columns
1. **categoryTitle** - Does not need to be included in the analysis
2. **orderPrice** - Not needed for this analysis

In [5]:
# Remove the columns that are not used in this analysis.
# Rebinding (instead of inplace=True) together with errors='ignore' keeps this
# cell idempotent: re-running it after the columns are already gone no longer
# raises a KeyError.
data = data.drop(columns=['categoryTitle', 'orderPrice'], errors='ignore')
data.head()

Unnamed: 0,orderDate,dishTitle,orderQuantity
0,2021-07-04,Beef Pepperoni Pizza,1
1,2021-07-04,Chicken Supreme Pizza,2
2,2021-07-04,BBQ Chicken Pizza,1
3,2021-07-04,Classic Margherita Pizza,1
4,2021-07-05,Chicken Supreme Pizza,2


### Get the unique Dish Titles:

In [6]:
# Show every distinct dish title exactly as it is spelled in the data.
distinct_titles = data['dishTitle'].unique()
print(distinct_titles)

['Beef Pepperoni Pizza' 'Chicken Supreme Pizza' 'BBQ Chicken  Pizza'
 'Classic Margherita Pizza' 'Pizza Fun-Guy (Funghi)' 'Buffalo Wings'
 'Teriyaki Chicken Wings' 'Chicken Cheese Balls' 'Cheesy Sausage Pizza'
 'Peri Peri Beef Burger' 'Hawaiian BBQ Chicken Pizza' 'French Fries'
 'Naga Wings' 'Pizza Seafood Marinara' 'Garlic Mushroom with Chicken'
 "Vegetarian's Delight" 'Sauteed Garlic Mushroom' 'BBQ Wings'
 'Potato Wedges' 'Classic Chicken Burger' 'Chicken Cheese Burger'
 'Cheese Balls' 'Four Seasons Pizza' 'Prawn-Lime Pizza'
 'Beef Cheese Burger' 'Water 500ml' 'Classic Beef Burger'
 'BBQ Chicken Burger' 'Corn & Cheese Pizza' 'Naga Beef Pizza'
 'Club Sandwich' 'Tandoori Chicken Pizza' 'Prawn On Toast'
 'Fried Calamari' 'Fish & Chips' 'Seafood Marinara' 'Coke Can'
 'Sprite Can' 'Smoked Chicken Sandwich' 'Mongolian Beef'
 'Teriyaki Chicken' 'Coke' 'Fanta Can' 'Sprite' 'Espresso' 'Cappuccino'
 'Moroccan Chicken' 'Fanta' 'Americano' 'Spaghetti Bolognese'
 'Flavored Latte' 'Blueberry Chees

### Sort the Dish Titles in alphabetical order

In [8]:
# Lower-case the titles, de-duplicate them, and order them alphabetically.
lowercase_titles = data['dishTitle'].str.lower()
dish_titles = np.sort(lowercase_titles.unique())
print(dish_titles)
print("Total number of dishes: {}".format(len(dish_titles)))

['add on (butter)' 'almond cookie' 'americano' 'americano misto'
 'apple soda' 'basic iftar platter' 'bbq chicken  pizza'
 'bbq chicken burger' 'bbq wings' 'beef basil platter'
 'beef cheese burger' 'beef kala bhuna' 'beef pepperoni pizza'
 'beef steak (300+ gms)' 'blueberry cheese cake'
 'blueberry cheesecake shake' 'brownie' 'brownie with ice cream'
 'buffalo wings' 'butter rice' 'caffe mocha' 'cappuccino' 'ceylon supreme'
 'ceylon supreme/premium' 'cheese balls' 'cheese sandwich'
 'cheesy sausage pizza' 'chicken & chips' 'chicken basil platter'
 'chicken butterfly' 'chicken caesar salad' 'chicken cashew nut salad'
 'chicken cheese balls' 'chicken cheese burger' 'chicken fiesta'
 'chicken pasta salad' 'chicken satay' 'chicken supreme pizza'
 'chicken wings' 'chocolate chips cookie' 'chocolate mousse'
 'chocolate shake' 'classic bbq' 'classic beef burger'
 'classic chicken burger' 'classic english breakfast'
 'classic english tea' 'classic margherita pizza' 'club sandwich' 'coke'
 'co

### Assign a unique number to each Dish Title and create a dictionary for it (for future use)

In [6]:
# Give each dish title its position in the sorted `dish_titles` array as an id.
dish_dict = {title: idx for idx, title in enumerate(dish_titles)}

# Persist the title -> id mapping so it can be reloaded later.
with open('../data/processed/dish_dict.pkl', 'wb') as dict_file:
    pickle.dump(dish_dict, dict_file)

### Load saved dictionary and print it

In [7]:
# Reload the title -> id mapping written by the previous cell.
# A context manager is used so the file handle is closed deterministically
# (the bare pickle.load(open(...)) form leaves it open).
# NOTE: pickle can execute arbitrary code while loading; only load pickle
# files that this project produced itself.
with open('../data/processed/dish_dict.pkl', 'rb') as f:
    dish_dict = pickle.load(f)
print(dish_dict)

{'Americano': 0, 'Apple Soda': 1, 'BBQ Chicken  Pizza': 2, 'BBQ Chicken Burger': 3, 'Beef Pepperoni Pizza': 4, 'Blueberry Cheese Cake': 5, 'Blueberry Cheesecake Shake': 6, 'Brownie with Ice Cream': 7, 'Buffalo Wings': 8, 'Caffe Mocha': 9, 'Cappuccino': 10, 'Cheese Balls': 11, 'Cheesy Sausage Pizza': 12, 'Chicken Butterfly': 13, 'Chicken Caesar Salad': 14, 'Chicken Cashew Nut Salad': 15, 'Chicken Cheese Balls': 16, 'Chicken Supreme Pizza': 17, 'Chocolate Mousse': 18, 'Classic Beef Burger': 19, 'Classic Chicken Burger': 20, 'Classic English Breakfast': 21, 'Classic Margherita Pizza': 22, 'Club Sandwich': 23, 'Coke': 24, 'Coke Can': 25, 'Continental Light Breakfast': 26, 'Cookie': 27, 'Corn & Cheese Pizza': 28, 'Cream of Mushroom': 29, 'Croissant': 30, 'Croissant Sandwich': 31, 'Doppio': 32, 'Earl Grey': 33, 'Espresso': 34, 'Fanta': 35, 'Fanta Can': 36, 'Fettuccine Baked Pasta': 37, 'Fettuccine Creme Lime-e': 38, 'Fish & Chips': 39, 'Flavored Latte': 40, 'Four Seasons Pizza': 41, 'French 

### Replace the Dish Titles with their corresponding numbers

In [8]:
# dish_dict is keyed by the lower-cased titles (it is built from
# data['dishTitle'].str.lower()), so the column has to be lower-cased before
# the lookup; mapping the original mixed-case titles would yield NaN.
dish_ids = data['dishTitle'].str.lower().map(dish_dict)

# Fail loudly instead of silently carrying NaN ids into later steps.
unmapped_mask = dish_ids.isna()
if unmapped_mask.any():
    raise ValueError(
        "Dish titles missing from dish_dict: {}".format(
            sorted(data.loc[unmapped_mask, 'dishTitle'].astype(str).unique())
        )
    )

# Every title resolved, so the ids can be stored as plain integers.
data['dishTitle'] = dish_ids.astype(int)

data.head()

Unnamed: 0,orderDate,dishTitle,orderQuantity
0,2021-08-07,45,2
1,2021-08-07,67,1
2,2021-08-07,77,1
3,2021-08-07,23,1
4,2021-08-07,16,3


#### Get unique order dates

In [9]:
# unique order dates, sorted ascending (the dates are stored as strings, so
# this is a lexicographic sort; for 'YYYY-MM-DD' values as shown in the data
# preview that matches chronological order)
unique_dates = data['orderDate'].unique()
unique_dates.sort()
print(unique_dates)

# save unique dates dict (date string -> position in the sorted array) for later use
date_dict = {date: i for i, date in enumerate(unique_dates)}

with open('../data/processed/date_dict.pkl', 'wb') as f:
    pickle.dump(date_dict, f)

# load date dict; use a context manager so the file handle is closed
# NOTE: pickle can execute arbitrary code while loading; only load pickle
# files that this project produced itself.
with open('../data/processed/date_dict.pkl', 'rb') as f:
    date_dict = pickle.load(f)
print(date_dict)
    

['2021-08-07' '2021-08-08' '2021-08-09' '2021-08-10' '2021-08-11'
 '2021-08-12' '2021-08-13' '2021-08-14' '2021-08-15' '2021-08-16'
 '2021-08-17' '2021-08-18' '2021-08-19' '2021-08-20' '2021-08-21'
 '2021-08-22' '2021-08-23' '2021-08-24' '2021-08-25' '2021-08-26'
 '2021-08-27' '2021-08-28' '2021-08-29' '2021-08-30' '2021-08-31'
 '2021-09-01' '2021-09-02' '2021-09-03' '2021-09-04' '2021-09-05'
 '2021-09-06' '2021-09-07' '2021-09-08' '2021-09-09' '2021-09-10'
 '2021-09-11' '2021-09-12' '2021-09-13' '2021-09-14' '2021-09-15'
 '2021-09-16' '2021-09-17' '2021-09-18' '2021-09-19' '2021-09-20'
 '2021-09-21' '2021-09-22' '2021-09-23' '2021-09-24' '2021-09-25'
 '2021-09-26' '2021-09-27' '2021-09-28' '2021-09-29' '2021-09-30'
 '2021-10-01' '2021-10-02' '2021-10-03' '2021-10-04' '2021-10-05'
 '2021-10-06' '2021-10-07' '2021-10-08' '2021-10-09' '2021-10-10'
 '2021-10-11' '2021-10-12' '2021-10-13' '2021-10-14' '2021-10-15'
 '2021-10-16' '2021-10-17' '2021-10-18' '2021-10-19' '2021-10-20'
 '2021-10-

#### Build the order matrix: (number of unique dates) × (number of unique dishes)

In [10]:
# Build the order matrix of shape (num_unique_dates, num_unique_dishes).
# Entry [d, k] is the total quantity of dish id k ordered on date d
# (0 if there were no orders, otherwise the sum of the orderQuantity column).
order_matrix = np.zeros((len(unique_dates), len(dish_titles)))
print(order_matrix.shape)

# The dish column must already hold integer dish ids; NaN here means the
# title -> id mapping step did not resolve every title.
if data['dishTitle'].isna().any():
    raise ValueError("dishTitle contains unmapped (NaN) entries; map titles to ids first")

# Row index of every order line (position of its date in unique_dates) and
# column index (its dish id).
date_to_row = {date: row for row, date in enumerate(unique_dates)}
row_idx = data['orderDate'].map(date_to_row).astype(int).to_numpy()
col_idx = data['dishTitle'].astype(int).to_numpy()

# np.add.at performs unbuffered accumulation, so every order line contributes
# its own quantity even when the same (date, dish) pair appears several times.
# The previous per-row loop re-added the first matching row's quantity for each
# occurrence, which is wrong whenever repeated lines have different quantities,
# and it rescanned the whole frame for every row.
np.add.at(order_matrix, (row_idx, col_idx), data['orderQuantity'].to_numpy())


# save order matrix in a csv file
np.savetxt('../data/processed/order_matrix.csv', order_matrix, delimiter=',')

(126, 95)


#### Load food Adjacency Matrix from "Food Adj.csv" file

In [11]:
# Read the food adjacency matrix from the comma-separated "Food Adj.csv" file.
adjacency_source = '../data/processed/Food Adj.csv'
food_adj = np.loadtxt(adjacency_source, delimiter=',')
print(food_adj.shape)

# Write the same matrix back out as comma-separated text under the file name
# food_adj.csv.
adjacency_target = '../data/processed/food_adj.csv'
np.savetxt(adjacency_target, food_adj, delimiter=',')

(95, 95)
