### Import necessary libraries

In [1]:
import pickle
import numpy as np
import pandas as pd

### Load data of **Pizza Next Door**:

In [2]:
# Load the raw sales table; the columns are 'Order Date', 'Order Id' and one
# integer column per dish.
data = pd.read_csv('../data/raw/sales_analysis.csv')
# Parse the date strings so rows can be sorted and grouped chronologically.
data['Order Date'] = pd.to_datetime(data['Order Date'])
# DataFrame.info() writes its summary to stdout itself and returns None, so it
# is called directly; wrapping it in print() appended a stray "None" line.
data.info()
data.head()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 19027 entries, 0 to 19026
Columns: 130 entries, Order Date to Grilled Dory Platter
dtypes: datetime64[ns](1), int64(129)
memory usage: 18.9 MB
None


Unnamed: 0,Order Date,Order Id,Vegetarian's Delight,BBQ Chicken Pizza,Cheesy Sausage Pizza,Chicken Supreme Pizza,Beef Pepperoni Pizza,French Fries,Cheese Balls,Classic Margherita Pizza,...,Mirinda,Mountain Dew,Rice,Moroccan Chicken Platter,Teriyaki chicken Platter,Mongolian beef Platter,Chicken Basil Platter,Honey Ginger Chicken Platter,Beef Basil Platter,Grilled Dory Platter
0,2023-05-24,1910735,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,2023-05-24,1910036,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,2023-05-24,1909803,0,0,0,0,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0
3,2023-05-24,1909441,0,0,0,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0
4,2023-05-24,1909438,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


### Sort data by date

In [3]:
# Order the rows chronologically and discard the old row labels in one chain,
# so the index is again 0..n-1 in date order.
data = data.sort_values(by=['Order Date']).reset_index(drop=True)
data.head()

Unnamed: 0,Order Date,Order Id,Vegetarian's Delight,BBQ Chicken Pizza,Cheesy Sausage Pizza,Chicken Supreme Pizza,Beef Pepperoni Pizza,French Fries,Cheese Balls,Classic Margherita Pizza,...,Mirinda,Mountain Dew,Rice,Moroccan Chicken Platter,Teriyaki chicken Platter,Mongolian beef Platter,Chicken Basil Platter,Honey Ginger Chicken Platter,Beef Basil Platter,Grilled Dory Platter
0,2021-07-04,192582,0,0,0,0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,2021-07-04,192733,0,0,0,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,2021-07-04,192627,0,0,0,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,2021-07-04,192645,0,1,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,2021-07-04,192649,0,0,0,0,0,0,0,1,...,0,0,0,0,0,0,0,0,0,0


### Get Food Item Names From the Columns

In [4]:
# The first two columns are 'Order Date' and 'Order Id'; every column after
# them is a dish, so slice the column index before converting it to an array.
dish_titles = data.columns[2:].to_numpy()
print(dish_titles.shape)


(128,)


### Assign a unique number to each Dish Title and create a dictionary for it (for future use)

In [5]:
# Map every dish title to its column position within dish_titles.
dish_dict = {dish: i for i, dish in enumerate(dish_titles)}

# Persist the mapping so later steps can translate titles to indices.
with open('../data/processed/dish_dict_new.pkl', 'wb') as f:
    pickle.dump(dish_dict, f)

### Load saved dictionary and print it

In [6]:
# Reload the mapping through a context manager so the file handle is closed
# deterministically instead of being left open until garbage collection.
# NOTE: pickle.load can execute arbitrary code; only unpickle files that this
# notebook (or another trusted source) produced.
with open('../data/processed/dish_dict_new.pkl', 'rb') as f:
    dish_dict = pickle.load(f)
print(dish_dict)

{"Vegetarian's Delight": 0, 'BBQ Chicken Pizza': 1, 'Cheesy Sausage Pizza': 2, 'Chicken Supreme Pizza': 3, 'Beef Pepperoni Pizza': 4, 'French Fries': 5, 'Cheese Balls': 6, 'Classic Margherita Pizza': 7, 'Four Seasons Pizza': 8, 'Corn & Cheese Pizza': 9, 'Sauteed Garlic Mushroom': 10, 'Pizza Fun-Guy (Funghi)': 11, 'Hawaiian BBQ Chicken Pizza': 12, 'Chicken Cashew Nut Salad': 13, 'Water 500ml': 14, 'Chicken Wings': 15, 'Potato Wedges': 16, 'Single Chicken Burger': 17, 'Single Beef Burger': 18, 'Double Chicken Burger': 19, 'Double Beef Burger': 20, 'Pizza Seafood Marinara': 21, 'Prawn-Lime Pizza': 22, 'Chicken Cheese Balls': 23, 'Fried Calamari': 24, 'Tandoori Chicken Pizza': 25, 'Naga Beef Pizza': 26, 'Club Sandwich': 27, 'Smoked Chicken Sandwich': 28, 'Crispy Chicken Burger': 29, 'Chicken Caesar Salad': 30, 'Fish & Chips': 31, 'Teriyaki Chicken': 32, 'Mongolian Beef': 33, 'Moroccan Chicken': 34, 'Penne Alfredo': 35, 'Spaghetti Bolognese': 36, 'Fettuccine Alfredo': 37, 'Seafood Marinara'

### Check if there are duplicate order ids

In [7]:
order_ids = data['Order Id'].to_numpy()

# Duplicates exist exactly when collapsing the ids to their distinct values
# leaves fewer entries than there are rows.
distinct_id_count = np.unique(order_ids).size
print("Are there duplicate order ids? ", distinct_id_count != order_ids.size)

Are there duplicate order ids?  False


### Sum Up Order Quantities for each Unique Date

In [8]:
dates = data['Order Date'].to_numpy()
# np.unique returns the distinct dates in ascending order; that order defines
# the row order of the sales matrix.
unique_dates = np.unique(dates)

print("Number of unique dates: ", len(unique_dates))

# Sum the quantity of every dish per day with a single grouped aggregation
# instead of re-filtering the whole frame once per date. Reindexing on
# unique_dates pins the row order to the date -> row mapping built below and
# yields an all-zero row for any date the grouping did not produce.
daily_totals = (
    data.groupby('Order Date')[list(dish_titles)]
    .sum()
    .reindex(unique_dates, fill_value=0)
)
sales = daily_totals.to_numpy(dtype=float)

# Date -> row index of that date in `sales`.
dates_dict = {date: i for i, date in enumerate(unique_dates)}

print(sales.shape)

# Save the date mapping so rows of the order matrix can be traced back to dates.
with open('../data/processed/dates_dict.pkl', 'wb') as f:
    pickle.dump(dates_dict, f)

Number of unique dates:  690
(690, 128)


### Save the Order Matrix

In [9]:
# Write the (dates x dishes) daily sales matrix as comma-separated text.
np.savetxt(
    fname='../data/processed/order_matrix_new.csv',
    X=sales,
    delimiter=",",
)

### View 1st row of the Order Matrix

In [10]:
# Read the saved matrix back from disk to confirm the round trip.
order_matrix = np.loadtxt(
    '../data/processed/order_matrix_new.csv',
    delimiter=",",
)

# Show the matrix shape, then its first row (the totals for the earliest date).
print(order_matrix.shape, order_matrix[0], sep="\n")

(690, 128)
[0. 1. 0. 2. 1. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0.]


### View all raw orders placed on the 1st date, and their per-dish totals

In [11]:
# Positions of the rows whose order date equals the earliest date.
on_first_date = data['Order Date'] == unique_dates[0]
first_day = np.where(on_first_date)
first_day_orders = data.iloc[first_day]

# Show every column when printing (this changes the display option globally).
pd.set_option('display.max_columns', None)
print(first_day_orders)

# Column-wise totals over the dish columns (everything after the first two);
# these should match the first row of the order matrix.
first_day_totals = first_day_orders.to_numpy()[:, 2:].sum(axis=0)
print(first_day_totals)

  Order Date  Order Id  Vegetarian's Delight  BBQ Chicken Pizza  \
0 2021-07-04    192582                     0                  0   
1 2021-07-04    192733                     0                  0   
2 2021-07-04    192627                     0                  0   
3 2021-07-04    192645                     0                  1   
4 2021-07-04    192649                     0                  0   

   Cheesy Sausage Pizza  Chicken Supreme Pizza  Beef Pepperoni Pizza  \
0                     0                      0                     1   
1                     0                      1                     0   
2                     0                      1                     0   
3                     0                      0                     0   
4                     0                      0                     0   

   French Fries  Cheese Balls  Classic Margherita Pizza  Four Seasons Pizza  \
0             0             0                         0                   0   
1     

#### Use Correlation to generate an Adjacency Matrix of weighted edges depicting the relationship between food items

In [12]:
# Keep only the per-dish columns; the order id and date are not features.
cleaned_data = data.drop(['Order Id', 'Order Date'], axis=1)

# Pairwise correlation between dishes. NaN entries are replaced by 0, and the
# sign is dropped so only the strength of the relationship remains.
correlation_matrix = cleaned_data.corr().fillna(0).abs()

# 20th percentile taken over every entry of the matrix.
threshold = np.percentile(correlation_matrix, 20)

# True where an entry is at or above the threshold.
mask = correlation_matrix >= threshold

# Entries below the threshold become 0; the result is a plain ndarray.
correlation_matrix_filtered = correlation_matrix.where(mask, 0).to_numpy()

print(correlation_matrix_filtered.shape)

# Save the filtered correlation matrix as a CSV file
np.savetxt(
    '../data/processed/corr_matrix_filtered.csv',
    correlation_matrix_filtered,
    delimiter=",",
)



# cleaned_data = data.drop(columns=['Order Id', 'Order Date'])

# correlation_matrix = cleaned_data.corr()
# correlation_matrix = correlation_matrix.fillna(0)
# # correlation_matrix.to_csv('../data/processed/corr_matrix_with_index.csv')


# correlation_matrix = np.abs(correlation_matrix)
# print(correlation_matrix.shape)

# # Set lower 


# # save the correlation matrix as a csv file
# np.savetxt('../data/processed/corr_matrix.csv', correlation_matrix, delimiter=",")


# sorted_correlation_matrix = pd.DataFrame()

# # Step 3: Sort columns by maximum absolute correlation values (positive or negative)
# sorted_columns = sorted(correlation_matrix.columns, key=lambda x: correlation_matrix[x].abs().max(), reverse=True)

# # Step 4: Iterate over sorted columns and append to sorted correlation matrix
# for column in sorted_columns:
#     sorted_correlation_matrix[column] = correlation_matrix[column]

# # Step 5: Reindex sorted correlation matrix to match the order of sorted columns
# sorted_correlation_matrix = sorted_correlation_matrix.reindex(sorted_columns)

# # Step 6: Reindex rows of sorted correlation matrix to match the order of sorted columns
# sorted_correlation_matrix = sorted_correlation_matrix.reindex(sorted_columns)

# # Step 7: Reindex index and columns of sorted correlation matrix to match the order of sorted columns
# sorted_correlation_matrix.index = sorted_columns
# sorted_correlation_matrix.columns = sorted_columns

# sorted_correlation_matrix.to_csv('../data/processed/sorted_corr_matrix_with_index.csv')

(128, 128)


### Make Adjacency Matrix by actually counting the number of times two food items are ordered together

In [13]:
# Presence matrix (orders x dishes): 1.0 where the order contains the dish in
# any quantity, 0.0 otherwise. Testing "> 0" rather than "== 1" keeps dishes
# ordered more than once in a single order from being silently ignored; for
# pure 0/1 data the two tests are identical.
ordered = (cleaned_data.to_numpy() > 0).astype(float)

# (dishes x orders) @ (orders x dishes): entry [a, b] is the number of orders
# that contain both dish a and dish b. This replaces the per-row Python loop
# and produces the same symmetric counts.
co_matrix = ordered.T @ ordered

# The product's diagonal holds each dish's own order count; a dish is not
# paired with itself, so the diagonal is zeroed.
np.fill_diagonal(co_matrix, 0)

co_occurrence_df = pd.DataFrame(co_matrix, index=dish_titles, columns=dish_titles)
print(co_occurrence_df.head())
print(co_occurrence_df.shape)

# Save the co-occurrence matrix as a bare numeric CSV (no labels), so row and
# column positions follow the order of dish_titles.
co_occurrence_df.to_csv('../data/processed/co_occurrence_matrix.csv', index=False, header=False)

                       Vegetarian's Delight  BBQ Chicken Pizza  \
Vegetarian's Delight                    0.0               10.0   
BBQ Chicken Pizza                      10.0                0.0   
Cheesy Sausage Pizza                    2.0               68.0   
Chicken Supreme Pizza                   7.0              117.0   
Beef Pepperoni Pizza                    2.0               56.0   

                       Cheesy Sausage Pizza  Chicken Supreme Pizza  \
Vegetarian's Delight                    2.0                    7.0   
BBQ Chicken Pizza                      68.0                  117.0   
Cheesy Sausage Pizza                    0.0                   47.0   
Chicken Supreme Pizza                  47.0                    0.0   
Beef Pepperoni Pizza                   32.0                   51.0   

                       Beef Pepperoni Pizza  French Fries  Cheese Balls  \
Vegetarian's Delight                    2.0          10.0          14.0   
BBQ Chicken Pizza               