### Import necessary libraries

In [1]:
import pickle
import numpy as np
import pandas as pd
import yaml

### Load data of **Pizza Next Door**:

In [2]:
config = None
with open(
        f'../configs/ustgcn_config.yaml',
        'r', encoding='utf8'
    ) as config_file:
        config = yaml.safe_load(config_file)
config

{'data_params': {'adj_path': 'data/processed/co_occurrence_matrix.csv',
  'content_path': 'data/processed/order_matrix_new.csv',
  'dates_dict_path': 'data/processed/dates_dict.pkl',
  'dish_dict_path': 'data/processed/dish_dict_new.pkl',
  'discard_items_path': 'data/raw/discard_items.csv',
  'processed_data_path': 'data/processed/order_matrix_new.csv'},
 'exp_params': {'batch_size': 256,
  'device': 'cuda',
  'epochs': 500,
  'learning_rate': 0.001,
  'num_gnn_layers': 3,
  'test_model_path': '/content/Restaurant-Sales-Prediction-USTGCN/work_dir/logs/run_20'},
 'logging_params': {'last_saved_model': '/content/Restaurant-Sales-Prediction-USTGCN/work_dir/logs/run_20',
  'work_dir': '/content/Restaurant-Sales-Prediction-USTGCN/work_dir'},
 'model_params': {'num_days': 30,
  'pred_len': 7,
  'test_end': 1059,
  'test_start': 720,
  'test_stride': 7,
  'train_end': 719,
  'train_start': 1,
  'train_stride': 1}}

In [10]:
discard_items_path = "../"+config["data_params"]["discard_items_path"]
discard_items_df = pd.read_csv(discard_items_path)
discard_items_df

Unnamed: 0,Selected Menu,Discard
0,1. Kacchi Basmati ( P1 ),
1,1. Kacchi Basmoti ( P3 ),
2,1. Kacchi Basmati ( P5 ),
3,2. Kacchi Basmati ( P1 ),
4,2. Kacchi Basmoti ( P3 ),
...,...,...
131,Rose Apple Juice,
132,Oreo Cold Coffee,
133,KitKat Cold Coffee,
134,Special Faluda,


In [11]:
filtered_df = discard_items_df[discard_items_df['Discard'] == 1]
discard_items = filtered_df['Selected Menu']
columns_to_drop = discard_items.tolist()

In [4]:
data = pd.read_csv('../data/raw/shop-data-101000.csv')
print(data.shape)
data = data.drop(columns=columns_to_drop, errors='ignore')
print(data.shape)
data['Order Date'] = pd.to_datetime(data['Order Date'])
print(data.info())
data.head()

(113002, 138)
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 113002 entries, 0 to 113001
Columns: 138 entries, Order Date to Fruit salada
dtypes: int64(137), object(1)
memory usage: 119.0+ MB
None


Unnamed: 0,Order Date,Order Id,1. Kacchi Basmati ( P1 ),1. Kacchi Basmoti ( P3 ),1. Kacchi Basmati ( P5 ),2. Kacchi Basmati ( P1 ),2. Kacchi Basmoti ( P3 ),2. Kacchi Basmati ( P5 ),3. Kacchi Basmati ( P1 ),3. Kacchi Basmati ( P3 ),...,Papaya Juice,Pineapple Juice,Raw Mango,Ripe Mango,Olive Juice,Rose Apple Juice,Oreo Cold Coffee,KitKat Cold Coffee,Special Faluda,Fruit salada
0,2023-06-06,1983957,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,2023-06-06,1983956,4,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,2023-06-06,1983949,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,2023-06-06,1983927,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,2023-06-06,1983911,2,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


### Sort data by date

In [12]:
data = data.sort_values(by=['Order Date'])
data = data.reset_index(drop=True)
data.head()

Unnamed: 0,Order Date,Order Id,1. Kacchi Basmati ( P1 ),1. Kacchi Basmoti ( P3 ),1. Kacchi Basmati ( P5 ),2. Kacchi Basmati ( P1 ),2. Kacchi Basmoti ( P3 ),2. Kacchi Basmati ( P5 ),3. Kacchi Basmati ( P1 ),3. Kacchi Basmati ( P3 ),...,Papaya Juice,Pineapple Juice,Raw Mango,Ripe Mango,Olive Juice,Rose Apple Juice,Oreo Cold Coffee,KitKat Cold Coffee,Special Faluda,Fruit salada
0,2020-06-04,61456,2,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,2020-06-04,61485,0,1,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,2020-06-04,61460,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,2020-06-04,61461,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,2020-06-04,61464,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


### Get Fooditem Names From Column

In [13]:
dish_titles = data.columns.to_numpy()[2:]
print(dish_titles.shape)


(136,)


### Assign a unique number to each Dish Title and create a dictionary for it (for future use)

In [14]:
dish_dict = {}
for i, dish in enumerate(dish_titles):
    dish_dict[dish] = i

with open('../data/processed/dish_dict_new.pkl', 'wb') as f:
    pickle.dump(dish_dict, f)

### Load saved dictionary and print it

In [15]:
dish_dict = pickle.load(open('../data/processed/dish_dict_new.pkl', 'rb'))
print(dish_dict)

{'1. Kacchi Basmati ( P1 )': 0, '1. Kacchi Basmoti ( P3 )': 1, '1. Kacchi Basmati ( P5 )': 2, '2. Kacchi Basmati ( P1 )': 3, '2. Kacchi Basmoti ( P3 )': 4, '2. Kacchi Basmati ( P5 )': 5, '3. Kacchi Basmati ( P1 )': 6, '3. Kacchi Basmati ( P3 )': 7, '3. Kacchi Basmati ( P5 )': 8, 'Achari Kabab': 9, 'Chicken Reshmi Kabab': 10, 'Chicken Hariali': 11, 'Chicken Tikka': 12, 'Mutton Boti Kabab': 13, 'Chicken Roast': 14, 'Jali Kabab': 15, 'Mutton Rezala': 16, 'Plan Naan': 17, 'Butter Naan': 18, 'Garlic Naan': 19, 'Tilli Naan': 20, 'Kashmiri Naan': 21, 'Afgani Naan': 22, 'Nawabiana Special Naan': 23, 'Firni': 24, 'Jorda': 25, 'Water ( Large )': 26, 'Burhani ( 1 Glass )': 27, 'Chicken chap': 28, 'Chicken Taoya Jhal': 29, 'Chicken Tandoori': 30, 'Morog polao (Dim)': 31, 'Kacchi set': 32, 'Polao set': 33, 'Set 01': 34, 'Set 02': 35, 'Set 03': 36, 'Set 04': 37, 'Pepsi 400mg': 38, 'Drinks': 39, 'Chicken Boti Taoya': 40, 'Kacchi': 41, 'Garlic chilli naan': 42, 'Burhani(half litter)': 43, 'Burhani(1 l

### Check if there are duplicate order ids

In [16]:
order_ids = data['Order Id'].to_numpy()

# check if there are duplicate order ids
print("Are there duplicate order ids? ", len(order_ids) != len(set(order_ids)))

Are there duplicate order ids?  False


### Sum Up Order Quantities for each Unique Date

In [17]:
dates = data['Order Date'].to_numpy()
unique_dates = np.unique(dates)

print("Number of unique dates: ", len(unique_dates))

# save dates mapping
dates_dict = {}

# sum up the sales of each dish for each day
sales = np.zeros((len(unique_dates), len(dish_titles)))
for i, date in enumerate(unique_dates):
    sales[i] = np.sum(data[data['Order Date'] == date].to_numpy()[:, 2:], axis=0)
    dates_dict[date] = i

print(sales.shape)

# save date mapping to pickle
# print(dates_dict)
with open('../data/processed/dates_dict.pkl', 'wb') as f:
    pickle.dump(dates_dict, f)

Number of unique dates:  1059
(1059, 136)


### Save the Order Matrix

In [18]:
np.savetxt('../data/processed/order_matrix_new.csv', sales, delimiter=",")

### View 1st row of the Order Matrix

In [19]:
order_matrix = np.loadtxt('../data/processed/order_matrix_new.csv', delimiter=",")
print(order_matrix.shape)

print(order_matrix[0])

(1059, 136)
[21.  1.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
  8.  0.  0.  1.  0.  0.  0.  0.  1.  0.  3.  0.  5.  0.  0.  0.  0.  0.
  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.]


### View all rows of the Order Matrix with 1st date

In [20]:
first_day = np.where(data['Order Date'] == unique_dates[0])
first_day_orders = data.iloc[first_day]

pd.set_option('display.max_columns', None)
print(first_day_orders)

print(np.sum(first_day_orders.to_numpy()[:, 2:], axis=0))

    Order Date  Order Id  1. Kacchi Basmati ( P1 )  1. Kacchi Basmoti ( P3 )  \
0   2020-06-04     61456                         2                         0   
1   2020-06-04     61485                         0                         1   
2   2020-06-04     61460                         1                         0   
3   2020-06-04     61461                         1                         0   
4   2020-06-04     61464                         1                         0   
5   2020-06-04     61465                         2                         0   
6   2020-06-04     61471                         1                         0   
7   2020-06-04     61477                         2                         0   
8   2020-06-04     61478                         0                         0   
9   2020-06-04     61473                         6                         0   
10  2020-06-04     61489                         0                         0   
11  2020-06-04     61491                

#### Use Coerrelation to generate Adjacency Matrix of weighted edges depicting the relationship between food items

In [42]:
cleaned_data = data.drop(columns=['Order Id', 'Order Date'])
correlation_matrix = cleaned_data.corr().fillna(0)
correlation_matrix = np.abs(correlation_matrix)

# Calculate the threshold for the lowest 20% values
threshold = np.percentile(correlation_matrix, 20)

# Create a boolean mask for values above or equal to the threshold
mask = correlation_matrix >= threshold

# Keep only the values above or equal to the threshold, set the rest to 0
correlation_matrix_filtered = np.where(mask, correlation_matrix, 0)

print(correlation_matrix_filtered.shape)

# Save the filtered correlation matrix as a CSV file
np.savetxt('../data/processed/corr_matrix_filtered.csv',
           correlation_matrix_filtered, delimiter=",")



# cleaned_data = data.drop(columns=['Order Id', 'Order Date'])

# correlation_matrix = cleaned_data.corr()
# correlation_matrix = correlation_matrix.fillna(0)
# # correlation_matrix.to_csv('../data/processed/corr_matrix_with_index.csv')


# correlation_matrix = np.abs(correlation_matrix)
# print(correlation_matrix.shape)

# # Set lower 


# # save the correlation matrix as a csv file
# np.savetxt('../data/processed/corr_matrix.csv', correlation_matrix, delimiter=",")


# sorted_correlation_matrix = pd.DataFrame()

# # Step 3: Sort columns by maximum absolute correlation values (positive or negative)
# sorted_columns = sorted(correlation_matrix.columns, key=lambda x: correlation_matrix[x].abs().max(), reverse=True)

# # Step 4: Iterate over sorted columns and append to sorted correlation matrix
# for column in sorted_columns:
#     sorted_correlation_matrix[column] = correlation_matrix[column]

# # Step 5: Reindex sorted correlation matrix to match the order of sorted columns
# sorted_correlation_matrix = sorted_correlation_matrix.reindex(sorted_columns)

# # Step 6: Reindex rows of sorted correlation matrix to match the order of sorted columns
# sorted_correlation_matrix = sorted_correlation_matrix.reindex(sorted_columns)

# # Step 7: Reindex index and columns of sorted correlation matrix to match the order of sorted columns
# sorted_correlation_matrix.index = sorted_columns
# sorted_correlation_matrix.columns = sorted_columns

# sorted_correlation_matrix.to_csv('../data/processed/sorted_corr_matrix_with_index.csv')

(110, 110)


### Make Adjacency Matrix by actually counting the number of times two food items are ordered together

In [43]:
co_matrix = np.zeros((len(dish_titles), len(dish_titles)))

for _, row in cleaned_data.iterrows():
    # get the indices of the dishes that were ordered
    ordered_dishes = np.where(row.to_numpy() == 1)[0]

    # Update the co-occurrence matrix based on ordered item combinations
    for i in range(len(ordered_dishes)):
        for j in range(i+1, len(ordered_dishes)):
            co_matrix[ordered_dishes[i], ordered_dishes[j]] += 1
            co_matrix[ordered_dishes[j], ordered_dishes[i]] += 1

co_occurrence_df = pd.DataFrame(co_matrix, index=dish_titles, columns=dish_titles)
print(co_occurrence_df.head())
print(co_occurrence_df.shape)

# save the co-occurrence matrix as a csv file
co_occurrence_df.to_csv('../data/processed/co_occurrence_matrix.csv', index=False, header=False)

                          1. Kacchi Basmati ( P1 )  1. Kacchi Basmoti ( P3 )  \
1. Kacchi Basmati ( P1 )                       0.0                     377.0   
1. Kacchi Basmoti ( P3 )                     377.0                       0.0   
1. Kacchi Basmati ( P5 )                     123.0                      70.0   
2. Kacchi Basmati ( P1 )                     243.0                       3.0   
2. Kacchi Basmoti ( P3 )                      22.0                       1.0   

                          1. Kacchi Basmati ( P5 )  2. Kacchi Basmati ( P1 )  \
1. Kacchi Basmati ( P1 )                     123.0                     243.0   
1. Kacchi Basmoti ( P3 )                      70.0                       3.0   
1. Kacchi Basmati ( P5 )                       0.0                       0.0   
2. Kacchi Basmati ( P1 )                       0.0                       0.0   
2. Kacchi Basmoti ( P3 )                       0.0                      15.0   

                          2. Kacchi Ba

In [44]:
zero_co_occurence_matrix = pd.DataFrame(0, index=co_occurrence_df.index, columns=co_occurrence_df.columns)
zero_co_occurence_matrix.to_csv('../data/processed/co_occurrence_matrix.csv', index=False, header=False)
zero_co_occurence_matrix

Unnamed: 0,1. Kacchi Basmati ( P1 ),1. Kacchi Basmoti ( P3 ),1. Kacchi Basmati ( P5 ),2. Kacchi Basmati ( P1 ),2. Kacchi Basmoti ( P3 ),2. Kacchi Basmati ( P5 ),3. Kacchi Basmati ( P1 ),3. Kacchi Basmati ( P3 ),3. Kacchi Basmati ( P5 ),Achari Kabab,Chicken Reshmi Kabab,Chicken Hariali,Chicken Tikka,Mutton Boti Kabab,Chicken Roast,Jali Kabab,Mutton Rezala,Plan Naan,Butter Naan,Garlic Naan,Tilli Naan,Kashmiri Naan,Afgani Naan,Nawabiana Special Naan,Chicken chap,Chicken Taoya Jhal,Chicken Tandoori,Morog polao (Dim),Kacchi set,Polao set,Set 01,Set 02,Set 03,Set 04,Chicken Boti Taoya,Kacchi,Garlic chilli naan,Kacchi special,Plain Polao Set 1,Morog pola (Dim),Chicken onion chaap,Jali kabab,Jorda.1,Chicken Boti Taoya.1,Chicken Taowa Jhal,Chicken achari kabab,Chicken Tandoori.1,Plan naan,Butter naan,Garlic naan,Garlic chilli naan.1,Kashmiri Naan.1,Afghani naan,Nawabiana special naan,Ramadan Platter 01,Grill Platter,Kacchi set 2,Plain Polao Set 2,Nawabiana Kashmiri Naan,Chicken Nawabi Kabab,Chicken Onion Chap,Kasmiri Faluda,Strawberry Milkshake,Vanilla Milkshake,Chocolate Milkshake,Oreo Milkshake,Caramel Cold Coffee,Coconut Milkshake,Nawabiana Special Fruits Salad,4. Kacchi Basmati(P1),4. Kacchi Basmati(P3),Morog pola (Jali),Jhal Misty Voj,Achari Voj,Kashmiri voj,Nawabi Voj,Kacchi (Package),Plain Polao (Package),4. Kacchi Basmati(P5),Kacchi (4pcs Mutton),Jhal Misty Voj.1,Achari Voj.1,Tandoori\n Voj,Nawabi Voj.1,Morog polao (jali),Chicken chap.1,KACCHI SET 3,Tikka Kabab,Morog polao without egg,Iftar package 01,Iftar package 2,Iftar package 03,Iftar package 4,Iftar package 5,Iftar package 6,Malai Lacchi,Blueberry Lacchi,Apple juice,Grape Juice,Malta Juice,Papaya Juice,Pineapple Juice,Raw Mango,Ripe Mango,Olive Juice,Rose Apple Juice,Oreo Cold Coffee,KitKat Cold Coffee,Special Faluda,Fruit salada
1. Kacchi Basmati ( P1 ),0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1. Kacchi Basmoti ( P3 ),0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1. Kacchi Basmati ( P5 ),0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2. Kacchi Basmati ( P1 ),0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2. Kacchi Basmoti ( P3 ),0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Rose Apple Juice,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
Oreo Cold Coffee,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
KitKat Cold Coffee,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
Special Faluda,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [25]:
order_matrix_df = pd.read_csv('../data/processed/order_matrix_new.csv', header=None)
with open('../data/processed/dates_dict.pkl', "rb") as f:
     dates_dict = pickle.load(f)
date = list(dates_dict.keys())
order_matrix_df.insert(0, 'Date', date)
order_matrix_df.to_csv('../data/processed/order_matrix_with_dates.csv',index=False)