In [21]:
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
import pickle

# Load 'cleaned_kfc_data.csv' into `df`.
#
# Later cells decide whether to run by checking `'df' in locals()`. Drop any
# `df` left in the kernel by an earlier run before attempting the load, so a
# failed load cannot leave a stale frame behind that those guards would accept.
globals().pop('df', None)

try:
    df = pd.read_csv('cleaned_kfc_data.csv')
except FileNotFoundError:
    # Deliberately non-fatal: report the problem and leave `df` undefined.
    print("Error: 'cleaned_kfc_data.csv' not found.")
else:
    print("Data loaded.")


Data loaded.


In [22]:
# Build the order x item matrix: one row per OrderID, one column per item.
# Skipped entirely when `df` was never defined.
if 'df' in locals():
    # Stack the two item columns into a single 'Item' column (one row per
    # order/item pair), discard rows with missing values, and tag every
    # remaining row with a Quantity of 1.
    melted_df = (
        df.melt(
            id_vars=['OrderID'],
            value_vars=['Primary_Item', 'Side_Item'],
            value_name='Item',
        )
        .dropna()
        .assign(Quantity=1)
    )

    # pivot_table aggregates with the mean by default; because every Quantity
    # is 1, a cell ends up 1 where the item occurs in the order and takes the
    # fill value 0 everywhere else.
    order_item_matrix = pd.pivot_table(
        melted_df,
        index='OrderID',
        columns='Item',
        values='Quantity',
        fill_value=0,
    )

    print("Order-Item Matrix Shape:", order_item_matrix.shape)
    display(order_item_matrix.head())


Order-Item Matrix Shape: (500, 10)


Item,Biscuits,Coleslaw,Corn On The Cob,Fries,Hot Wings,Mashed Potatoes,Original Recipe Chicken,Popcorn Chicken,Twister Wrap,Zinger Burger
OrderID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
1,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0
3,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0
5,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0


In [23]:
# Compute item-to-item cosine similarity from the order x item matrix.
# Skipped entirely when `order_item_matrix` was never defined.
if 'order_item_matrix' in locals():
    # Transpose so that each row is an item and each column an order;
    # cosine_similarity compares rows pairwise, giving an item x item result.
    item_item_matrix = order_item_matrix.T
    cosine_sim_matrix = cosine_similarity(item_item_matrix)

    # Wrap the raw array in a DataFrame labelled by item name on both axes.
    cosine_sim_df = pd.DataFrame(
        cosine_sim_matrix,
        index=item_item_matrix.index,
        columns=item_item_matrix.index,
    )

    # head() shows the first five rows across *all* item columns, so the
    # label says exactly that (it previously claimed a 5x5 view).
    print("\nItem-Item Similarity Matrix (first 5 rows):")
    display(cosine_sim_df.head())



Item-Item Similarity Matrix (Top 5x5):


Item,Biscuits,Coleslaw,Corn On The Cob,Fries,Hot Wings,Mashed Potatoes,Original Recipe Chicken,Popcorn Chicken,Twister Wrap,Zinger Burger
Item,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
Biscuits,1.0,0.0,0.0,0.0,0.075378,0.0,0.204407,0.07986,0.187083,0.280174
Coleslaw,0.0,1.0,0.0,0.0,0.10716,0.0,0.127453,0.113531,0.110818,0.251184
Corn On The Cob,0.0,0.0,1.0,0.0,0.121854,0.0,0.161494,0.147542,0.123443,0.209849
Fries,0.0,0.0,0.0,1.0,0.144749,0.0,0.225374,0.125473,0.268277,0.251166
Hot Wings,0.075378,0.10716,0.121854,0.144749,1.0,0.249149,0.0,0.0,0.0,0.0


In [24]:
# Persist the similarity DataFrame to disk with pickle.
# Skipped entirely when `cosine_sim_df` was never defined.
# NOTE: pickle files can execute code when loaded; only unpickle this file
# from a location you trust.
if 'cosine_sim_df' in locals():
    with open('similarity_matrix.pkl', 'wb') as pkl_file:
        pickle.dump(cosine_sim_df, pkl_file)

    print("Similarity matrix saved as 'similarity_matrix.pkl'.")


Similarity matrix saved as 'similarity_matrix.pkl'.
