In [2]:
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity

# Read the Superstore workbook and keep only the three columns the later
# cells use: the customer, the product sub-category, and the sale amount.
df = pd.read_excel("superstore.xlsx", engine="openpyxl").loc[
    :, ['Customer Name', 'Sub-Category', 'Sales']
]
df.head()

Unnamed: 0,Customer Name,Sub-Category,Sales
0,Claire Gute,Bookcases,261.96
1,Claire Gute,Chairs,731.94
2,Darrin Van Huff,Labels,14.62
3,Sean O'Donnell,Tables,957.5775
4,Sean O'Donnell,Storage,22.368


In [3]:
# Customer-by-sub-category table of total sales; customer/sub-category pairs
# with no sales are filled with 0 instead of NaN.
pivot_df = df.pivot_table(
    values='Sales',
    index='Customer Name',
    columns='Sub-Category',
    aggfunc='sum',
    fill_value=0,
)

# Flip the table so each row is one sub-category described by per-customer sales.
product_matrix = pivot_df.transpose()

# Pairwise cosine similarity between the sub-category rows, labelled on both
# axes with the sub-category names.
similarity_matrix = pd.DataFrame(
    data=cosine_similarity(product_matrix),
    index=product_matrix.index,
    columns=product_matrix.index,
)
print(similarity_matrix)

Sub-Category  Accessories  Appliances       Art   Binders  Bookcases  \
Sub-Category                                                           
Accessories      1.000000    0.228389  0.179739  0.111672   0.174517   
Appliances       0.228389    1.000000  0.228334  0.073717   0.122490   
Art              0.179739    0.228334  1.000000  0.123349   0.141045   
Binders          0.111672    0.073717  0.123349  1.000000   0.076669   
Bookcases        0.174517    0.122490  0.141045  0.076669   1.000000   
Chairs           0.273552    0.234733  0.261970  0.172705   0.161995   
Copiers          0.048682    0.036205  0.127391  0.045974   0.034897   
Envelopes        0.145892    0.114481  0.180546  0.091373   0.111434   
Fasteners        0.162176    0.132210  0.178701  0.155897   0.127480   
Furnishings      0.282980    0.222811  0.221294  0.225207   0.179493   
Labels           0.114982    0.082054  0.124892  0.057671   0.123884   
Machines         0.075226    0.080685  0.090520  0.055726   0.05

In [6]:
def recommend_products(selected_product, top_n=3, similarity=None):
    """Return the sub-categories most similar to ``selected_product``.

    Parameters
    ----------
    selected_product : str
        Label to look up; must be one of the columns of the similarity table.
    top_n : int, default 3
        Maximum number of recommendations. Values below 1 yield an empty list.
    similarity : pandas.DataFrame, optional
        Item-by-item similarity table (same labels on rows and columns).
        When omitted, the notebook-level ``similarity_matrix`` is used.

    Returns
    -------
    list or str
        Labels ordered from most to least similar, never including
        ``selected_product`` itself. If the label is unknown, a
        human-readable "not found" message string is returned instead
        (kept for backward compatibility with existing callers).
    """
    if similarity is None:
        # Fall back to the matrix computed earlier in the notebook.
        similarity = similarity_matrix

    if selected_product not in similarity.columns:
        return f"Product '{selected_product}' not found in dataset."

    # Remove the queried item by label rather than assuming it sorts first:
    # a tie or float rounding in the self-similarity (or an all-zero vector,
    # whose self-similarity is 0) can put another item at position 0.
    scores = similarity[selected_product].drop(labels=selected_product, errors="ignore")

    # Stable sort keeps tied scores in the table's original order, so the
    # output is deterministic across runs.
    ranked = scores.sort_values(ascending=False, kind="mergesort")

    # Clamp so a negative top_n cannot turn into a "drop from the end" slice.
    return ranked.head(max(top_n, 0)).index.tolist()

# Demo call; swap in any Sub-Category label that exists in the dataset.
if __name__ == "__main__":
    item = "Phones"
    suggestions = recommend_products(item)
    print(f"Recommendations for '{item}': {suggestions}")

Recommendations for 'Phones': ['Paper', 'Chairs', 'Furnishings']
