## Item-Based Collaborative Filtering Recommendation System (cosine similarity, returns the 5 most similar products)

In [None]:
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity # For computing similarity between items


In [9]:
# 1. Load and prepare data
# Read the raw transactions and apply every cleaning step in a single chain:
#   - keep only rows with a CustomerID (required to build the user-item matrix)
#   - discard cancelled transactions (InvoiceNo starting with 'C')
#   - keep only rows with positive Quantity and positive UnitPrice
#   - add TotalPrice = Quantity * UnitPrice for each purchased line
df = (
    pd.read_csv("/content/drive/MyDrive/Files/online_retail.csv", encoding="ISO-8859-1")
    .dropna(subset=['CustomerID'])
    .loc[lambda frame: ~frame['InvoiceNo'].astype(str).str.startswith('C')]
    .loc[lambda frame: (frame['Quantity'] > 0) & (frame['UnitPrice'] > 0)]
    .assign(TotalPrice=lambda frame: frame['Quantity'] * frame['UnitPrice'])
)

In [15]:
# Preview the first rows of the cleaned transactions (including the derived TotalPrice column)
df.head()

Unnamed: 0,InvoiceNo,StockCode,Description,Quantity,InvoiceDate,UnitPrice,CustomerID,Country,TotalPrice
0,536365,85123A,WHITE HANGING HEART T-LIGHT HOLDER,6,2022-12-01 08:26:00,2.55,17850.0,United Kingdom,15.3
1,536365,71053,WHITE METAL LANTERN,6,2022-12-01 08:26:00,3.39,17850.0,United Kingdom,20.34
2,536365,84406B,CREAM CUPID HEARTS COAT HANGER,8,2022-12-01 08:26:00,2.75,17850.0,United Kingdom,22.0
3,536365,84029G,KNITTED UNION FLAG HOT WATER BOTTLE,6,2022-12-01 08:26:00,3.39,17850.0,United Kingdom,20.34
4,536365,84029E,RED WOOLLY HOTTIE WHITE HEART.,6,2022-12-01 08:26:00,3.39,17850.0,United Kingdom,20.34


# Prepare Purchase Matrix

In [19]:
# Build the customer x product purchase matrix: one row per CustomerID, one column per
# StockCode, each cell holding the summed Quantity for that customer/product pair.
user_item_matrix = (
    df.pivot_table(index='CustomerID', columns='StockCode', values='Quantity', aggfunc='sum')
    .fillna(0)  # pairs with no purchase are missing after the pivot; treat them as 0
)

In [20]:
# Item-to-item cosine similarity.
# The matrix is transposed so that each row is one product's purchase vector across customers.
item_similarity = cosine_similarity(user_item_matrix.transpose())

# Wrap the raw array in a DataFrame labelled by StockCode on both axes so that
# similarities can be looked up by product code.
item_similarity_df = pd.DataFrame(
    data=item_similarity,
    index=user_item_matrix.columns,
    columns=user_item_matrix.columns,
)

# Recommendation Function

In [22]:
# Get mapping of product codes to names.
# Rows with a missing StockCode/Description are removed BEFORE de-duplicating: otherwise a
# product whose first row happens to have no Description would be dropped from the map
# entirely, even though later rows carry its name.
product_map = (
    df[['StockCode', 'Description']]
    .dropna()
    .drop_duplicates(subset=['StockCode'])  # keep the first named row per product code
)
stock_desc_dict = pd.Series(product_map['Description'].values, index=product_map['StockCode']).to_dict()

# Create a reverse dictionary to map product names to stock codes.
# NOTE: if several stock codes share one description, the last one encountered wins.
desc_stock_dict = {description: code for code, description in stock_desc_dict.items()}

In [23]:
# Recommendation function
def recommend_products_by_name(product_name, top_n=5):
    """Return the products most similar to ``product_name``.

    Looks the name up in ``desc_stock_dict``, reads that product's column from
    ``item_similarity_df`` and returns the ``top_n`` highest-scoring other products.

    Parameters
    ----------
    product_name : str
        Product description to look up. An exact match is tried first; if that
        fails, a match ignoring leading/trailing whitespace is attempted, since
        the stored descriptions may carry trailing spaces.
    top_n : int, default 5
        Maximum number of similar products to return. Values <= 0 yield an empty list.

    Returns
    -------
    list[tuple[str, float]] or str
        ``(product name, similarity score)`` pairs ordered from most to least
        similar, or an error message string when the product name or its stock
        code cannot be found.
    """
    if product_name not in desc_stock_dict:             # Check if product name exists in dataset
        # Fall back to a whitespace-insensitive lookup before giving up.
        candidates = []
        if isinstance(product_name, str):
            wanted = product_name.strip()
            candidates = [
                known for known in desc_stock_dict
                if isinstance(known, str) and known.strip() == wanted
            ]
        if not candidates:
            return "❌ Product name not found!"
        product_name = candidates[0]

    stock_code = desc_stock_dict[product_name]          # Get stock code from product name

    if stock_code not in item_similarity_df.columns:    # Check if stock code has similarity scores
        return "❌ Product code not found in similarity matrix!"

    # Remove the queried product by label rather than by position: when another
    # product also has similarity 1.0, the queried one is not guaranteed to sort
    # first, and skipping "row 0" could drop a real neighbour and return the
    # product itself. A stable sort keeps tied scores in a deterministic order.
    similar_items = (
        item_similarity_df[stock_code]
        .drop(labels=stock_code, errors="ignore")
        .sort_values(ascending=False, kind="mergesort")
        .head(max(top_n, 0))
    )

    # Translate each stock code back to a readable name (fall back to the code itself).
    return [
        (stock_desc_dict.get(code, f"StockCode {code}"), score)
        for code, score in similar_items.items()
    ]


In [26]:
# Pick a sample product name from dataset
sample_product_name = "CREAM CUPID HEARTS COAT HANGER"

recommendations = recommend_products_by_name(sample_product_name)

# The function returns an error message (a plain string) when the lookup fails;
# iterating over that string would crash on tuple unpacking, so show it as-is instead.
if isinstance(recommendations, str):
    print(recommendations)
else:
    print(f"\n💡 Recommendations for: {sample_product_name}")
    for i, (name, score) in enumerate(recommendations, 1):
        print(f"{i}. {name} (Similarity Score: {score:.2f})")



💡 Recommendations for: CREAM CUPID HEARTS COAT HANGER
1. FOLKART ZINC HEART CHRISTMAS DEC (Similarity Score: 0.89)
2. FOLDING BUTTERFLY MIRROR RED   (Similarity Score: 0.88)
3. FOLDING BUTTERFLY MIRROR HOT PINK  (Similarity Score: 0.88)
4. 3 HOOK HANGER MAGIC GARDEN (Similarity Score: 0.88)
5. BLUE BUNNY EASTER EGG BASKET (Similarity Score: 0.88)
