In [1]:
import pandas as pd

In [3]:
from sklearn.preprocessing import StandardScaler

In [17]:
from sklearn.metrics.pairwise import cosine_similarity

In [18]:
import matplotlib.pyplot as plt

In [19]:
# Load data

In [20]:
# Read the three input tables; the relative paths resolve against the
# notebook's current working directory.
customers = pd.read_csv(filepath_or_buffer="Customers.csv")
products = pd.read_csv(filepath_or_buffer="Products.csv")
transactions = pd.read_csv(filepath_or_buffer="Transactions.csv")

In [21]:
# Merge transactions with product info
# Guarded so the cell is safe to re-run: `transactions` is overwritten in place,
# and merging an already-merged frame would suffix the shared product columns
# (e.g. Category_x / Category_y), breaking later lookups of "Category".
# NOTE(review): assumes "Category" is supplied by Products.csv and is absent
# from the raw Transactions.csv -- confirm against the data.
if "Category" not in transactions.columns:
    transactions = transactions.merge(products, on="ProductID")

In [22]:
# Feature Engineering for Customers

In [23]:
# Build one row per customer: the customer attributes joined with spend
# aggregates computed from that customer's transactions.
# No `how` is passed, so pandas' default inner join applies and customers
# without any transaction rows are not carried into the result.
customer_features = pd.merge(
    customers,
    transactions.groupby("CustomerID").agg(
        total_spent=pd.NamedAgg(column="TotalValue", aggfunc="sum"),
        avg_transaction=pd.NamedAgg(column="TotalValue", aggfunc="mean"),
        # mode() can return several values on a tie; [0] keeps the first one.
        favorite_category=pd.NamedAgg(
            column="Category", aggfunc=lambda cats: cats.mode()[0]
        ),
    ),
    on="CustomerID",
)


In [24]:
# Encode categorical features

In [25]:
# Select the modelling columns and one-hot encode them: get_dummies expands
# non-numeric columns into indicator columns and passes numeric ones through.
features = pd.get_dummies(
    customer_features.loc[
        :, ["Region", "total_spent", "avg_transaction", "favorite_category"]
    ]
)

In [26]:
# Normalize

In [27]:
# Standardizer with scikit-learn's default settings written out explicitly
# (centre each column on its mean, scale to unit variance, work on a copy).
scaler = StandardScaler(copy=True, with_mean=True, with_std=True)

In [28]:
# Learn the per-column statistics from `features`, then standardize those
# same rows with the fitted scaler.
scaled_features = scaler.fit(features).transform(features)

In [29]:
# Compute similarity

In [30]:
# Pairwise cosine similarity between every pair of rows; with Y=None the
# rows of X are compared against each other, giving a square matrix.
similarity_matrix = cosine_similarity(X=scaled_features, Y=None)

In [31]:
# Generate recommendations for first 20 customers

In [32]:
# Accumulator for the results, filled in later keyed by customer ID.
lookalike_map = dict()

In [33]:
# IDs of the first 20 rows of customer_features, as a plain Python list.
target_customers = customer_features["CustomerID"].head(20).tolist()

In [35]:
# For each target customer, record the three most similar *other* customers
# as (CustomerID, similarity rounded to 2 decimals) pairs.
for cust_id in target_customers:
    # Index label of this customer's row. It is also used as the row position
    # in similarity_matrix and with .iloc below, which relies on
    # customer_features having a default 0..n-1 index.
    idx = customer_features[customer_features["CustomerID"] == cust_id].index[0]
    # Remove the customer's own entry by index instead of assuming it sorts
    # first: on a tie (or floating-point near-tie) with another customer the
    # stable sort can place that customer ahead, and slicing off position 0
    # would then drop a real match while keeping the customer as their own
    # look-alike.
    scores = [
        (i, score)
        for i, score in enumerate(similarity_matrix[idx])
        if i != idx
    ]
    # Highest similarity first; keep the top three.
    sorted_scores = sorted(scores, key=lambda x: x[1], reverse=True)[:3]
    lookalike_map[cust_id] = [
        (customer_features.iloc[i]["CustomerID"], round(score, 2))
        for i, score in sorted_scores
    ]

In [36]:
# Save to a CSV file