In [2]:
# Importing necessary libraries
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import StandardScaler

In [3]:
# Read the three input tables (customers, products, transactions) from the
# CSV files in the working directory.
customers_df, products_df, transactions_df = (
    pd.read_csv(csv_path)
    for csv_path in ("Customers.csv", "Products.csv", "Transactions.csv")
)

In [4]:
# Attach customer attributes, then product attributes, to every transaction.
# Both joins are left joins, so each transaction row is kept even when its
# CustomerID / ProductID has no match in the lookup table.
merged_df = (
    transactions_df
    .merge(customers_df, on="CustomerID", how="left")
    .merge(products_df, on="ProductID", how="left")
)

In [5]:
# Step 1: Build one purchasing profile per customer.
# NOTE(review): "Price_x" is presumably the transaction-side Price column,
# suffixed by the merge because another table also carries a Price column -
# confirm against the CSV schemas.
customer_profiles = merged_df.groupby("CustomerID").agg(
    Quantity=("Quantity", "sum"),      # total quantity purchased
    Price_x=("Price_x", "mean"),       # average price of products purchased
    TotalValue=("TotalValue", "sum"),  # total spending
).reset_index()

In [8]:
# Step 2: Rescale the numeric profile columns with min-max scaling so that
# no single feature dominates the similarity computation purely by magnitude.
from sklearn.preprocessing import MinMaxScaler

profile_features = customer_profiles[["Quantity", "Price_x", "TotalValue"]]
scaler = MinMaxScaler()
scaler.fit(profile_features)
scaled_features = scaler.transform(profile_features)

In [9]:
# Step 3: Pairwise cosine similarity between every pair of customer profiles.
# Row i / column j holds the similarity between the i-th and j-th rows of
# scaled_features.
similarity_matrix = cosine_similarity(X=scaled_features)

In [10]:
# Step 4 (preparation): Label the similarity matrix with CustomerID on both
# axes so that scores can be looked up by customer rather than by position.
similarity_df = pd.DataFrame(
    data=similarity_matrix,
    index=customer_profiles["CustomerID"],
    columns=customer_profiles["CustomerID"],
)

In [11]:
# Extract the top 3 lookalikes for the first 20 customers.
# lookalike_data maps each CustomerID to a list of (other CustomerID, score)
# pairs ordered from most to least similar.
lookalike_data = {}
for customer_id in customer_profiles["CustomerID"].iloc[:20]:
    # Remove the customer's own entry by label instead of assuming it sorts
    # first: a tie at the top score, or an all-zero scaled profile (whose
    # self-similarity is 0), would otherwise drop a real neighbour and could
    # list the customer as their own lookalike.
    similar_customers = (
        similarity_df[customer_id]
        .drop(labels=customer_id)
        .sort_values(ascending=False)
        .head(3)
    )
    # Store native Python floats so the text later written to the CSV does not
    # depend on how the installed numpy version renders its scalar types.
    lookalike_data[customer_id] = [
        (other_id, float(score)) for other_id, score in similar_customers.items()
    ]

In [12]:
# Step 5: Write the lookalike mapping to Lookalike.csv.
# Each row pairs a CustomerID with its list of lookalikes as stored in
# lookalike_data.
lookalike_rows = list(lookalike_data.items())
lookalike_df = pd.DataFrame(lookalike_rows, columns=["CustomerID", "Lookalikes"])
lookalike_df.to_csv("Lookalike.csv", index=False)

# Display output
print(lookalike_df)

   CustomerID                                         Lookalikes
0       C0001  [(C0139, 0.9999868808961591), (C0193, 0.999809...
1       C0002  [(C0029, 0.999916648063628), (C0019, 0.9995096...
2       C0003  [(C0124, 0.9987683656419095), (C0038, 0.998712...
3       C0004  [(C0017, 0.9998858943700686), (C0175, 0.999836...
4       C0005  [(C0063, 0.9999854098992549), (C0007, 0.999790...
5       C0006  [(C0023, 0.9999294373393113), (C0154, 0.999898...
6       C0007  [(C0146, 0.9998578025228806), (C0005, 0.999790...
7       C0008  [(C0084, 0.9995734861924988), (C0039, 0.999126...
8       C0009  [(C0020, 0.9999934031283176), (C0080, 0.999900...
9       C0010  [(C0121, 0.9989344823273082), (C0034, 0.995901...
10      C0011  [(C0001, 0.9997949778407051), (C0139, 0.999781...
11      C0012  [(C0093, 0.999985682642632), (C0018, 0.9999011...
12      C0013  [(C0059, 0.9997341346587364), (C0021, 0.999696...
13      C0014  [(C0128, 0.9995810323471414), (C0009, 0.998740...
14      C0015  [(C0009, 0