Task 2: Lookalike Model

1. Importing Required Libraries

In [7]:
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import MinMaxScaler


2. Loading Datasets

In [8]:
# Read the three input tables from CSV files in the current working directory.
# The files are read in the order listed: customers, products, transactions.
customers, products, transactions = (
    pd.read_csv(csv_name)
    for csv_name in ("Customers.csv", "Products.csv", "Transactions.csv")
)


3. Merging the Datasets

In [9]:
# Build one row per transaction, enriched first with the matching product row
# (joined on ProductID) and then with the matching customer row (joined on
# CustomerID). Both joins use pandas' default `how="inner"`.
merged_data = pd.merge(
    left=pd.merge(left=transactions, right=products, on="ProductID"),
    right=customers,
    on="CustomerID",
)


4. Feature Engineering

In [None]:
# Collapse the merged transaction table to one row per CustomerID using named
# aggregation (output_column=(source_column, aggregation)).
# NOTE(review): "Price_x" is a suffixed column created by the merges above —
# presumably the transaction-side Price; confirm against the input schemas.
customer_features = merged_data.groupby("CustomerID").agg(
    Total_Spend=("TotalValue", "sum"),                # sum of TotalValue
    Total_Quantity=("Quantity", "sum"),               # sum of Quantity
    Avg_Product_Price=("Price_x", "mean"),            # mean of Price_x
    Transaction_Count=("TransactionDate", "count"),   # non-null TransactionDate rows
)


5. Adding Customer Demographics

In [10]:
# Attach each customer's Region and SignupDate to the aggregated features.
# Only the three listed columns of `customers` take part in the join, which is
# keyed on CustomerID and uses pandas' default `how="inner"`.
customer_features = pd.merge(
    left=customer_features,
    right=customers.loc[:, ["CustomerID", "Region", "SignupDate"]],
    on="CustomerID",
)


6. Converting Signup Date to a Numeric Feature (Days Since Signup)



In [11]:
# Swap the raw SignupDate column for a numeric feature, Days_Since_Signup:
# the whole-day component of (current wall-clock time - parsed signup date).
# The reference point is the moment this cell runs, so the raw day counts
# differ from one run date to the next.
customer_features = (
    customer_features
    .assign(
        Days_Since_Signup=lambda frame: (
            pd.Timestamp.now() - pd.to_datetime(frame["SignupDate"])
        ).dt.days
    )
    .drop(columns=["SignupDate"])
)


7. One-Hot Encoding Categorical Data

In [12]:
# One-hot encode Region: remove the original column and append one
# "Region_<value>" indicator column per distinct value, placed after the
# remaining columns (the same layout pd.get_dummies(..., columns=["Region"])
# produces).
customer_features = pd.concat(
    [
        customer_features.drop(columns=["Region"]),
        pd.get_dummies(customer_features["Region"], prefix="Region"),
    ],
    axis=1,
)


8. Normalizing Numerical Features

In [13]:
# Rescale the continuous features with a MinMaxScaler fitted on these same
# columns. The Region indicator columns are not listed here and are therefore
# left untouched.
numerical_cols = [
    "Total_Spend",
    "Total_Quantity",
    "Avg_Product_Price",
    "Transaction_Count",
    "Days_Since_Signup",
]
scaler = MinMaxScaler()
scaler.fit(customer_features[numerical_cols])
customer_features[numerical_cols] = scaler.transform(customer_features[numerical_cols])


9. Computing Similarity Matrix

In [14]:
# Split the identifier column from the feature columns, then compute pairwise
# cosine similarity between all rows of the feature table. Row/column k of
# `similarity_matrix` corresponds to the k-th row of `customer_features`.
customer_ids = customer_features.loc[:, "CustomerID"]
feature_matrix = customer_features.drop("CustomerID", axis=1)
similarity_matrix = cosine_similarity(X=feature_matrix)


10. Generating Lookalike Recommendations

In [15]:
# Generate lookalike recommendations for first 20 customers.
#
# Result: `lookalikes` maps each of the first 20 customer IDs to a list of up
# to three (other_customer_id, similarity) tuples, most similar first, with
# the similarity rounded to 3 decimals.
#
# IDs are materialised into a plain list so that position k in the list lines
# up with row k of `similarity_matrix` regardless of how `customer_ids` is
# indexed (label-based Series lookups only coincide with positions when the
# index is a default 0..n-1 range).
id_by_position = list(customer_ids)

lookalikes = {}
for idx, customer_id in enumerate(id_by_position[:20]):  # First 20 customers only
    # Pair every row position with its similarity to the current customer and
    # rank from most to least similar (stable sort keeps ties in row order).
    similarity_scores = sorted(
        enumerate(similarity_matrix[idx]),
        key=lambda pair: pair[1],
        reverse=True,
    )
    # Skip the customer's own entry and keep the three best matches.
    # float(...) converts the NumPy scalar to a built-in float so that the
    # later str() of this list renders "0.987" rather than "np.float64(0.987)"
    # under NumPy >= 2.
    top_lookalikes = [
        (id_by_position[i], round(float(score), 3))
        for i, score in similarity_scores
        if id_by_position[i] != customer_id
    ][:3]
    lookalikes[customer_id] = top_lookalikes


11. Saving Lookalike Recommendations to CSV

In [16]:
# Write the recommendations to Lookalike.csv: one row per source customer,
# with that customer's list of (lookalike_id, score) tuples stored as its
# string representation in the Lookalike_Customers column.
lookalike_df = pd.DataFrame(
    {
        "CustomerID": list(lookalikes),
        "Lookalike_Customers": list(map(str, lookalikes.values())),
    }
)
lookalike_df.to_csv("Lookalike.csv", index=False)

print("Lookalike recommendations saved to 'Lookalike.csv'")


Lookalike recommendations saved to 'Lookalike.csv'
