In [1]:
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import StandardScaler

In [2]:
# Load datasets: read the three input CSV files (customers, products, transactions)
# from the current working directory, in that order.
customers, products, transactions = (
    pd.read_csv(csv_path)
    for csv_path in ("Customers.csv", "Products.csv", "Transactions.csv")
)

In [3]:
# Merge datasets to create a unified dataset:
# first attach customer attributes to each transaction (joined on CustomerID),
# then attach product attributes (joined on ProductID). Both joins use the
# default merge behavior.
customer_transactions = transactions.merge(customers, on="CustomerID")
full_data = customer_transactions.merge(products, on="ProductID")

In [4]:
# Step 1: Feature Engineering
# Group the merged transaction rows by customer once; both totals reuse this grouping.
per_customer = full_data.groupby("CustomerID")

# Total spending by each customer (sum of TotalValue)
customer_spending = per_customer["TotalValue"].sum().rename("TotalSpending")

# Total quantity purchased by each customer (sum of Quantity)
customer_quantity = per_customer["Quantity"].sum().rename("TotalQuantity")

# Region as a categorical feature, keyed by CustomerID and one-hot encoded
customer_region = customers.set_index("CustomerID")["Region"]
region_dummies = pd.get_dummies(customer_region)

# Combine features column-wise into a single DataFrame aligned on CustomerID
customer_features = pd.concat(
    [customer_spending, customer_quantity, region_dummies],
    axis=1,
)


In [5]:
# Fill missing values with 0.
# The column-wise concat that builds customer_features aligns on CustomerID, so a
# customer missing from one of the inputs (e.g. one with no rows in full_data, if
# any) has NaN in those columns; 0 is used as the value for such gaps.
# Reassign instead of using inplace=True: the frame produced by the previous cell
# is not mutated behind the reader's back, and re-running this cell is a no-op.
customer_features = customer_features.fillna(0)

In [6]:
# Normalize features for similarity calculation:
# fit a StandardScaler on the customer feature table, then transform that same
# table with the fitted scaler.
scaler = StandardScaler()
scaler.fit(customer_features)
normalized_features = scaler.transform(customer_features)

In [7]:
# Step 2: Compute Similarity Matrix
# Pairwise cosine similarity between the normalized feature rows, wrapped in a
# DataFrame whose rows and columns are both labelled with the CustomerID index.
customer_ids = customer_features.index
similarity_matrix = cosine_similarity(normalized_features)
similarity_df = pd.DataFrame(similarity_matrix, index=customer_ids, columns=customer_ids)


In [8]:
# Step 3: Generate Recommendations for First 20 Customers
# Maps each CustomerID to a list of (lookalike CustomerID, similarity score)
# tuples, ordered from most to least similar.
recommendations = {}
for customer_id in customers["CustomerID"][:20]:
    # Similarity scores of every *other* customer. The customer's own entry is
    # removed by label rather than by assuming it sorts into position 0: with
    # tied scores, floating-point rounding, or an all-zero normalized row, another
    # customer can sort ahead of it, which would list the customer as their own
    # lookalike and drop a genuine match.
    candidate_scores = similarity_df[customer_id].drop(labels=customer_id)
    # Top 3 similar customers, highest score first (ties keep index order)
    similar_customers = candidate_scores.nlargest(3)
    recommendations[customer_id] = list(zip(similar_customers.index, similar_customers.values))


In [9]:
# Convert recommendations into the required format:
# one record per (customer, lookalike) pair carrying the pair's similarity score.
lookalike_data = [
    {"CustomerID": source_id, "LookalikeCustomerID": match_id, "Score": match_score}
    for source_id, matches in recommendations.items()
    for match_id, match_score in matches
]

# Tabular form with columns CustomerID, LookalikeCustomerID, Score
lookalike_df = pd.DataFrame(lookalike_data)


In [10]:
# Save recommendations to CSV in the working directory; index=False leaves the
# DataFrame's row index out of the file.
lookalike_df.to_csv(path_or_buf="Lookalike.csv", index=False)

In [11]:
# Output top recommendations: a heading line followed by the first 10 rows
# of the lookalike table.
print(
    "Lookalike Recommendations for First 20 Customers:",
    lookalike_df.head(10),
    sep="\n",
)

Lookalike Recommendations for First 20 Customers:
  CustomerID LookalikeCustomerID     Score
0      C0001               C0107  0.996781
1      C0001               C0137  0.996133
2      C0001               C0184  0.996082
3      C0002               C0088  0.998174
4      C0002               C0142  0.994310
5      C0002               C0159  0.989536
6      C0003               C0147  0.997951
7      C0003               C0190  0.997257
8      C0003               C0174  0.983825
9      C0004               C0113  0.994517
