# Task 2: Lookalike Model

In [1]:
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import StandardScaler

# Load the three input tables. The file names are relative paths, so the CSVs
# must sit in the notebook's working directory.
customers, products, transactions = (
    pd.read_csv(csv_name)
    for csv_name in ("Customers.csv", "Products.csv", "Transactions.csv")
)

In [2]:
# Attach customer attributes and then product attributes to every transaction.
# Both joins are inner joins, so a transaction whose CustomerID or ProductID
# has no match in the lookup table is dropped.
transactions_with_customers = pd.merge(
    transactions, customers, on="CustomerID", how="inner"
)
merged_data = pd.merge(
    transactions_with_customers, products, on="ProductID", how="inner"
)

### Aggregating transaction history for each customer

In [3]:
# Collapse the transaction-level rows into one feature row per customer.
customer_data = (
    merged_data
    .groupby('CustomerID')
    .agg(
        # Total spend and total units across all of the customer's transactions.
        TotalValue=('TotalValue', 'sum'),
        Quantity=('Quantity', 'sum'),
        # Most common category purchased; mode() returns its values sorted,
        # so a tie resolves to the first category in sort order.
        Category=('Category', lambda purchases: purchases.mode()[0]),
        # Region of the customer (presumably constant per customer, so the
        # first value in the group is taken -- verify against Customers.csv).
        Region=('Region', 'first'),
    )
    .reset_index()
)


In [4]:
# One-hot encode the two categorical features so they can be used numerically.
# drop_first=True omits the first level of each column, so that level is
# represented by all of its indicator columns being 0/False.
# NOTE: this overwrites customer_data, so re-running the cell without first
# re-running the aggregation cell fails (the original columns are gone).
customer_data = pd.get_dummies(
    data=customer_data,
    columns=['Category', 'Region'],
    drop_first=True,
)

### Standardize the data

In [5]:
# Standardize every feature column so that large-magnitude features such as
# TotalValue do not dominate the similarity computation. CustomerID is the
# identifier (first column after reset_index), not a feature, so it is
# excluded before scaling.
scaler = StandardScaler()
scaled_data = scaler.fit_transform(customer_data.drop(columns=['CustomerID']))

### Calculating cosine similarity

In [6]:
# Pairwise cosine similarity between all rows of the standardized feature
# matrix: entry [i, j] compares the customer in row i with the customer in
# row j. Row order matches customer_data.
similarity_matrix = cosine_similarity(X=scaled_data, Y=None)

### Find top 3 similar customers for each customer

In [7]:
# Map each CustomerID to a list of (lookalike CustomerID, similarity score)
# tuples for its 3 most similar *other* customers, best match first.
lookalike_results = {}
for idx, customer_id in enumerate(customer_data['CustomerID']):
    # Pair every other customer's row position with its similarity score.
    # The current customer is removed explicitly by position: relying on
    # "self sorts first" is wrong whenever another customer ties with the
    # self-similarity (e.g. identical feature vectors, or an all-zero scaled
    # row whose self-similarity is 0), because the stable sort would then
    # keep an earlier row ahead of self and self would leak into the top 3.
    candidate_scores = [
        (other_idx, score)
        for other_idx, score in enumerate(similarity_matrix[idx])
        if other_idx != idx
    ]
    # Highest similarity first; ties keep their original row order.
    candidate_scores.sort(key=lambda pair: pair[1], reverse=True)
    top_3 = candidate_scores[:3]  # Top 3 similar customers
    # Translate row positions back to CustomerIDs. .iloc is positional, so
    # this does not depend on customer_data having a default 0..n-1 index.
    lookalike_results[customer_id] = [
        (customer_data['CustomerID'].iloc[other_idx], score)
        for other_idx, score in top_3
    ]

In [8]:
# Build the export table: one row per customer, with the lookalikes stored as
# a list of {"CustomerID", "Score"} dicts and scores rounded to 2 decimals.
# keys() and values() iterate the dict in the same order, so each list of
# matches lines up with its customer.
lookalike_df = pd.DataFrame({
    "CustomerID": lookalike_results.keys(),
    "Top3Lookalikes": [
        [
            {"CustomerID": match_id, "Score": round(match_score, 2)}
            for match_id, match_score in matches
        ]
        for matches in lookalike_results.values()
    ],
})

In [10]:
# Write the lookalike table to CSV in the working directory. index=False
# leaves the DataFrame's row index out of the file.
lookalike_csv = "Boopalamani_J_Lookalike.csv"
lookalike_df.to_csv(path_or_buf=lookalike_csv, index=False)
print(f"Lookalike results saved to {lookalike_csv}")

Lookalike results saved to Boopalamani_J_Lookalike.csv
