 Import necessary **libraries**



In [33]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.metrics.pairwise import cosine_similarity

Load the **datasets**

In [34]:

# Folder that holds the three input CSV files. Kept in one place so the
# notebook can be pointed at another location by editing a single value.
DATA_DIR = "/content/drive/MyDrive/Zeotap"

# Read Customers.csv, Products.csv and Transactions.csv from DATA_DIR.
customers = pd.read_csv(f"{DATA_DIR}/Customers.csv")
products = pd.read_csv(f"{DATA_DIR}/Products.csv")
transactions = pd.read_csv(f"{DATA_DIR}/Transactions.csv")

Merge **datasets**

In [35]:
# Attach the customer columns and then the product columns to every
# transaction row (left joins, so every transaction is kept).
#
# validate='many_to_one' makes pandas raise MergeError when a key is
# duplicated in the lookup table. Without it a duplicated CustomerID or
# ProductID would silently duplicate transaction rows and inflate any
# downstream per-customer totals.
#
# Non-key columns present on both sides receive pandas' default '_x' / '_y'
# suffixes (left / right).
transactions = transactions.merge(
    customers, on='CustomerID', how='left', validate='many_to_one'
)
transactions = transactions.merge(
    products, on='ProductID', how='left', validate='many_to_one'
)

In [36]:
# Replace the merge-generated price suffixes with descriptive column names.
price_column_names = {
    'Price_x': 'TransactionPrice',   # price column that came from Transactions.csv
    'Price_y': 'ProductBasePrice',   # price column that came from Products.csv
}
transactions = transactions.rename(columns=price_column_names)

Feature engineering for **Customers**

In [37]:
def _most_frequent_or_unknown(values):
    """Return the first mode of `values`, or 'Unknown' when there is no mode."""
    modes = values.mode()
    if len(modes) > 0:
        return modes.iloc[0]
    return 'Unknown'


# One row per customer: total spend, total quantity, mean transaction price
# and the most frequently purchased category.
customer_features = (
    transactions
    .groupby('CustomerID')
    .agg(
        TotalValue=('TotalValue', 'sum'),
        Quantity=('Quantity', 'sum'),
        TransactionPrice=('TransactionPrice', 'mean'),
        Category=('Category', _most_frequent_or_unknown),
    )
    .reset_index()
)

Encode categorical **features**

In [38]:
# One-hot encode the dominant category for use in a similarity measure.
#
# drop_first=False keeps an indicator column for every category. Dropping the
# first level (a device for avoiding collinearity in regression models) would
# encode that category as an all-zero vector, so two customers sharing it
# would get no similarity credit while customers sharing any other category
# would.
#
# dtype=float keeps the indicator columns numeric alongside the scaled
# features instead of relying on bool-to-float coercion downstream.
customer_features = pd.get_dummies(
    customer_features,
    columns=['Category'],
    drop_first=False,
    dtype=float,
)

Standardize numerical features

In [39]:
# Standardize the numeric features with StandardScaler so that no single
# column dominates because of its raw magnitude.
numerical_columns = ['TotalValue', 'Quantity', 'TransactionPrice']
scaler = StandardScaler()
scaled_values = scaler.fit_transform(customer_features[numerical_columns])
customer_features[numerical_columns] = scaled_values

Compute similarity matrix

In [40]:
# Split the identifier column from the feature columns, then compute the
# pairwise cosine similarity between all customers. Row/column i of the
# result corresponds to the i-th entry of customer_ids.
customer_features_matrix = customer_features.drop(columns='CustomerID')
customer_ids = customer_features['CustomerID']
similarity_matrix = cosine_similarity(customer_features_matrix)


Generate lookalike recommendations (TOP 3)

In [41]:
# Number of lookalikes kept per customer.
TOP_N = 3

# Plain positional list of IDs: position i matches row i of similarity_matrix,
# independent of whatever index labels the customer_ids Series carries.
customer_id_list = customer_ids.tolist()

# Maps each customer ID to a list of (other_customer_id, score) tuples,
# sorted by descending similarity and truncated to TOP_N entries.
lookalike_map = {}
for i, customer_id in enumerate(customer_id_list):
    # Pair every other customer with its similarity to customer i.
    # float() turns NumPy scalars into Python floats so that later string
    # conversion yields plain numbers rather than 'np.float64(...)'.
    candidates = [
        (other_id, float(score))
        for j, (other_id, score) in enumerate(zip(customer_id_list, similarity_matrix[i]))
        if j != i  # a customer is not their own lookalike
    ]
    # list.sort is stable, so ties keep their original customer order.
    candidates.sort(key=lambda pair: pair[1], reverse=True)
    lookalike_map[customer_id] = candidates[:TOP_N]

Filter results for the first 20 customers (C0001–C0020) and save them to Lookalike.csv

In [42]:
# IDs C0001 .. C0020, built once as a set instead of being rebuilt for every
# customer inside the comprehension.
target_ids = {f'C{str(i).zfill(4)}' for i in range(1, 21)}

# Keep only the target customers, preserving the order of customer_ids.
filtered_lookalike_map = {
    cust_id: lookalike_map[cust_id]
    for cust_id in customer_ids
    if cust_id in target_ids
}

# One CSV row per customer; the lookalikes are stored as the string form of a
# list of (customer_id, score) tuples. Scores are coerced to Python floats so
# the text contains plain numbers even when they are NumPy scalars.
lookalike_df = pd.DataFrame(
    [
        {
            'CustomerID': cust_id,
            'Lookalikes': str([(other_id, float(score)) for other_id, score in matches]),
        }
        for cust_id, matches in filtered_lookalike_map.items()
    ],
    # Explicit columns so an empty result still writes a header row.
    columns=['CustomerID', 'Lookalikes'],
)
lookalike_df.to_csv("Lookalike.csv", index=False)