<a href="https://colab.research.google.com/github/YG2312/eCommerce-EDA-Lookalike-Clustering/blob/main/Yash_Gupta_Lookalike.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import StandardScaler

In [6]:
# Read the three input tables from CSV files in the working directory.
customers, products, transactions = (
    pd.read_csv(csv_name)
    for csv_name in ('Customers.csv', 'Products.csv', 'Transactions.csv')
)

# Build one modelling table: every transaction row is joined to its customer
# record (on CustomerID) and then to its product record (on ProductID).
# Both joins use pandas' default inner join, so transactions without a
# matching customer or product are dropped.
# NOTE(review): the feature cell reads 'Price_x', which suggests 'Price'
# exists on both sides of one of these joins and pandas added the default
# _x/_y suffixes - confirm against the CSV schemas.
merged_data = pd.merge(transactions, customers, on='CustomerID')
merged_data = pd.merge(merged_data, products, on='ProductID')

In [7]:
# Collapse the merged transaction rows into one feature row per customer:
#   TotalValue -> sum over the customer's rows
#   Quantity   -> sum over the customer's rows
#   Price_x    -> mean over the customer's rows
# NOTE(review): 'Price_x' is a merge-suffixed column; presumably it is the
# price from the left-hand (transactions) frame - verify against the merge.
# reset_index() turns CustomerID back into an ordinary first column.
customer_features = (
    merged_data
    .groupby('CustomerID')
    .agg(
        TotalValue=('TotalValue', 'sum'),
        Quantity=('Quantity', 'sum'),
        Price_x=('Price_x', 'mean'),
    )
    .reset_index()
)

In [8]:
# Standardize every feature column so that no single feature dominates the
# similarity computation because of its scale. CustomerID is the first
# column of customer_features and is an identifier, not a feature, so it is
# excluded before scaling.
feature_values = customer_features.drop(columns='CustomerID')
scaler = StandardScaler()
scaled_features = scaler.fit_transform(feature_values)

In [9]:
# Pairwise cosine similarity between all customers' scaled feature vectors.
similarity_matrix = cosine_similarity(scaled_features)

# Wrap the square matrix in a DataFrame labelled by CustomerID on both axes,
# so similarity_df[a][b] is the similarity between customers a and b.
customer_ids = customer_features['CustomerID']
similarity_df = pd.DataFrame(
    data=similarity_matrix,
    index=customer_ids,
    columns=customer_ids,
)

In [10]:
# For each of the first 20 customers in customer_features, record the three
# most similar *other* customers as (CustomerID, score) pairs, with the score
# rounded to 4 decimal places.
lookalike_dict = {}
for customer in customer_features['CustomerID'][:20]:
    # Remove the customer's own entry by label instead of assuming it sorts
    # first: another customer can tie with (or, through floating-point
    # rounding, exceed) the self-similarity, and a customer whose scaled
    # features are all zero has a self-similarity of 0. In those cases a
    # positional [1:4] slice would list the customer as their own lookalike
    # and drop a genuine neighbour.
    other_scores = similarity_df[customer].drop(labels=customer)

    # nlargest returns the top 3 in descending order; ties keep their
    # original order, so the result is deterministic.
    top_matches = other_scores.nlargest(3)

    # float() converts the NumPy scalar to a built-in float so the tuples
    # serialize as plain numbers (NumPy >= 2 would otherwise render them as
    # "np.float64(...)" when the tuples are written out as text).
    lookalike_dict[customer] = [
        (sim_cust, round(float(score), 4))
        for sim_cust, score in top_matches.items()
    ]

In [11]:
# Turn the {customer: [match, match, match]} mapping into a table with one
# row per customer and one column per ranked match, then expose the customer
# key (originally the frame's index) as a regular 'CustomerID' column.
lookalike_df = (
    pd.DataFrame.from_dict(
        lookalike_dict,
        orient='index',
        columns=['Lookalike_1', 'Lookalike_2', 'Lookalike_3'],
    )
    .reset_index()
    .rename(columns={'index': 'CustomerID'})
)

# Write the result to disk without the positional row index.
lookalike_df.to_csv('Lookalike.csv', index=False)