In [2]:
import json

import numpy as np
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import MinMaxScaler


In [3]:
# Read the three raw tables from the working directory.
customers, products, transactions = (
    pd.read_csv(csv_name)
    for csv_name in ('Customers.csv', 'Products.csv', 'Transactions.csv')
)

# Attach customer columns, then product columns, to every transaction row.
# Both joins use pandas' default merge type on the shared key column.
transactions_with_customers = pd.merge(transactions, customers, on='CustomerID')
merged_data = pd.merge(transactions_with_customers, products, on='ProductID')


In [4]:
# Build one row of behavioural features per customer.
customer_features = (
    merged_data
    .groupby('CustomerID')
    .agg(
        TotalValue=('TotalValue', 'sum'),          # total spending
        Quantity=('Quantity', 'sum'),              # total quantity purchased
        UniqueProducts=('ProductID', 'nunique'),   # number of distinct products purchased
    )
    .reset_index()
)
print(customer_features.head())


  CustomerID  TotalValue  Quantity  UniqueProducts
0      C0001     3354.52        12               5
1      C0002     1862.74        10               4
2      C0003     2725.38        14               4
3      C0004     5354.88        23               8
4      C0005     2034.24         7               3


In [5]:
# Rescale the three numeric customer features column by column with a
# MinMaxScaler (default settings) before computing similarities.
feature_columns = ['TotalValue', 'Quantity', 'UniqueProducts']
scaler = MinMaxScaler()
scaled_features = scaler.fit_transform(customer_features[feature_columns])


In [6]:
# Pairwise cosine similarity between the rows of `scaled_features`.
# Called with a single array, cosine_similarity compares every row with every
# other row, so entry [i, j] is the similarity between rows i and j.

similarity_matrix = cosine_similarity(scaled_features)


In [7]:
# Find the top-n lookalikes for each customer (n defaults to 3)

def get_top_n_lookalikes(similarity_matrix, customer_ids, n=3):
    """Return the ``n`` most similar *other* customers for every customer.

    Parameters
    ----------
    similarity_matrix : 2-D array-like
        ``similarity_matrix[i][j]`` is the similarity between the customers at
        positions ``i`` and ``j``.
    customer_ids : sequence
        ``customer_ids[i]`` labels row ``i`` and column ``i`` of
        ``similarity_matrix``; it must contain one entry per matrix row.
    n : int, default 3
        Maximum number of lookalikes returned per customer.

    Returns
    -------
    dict
        Maps each customer ID to a list of at most ``n`` dicts of the form
        ``{"CustomerID": <id>, "Score": <similarity rounded to 4 decimals>}``,
        ordered from most to least similar. A customer never appears in their
        own list.

    Raises
    ------
    ValueError
        If ``customer_ids`` does not have exactly one entry per matrix row.
    """
    if len(customer_ids) != len(similarity_matrix):
        raise ValueError(
            "customer_ids must contain one ID per row of similarity_matrix "
            f"(got {len(customer_ids)} IDs for {len(similarity_matrix)} rows); "
            "pass the full ID list and select the customers of interest afterwards"
        )

    recommendations = {}
    for i, customer_id in enumerate(customer_ids):
        # Exclude the customer by index rather than assuming they sort first:
        # another customer can tie at the top score, and a customer's own score
        # is not guaranteed to be the maximum of the row.
        candidates = [
            (j, score) for j, score in enumerate(similarity_matrix[i]) if j != i
        ]
        # list.sort is stable, so equal scores keep their original column order.
        candidates.sort(key=lambda pair: pair[1], reverse=True)
        recommendations[customer_id] = [
            # float() keeps NumPy scalar types out of the returned structure.
            {"CustomerID": customer_ids[j], "Score": round(float(score), 4)}
            for j, score in candidates[:n]
        ]
    return recommendations


In [9]:
# Sanity check: the similarity matrix needs one row and one column per customer ID.
# `customer_ids` is built here so that this cell also works on a fresh kernel
# run top to bottom, instead of relying on a variable assigned in a later cell.
customer_ids = customer_features['CustomerID'].tolist()
print("Similarity Matrix Shape:", similarity_matrix.shape)
print("Customer IDs Length:", len(customer_ids))
assert similarity_matrix.shape == (len(customer_ids), len(customer_ids))


Similarity Matrix Shape: (199, 199)
Customer IDs Length: 199


In [10]:
# Keep as many customer IDs (by position) as the similarity matrix has rows.
n_matrix_rows = similarity_matrix.shape[0]
customer_ids = customer_features['CustomerID'].iloc[:n_matrix_rows].tolist()


In [13]:
# Rank lookalikes against the complete customer list: every column of the
# similarity matrix needs its ID, so the list must not be truncated before the
# call. Only afterwards keep the recommendations for the first 20 customers.
all_recommendations = get_top_n_lookalikes(similarity_matrix, customer_ids)
lookalike_recommendations = {
    customer_id: all_recommendations[customer_id]
    for customer_id in customer_ids[:20]
}


In [14]:
customer_ids = customer_features['CustomerID'].tolist()

# Rank lookalikes against the complete customer list (every column of the
# similarity matrix needs its ID), then keep only the first 20 customers.
all_recommendations = get_top_n_lookalikes(similarity_matrix, customer_ids)
lookalike_recommendations = {
    customer_id: all_recommendations[customer_id]
    for customer_id in customer_ids[:20]
}

# Convert to DataFrame and save as CSV; each customer's lookalike list is
# stored as a JSON string in the 'Lookalikes' column.
lookalike_df = pd.DataFrame([
    {'CustomerID': k, 'Lookalikes': json.dumps(v)} for k, v in lookalike_recommendations.items()
])
lookalike_df.to_csv('Lookalike.csv', index=False)
