# In[1]:
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import StandardScaler

# Build a "lookalike" table: for each of the first NUM_TARGET_CUSTOMERS
# customers, find the TOP_K *other* customers with the most similar profile
# (cosine similarity over standardized per-customer features) and write the
# result to Lookalike.csv.

# How many customers (taken from the start of the feature table) receive
# recommendations, and how many lookalikes are kept for each of them.
NUM_TARGET_CUSTOMERS = 20
TOP_K = 3

customers = pd.read_csv('Customers.csv')
products = pd.read_csv('Products.csv')
transactions = pd.read_csv('Transactions.csv')

# Default (inner) joins: only transactions whose CustomerID and ProductID
# both resolve are kept, so customers without any transaction get no row.
data = transactions.merge(customers, on='CustomerID').merge(products, on='ProductID')

# One row per customer.
# NOTE(review): 'Price_x' / 'Price_y' are the names pandas' merge produces
# when both sides carry a column with the same name -- presumably 'Price'
# exists in both Transactions.csv and Products.csv. If the two columns hold
# the same values, price is effectively weighted twice in the similarity;
# confirm against the data before dropping either one.
customer_features = data.groupby('CustomerID').agg({
    'TotalValue': 'sum',
    'Quantity': 'sum',
    'Price_x': 'mean',
    'Price_y': 'mean',
    'ProductID': 'nunique',
    'Region': 'first',
}).reset_index()

# One-hot encode the region; drop_first removes one redundant indicator.
customer_features = pd.get_dummies(customer_features, columns=['Region'], drop_first=True)

# Standardize every feature column so no single scale dominates the
# cosine similarity.
scaler = StandardScaler()
normalized_features = scaler.fit_transform(customer_features.drop('CustomerID', axis=1))

# similarity_matrix[i][j] is the similarity between the customers in rows
# i and j of customer_features.
similarity_matrix = cosine_similarity(normalized_features)

lookalike_map = {}
customer_ids = customer_features['CustomerID'].tolist()

for i, customer_id in enumerate(customer_ids[:NUM_TARGET_CUSTOMERS]):
    # Exclude the customer's own row by index instead of assuming it sorts
    # first: another customer with an identical feature vector (or
    # floating-point noise) can tie with or outrank the self-similarity.
    # float() turns NumPy scalars into builtin floats so the str() below
    # yields "0.97" rather than "np.float64(0.97)" on NumPy >= 2.
    candidates = [
        (customer_ids[j], float(score))
        for j, score in enumerate(similarity_matrix[i])
        if j != i
    ]
    # Stable sort: ties keep their original row order.
    candidates.sort(key=lambda pair: pair[1], reverse=True)
    lookalike_map[customer_id] = [
        (other_id, round(score, 2)) for other_id, score in candidates[:TOP_K]
    ]

# 'Lookalikes' holds the string form of a list of (CustomerID, score) tuples.
# Explicit columns keep the CSV header present even if lookalike_map is empty.
lookalike_df = pd.DataFrame(
    [{'CustomerID': key, 'Lookalikes': str(value)} for key, value in lookalike_map.items()],
    columns=['CustomerID', 'Lookalikes'],
)

lookalike_df.to_csv('Lookalike.csv', index=False)