In [6]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.metrics.pairwise import cosine_similarity

# Load the three source tables from paths relative to the working directory.
# products_df is loaded here but not used by the code visible in this cell.
customers_df = pd.read_csv('Customers.csv')
products_df = pd.read_csv('Products.csv')
transactions_df = pd.read_csv('Transactions.csv')

# Left join so customers without any transaction still get a row.
customer_data = customers_df.merge(transactions_df, on='CustomerID', how='left')

# Collapse the joined rows to one row per customer.
customer_data = customer_data.groupby('CustomerID').agg({
    'Region': 'first',
    'SignupDate': 'first',
    'Quantity': 'sum',
    'TotalValue': 'sum'
}).reset_index()

# Attach the distinct-product count by CustomerID rather than by row position:
# the count series only contains customers that appear in transactions_df, so
# a positional assignment would shift values onto the wrong customers as soon
# as one customer has no transactions. Unmatched customers get NaN here and
# are zero-filled below.
products_per_customer = transactions_df.groupby('CustomerID')['ProductID'].nunique()
customer_data['NumProductsPurchased'] = customer_data['CustomerID'].map(products_per_customer)

customer_features = customer_data[['Quantity', 'TotalValue', 'NumProductsPurchased']].astype(float)

customer_features = customer_features.fillna(0)  # Replace NaNs with 0

# Standardise each feature so no single column dominates the similarity.
scaler = StandardScaler()
customer_features_scaled = scaler.fit_transform(customer_features)

# Pairwise cosine similarity, labelled by CustomerID on both axes.
similarity_matrix = cosine_similarity(customer_features_scaled)
similarity_df = pd.DataFrame(similarity_matrix, index=customer_data['CustomerID'], columns=customer_data['CustomerID'])

def get_top_3_lookalikes(customer_id, similarity_df: pd.DataFrame) -> pd.Series:
    """Return the three highest-scoring other customers for ``customer_id``.

    Args:
        customer_id: Row label to look up in ``similarity_df``.
        similarity_df: Similarity scores with customer IDs as row and
            column labels.

    Returns:
        A Series of up to three scores, indexed by the lookalike customer
        IDs and sorted from most to least similar.

    Raises:
        KeyError: If ``customer_id`` is not a row label of ``similarity_df``.
    """
    scores = similarity_df.loc[customer_id]
    # Remove the customer by label instead of assuming it ranks first: a tie
    # at the top score, or an all-zero feature vector (self-similarity 0),
    # would otherwise return the customer itself or discard a real match.
    others = scores.drop(customer_id, errors='ignore')
    return others.nlargest(3)

# Build one output row per customer for the first 20 customers, laid out as
# [id_1, score_1, id_2, score_2, id_3, score_3].
lookalike_recommendations = {}

for customer_id in customer_data['CustomerID'].head(20):
    matches = get_top_3_lookalikes(customer_id, similarity_df)

    pairs = list(zip(matches.index, matches.values))
    # Pad with empty placeholders so every row carries three (id, score) pairs.
    pairs.extend([("", 0)] * (3 - len(pairs)))

    # Interleave ids and scores into a single flat row.
    row = []
    for lookalike_id, score in pairs:
        row.extend((lookalike_id, score))
    lookalike_recommendations[customer_id] = row

lookalike_df = pd.DataFrame.from_dict(lookalike_recommendations, orient='index')

# Name the columns Lookalike_1, Score_1, Lookalike_2, Score_2, ...
pair_count = lookalike_df.shape[1] // 2
column_names = []
for rank in range(1, pair_count + 1):
    column_names += [f'Lookalike_{rank}', f'Score_{rank}']
lookalike_df.columns = column_names

lookalike_df.to_csv('Lookalike.csv')

print(lookalike_df.head())


      Lookalike_1   Score_1 Lookalike_2   Score_2 Lookalike_3   Score_3
C0001       C0137  0.950558       C0164  0.944220       C0069  0.924948
C0002       C0029  0.999760       C0031  0.999000       C0094  0.993627
C0003       C0176  0.883064       C0027  0.857059       C0010  0.821926
C0004       C0075  0.997674       C0175  0.994176       C0113  0.991124
C0005       C0058  0.999794       C0123  0.999700       C0015  0.999682
