In [1]:
import pandas as pd
import numpy as np
import csv
from sklearn.metrics.pairwise import cosine_similarity

# Load the three input tables from CSV files in the current working directory.
transactions, products, customers = (
    pd.read_csv(path)
    for path in ('transactions.csv', 'products.csv', 'customers.csv')
)

# Inner-join transactions with product and customer records, so only
# transactions whose ProductID and CustomerID appear in both lookup tables
# survive.
merged_data = pd.merge(transactions, products, on='ProductID')
merged_data = pd.merge(merged_data, customers, on='CustomerID')

# Customer x product matrix of Quantity; customer/product pairs with no
# transactions are filled with 0.
# NOTE(review): pivot_table aggregates with its default aggfunc ('mean'), so
# repeated purchases of one product are averaged, not summed - confirm that
# this is the intended signal.
pivot_table = pd.pivot_table(
    merged_data,
    index='CustomerID',
    columns='ProductID',
    values='Quantity',
    fill_value=0,
)


def _min_max_scale(row):
    """Rescale one customer's row to [0, 1]; constant rows are returned as-is."""
    low, high = row.min(), row.max()
    if high == low:
        # A constant row would cause a division by zero, so leave it alone.
        return row
    return (row - low) / (high - low)


# Scale each customer (row) independently, then compare customers pairwise.
pivot_table = pivot_table.apply(_min_max_scale, axis=1)
similarity_matrix = cosine_similarity(pivot_table)

def get_top_3_lookalikes(customer_id, similarity_matrix, customer_ids):
    """Return the three customers most similar to ``customer_id``.

    Args:
        customer_id: Identifier of the customer to find lookalikes for.
        similarity_matrix: Square 2-D array (or nested sequence) of pairwise
            similarity scores; row/column ``i`` corresponds to
            ``customer_ids[i]``.
        customer_ids: List of customer identifiers, in the same order as the
            rows of ``similarity_matrix``.

    Returns:
        A list of up to three ``(customer_id, similarity)`` tuples, ordered
        from most to least similar, with the similarity rounded to four
        decimal places. The queried customer itself is never included. Fewer
        than three tuples are returned when fewer than three other customers
        exist.

    Raises:
        ValueError: If ``customer_id`` is not present in ``customer_ids``.
    """
    idx = customer_ids.index(customer_id)
    scores = np.asarray(similarity_matrix[idx], dtype=float)

    # Stable sort on the negated scores gives descending similarity with ties
    # kept in their original (index) order, so results are deterministic.
    ranked = np.argsort(-scores, kind="stable")

    # Exclude the customer explicitly instead of assuming it ranks first:
    # another customer can tie with it (identical vectors), and a customer
    # with an all-zero vector has a self-similarity of 0.
    top_customers = ranked[ranked != idx][:3]

    return [(customer_ids[i], round(float(scores[i]), 4)) for i in top_customers]

# Row labels of the pivot table, in the same order as the similarity matrix rows.
customer_ids = pivot_table.index.tolist()

# Compute the top-3 lookalikes for the first 20 customers only.
lookalikes = {}
for cust in customer_ids[:20]:
    lookalikes[cust] = get_top_3_lookalikes(cust, similarity_matrix, customer_ids)

# Write one row per customer; the lookalikes are packed into a single
# "; "-separated cell of the form "<id> (similarity: <score>)".
file_name = "Lookalike_Customers.csv"
with open(file_name, 'w', newline='') as file:
    writer = csv.writer(file)
    writer.writerow(['CustomerID', 'LookalikeCustomers'])
    writer.writerows(
        [
            cust_id,
            "; ".join(
                f"{match_id} (similarity: {score})" for match_id, score in matches
            ),
        ]
        for cust_id, matches in lookalikes.items()
    )

print(f"Lookalike results saved to {file_name}")

Lookalike results saved to Lookalike_Customers.csv
