In [1]:
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import StandardScaler

Lookalike recommendations saved to Lookalike.csv


In [None]:
# Read the three input tables from CSV files located relative to the current
# working directory. `products` is loaded here but is not referenced by the
# later cells visible in this notebook.
customers, products, transactions = map(
    pd.read_csv,
    ("customers.csv", "products.csv", "transactions.csv"),
)

In [None]:
# Convert the date columns to pandas datetimes, assigning back into the same
# frames so that later cells see the parsed values.
for frame, date_column in (
    (customers, 'SignupDate'),
    (transactions, 'TransactionDate'),
):
    frame[date_column] = pd.to_datetime(frame[date_column])

In [None]:
# Per-customer transaction summary: total / mean / number of TotalValue entries
# and total / mean Quantity. Named aggregation gives each output column its
# final name directly, so no positional renaming of a MultiIndex is needed.
transaction_features = (
    transactions
    .groupby('CustomerID')
    .agg(
        TotalValue_sum=('TotalValue', 'sum'),
        TotalValue_mean=('TotalValue', 'mean'),
        Transaction_count=('TotalValue', 'count'),
        Quantity_sum=('Quantity', 'sum'),
        Quantity_mean=('Quantity', 'mean'),
    )
    .reset_index()
)


# Attach the per-customer transaction aggregates to the customer table. The
# left join keeps customers without any transactions; their aggregate columns
# come back as NaN.
customer_data = pd.merge(customers, transaction_features, on='CustomerID', how='left')

numerical_features = ['TotalValue_sum', 'TotalValue_mean', 'Transaction_count', 'Quantity_sum', 'Quantity_mean']

# Zero-fill only the aggregate columns (a customer with no transactions has
# spent nothing). A blanket fillna(0) would also write 0 into non-numeric
# columns such as Region, which would then be one-hot encoded as a spurious
# "Region_0" category and distort the similarity scores.
customer_data[numerical_features] = customer_data[numerical_features].fillna(0)

# One-hot encode Region; a missing Region yields all-zero dummy columns.
customer_data = pd.get_dummies(customer_data, columns=['Region'])

# Standardise the aggregates so that no single feature dominates the cosine
# similarity purely because of its scale.
scaler = StandardScaler()
customer_data[numerical_features] = scaler.fit_transform(customer_data[numerical_features])

# Pairwise cosine similarity between customers over every remaining column;
# the identifier, name and signup-date columns are excluded from the features.
# Row/column i of the matrix corresponds to row i of customer_data.
similarity_matrix = cosine_similarity(customer_data.drop(columns=['CustomerID', 'CustomerName', 'SignupDate']))


# Build, for each of the first NUM_TARGET_CUSTOMERS rows of customer_data, the
# TOP_N most similar other customers as (CustomerID, similarity score) pairs.
NUM_TARGET_CUSTOMERS = 20
TOP_N = 3

# Read the IDs once instead of materialising a full row with .iloc per lookup.
customer_ids = customer_data['CustomerID'].tolist()

# Clamp to the number of available customers so a small dataset does not
# raise IndexError.
num_targets = min(NUM_TARGET_CUSTOMERS, len(customer_ids))

lookalike_results = {}
for i in range(num_targets):
    # Every other customer paired with its similarity to customer i. Scores are
    # converted to plain Python floats so the tuples written to CSV later look
    # the same regardless of how the installed NumPy version prints its scalars.
    candidates = [
        (idx, float(score))
        for idx, score in enumerate(similarity_matrix[i])
        if idx != i  # a customer is not its own lookalike
    ]

    # Stable descending sort: ties keep their original row order.
    candidates.sort(key=lambda pair: pair[1], reverse=True)

    lookalike_results[customer_ids[i]] = [
        (customer_ids[idx], score) for idx, score in candidates[:TOP_N]
    ]

# Write the recommendations to disk: one row per target customer, with the
# customer ID as the row label followed by its (CustomerID, score) tuples.
# No header row is written.
output_path = "Chirag_Jain_Lookalike.csv"
lookalike_df = pd.DataFrame.from_dict(data=lookalike_results, orient='index')
lookalike_df.to_csv(output_path, header=False)

print("Lookalike recommendations saved")