In [1]:
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import StandardScaler

In [2]:
# Load the three raw input tables from CSV files in the working directory.
# The files are read in the order customers -> products -> transactions.
customers, products, transactions = (
    pd.read_csv(csv_name)
    for csv_name in ('Customers.csv', 'Products.csv', 'Transactions.csv')
)

In [3]:
# Convert the date columns from their CSV representation to datetime64 values.
# Each column is overwritten in place on its own frame.
for frame, date_col in (
    (customers, 'SignupDate'),
    (transactions, 'TransactionDate'),
):
    frame[date_col] = pd.to_datetime(frame[date_col])

In [4]:
# Build one wide table: each transaction row enriched first with the matching
# customer's columns, then with the matching product's columns.
# Left joins keep every row of the left-hand table even when the key has no match.
transactions_with_customers = pd.merge(
    transactions, customers, on='CustomerID', how='left'
)
merged_data = pd.merge(
    transactions_with_customers, products, on='ProductID', how='left'
)

In [5]:
def _most_common(values):
    """Return the most frequent non-null value in ``values``, or None if there is none.

    ``Series.mode()`` ignores NaN by default, so a group whose values are all
    missing yields an empty result; indexing it with ``[0]`` would raise
    ``KeyError``. Checking the mode result itself (rather than ``len(values)``)
    covers that case. When several values tie, the first entry of ``mode()``
    is returned, as before.
    """
    modes = values.mode()
    return modes.iloc[0] if not modes.empty else None


# Collapse the transaction-level table into one row of behavioural features
# per CustomerID:
#   total_spending / avg_spending  - sum and mean of TotalValue
#   total_quantity                 - sum of Quantity
#   num_transactions               - number of distinct TransactionIDs
#   unique_products                - number of distinct ProductIDs
#   most_purchased_category        - most frequent Category (None if all missing)
customer_features = merged_data.groupby('CustomerID').agg(
    total_spending=('TotalValue', 'sum'),
    avg_spending=('TotalValue', 'mean'),
    total_quantity=('Quantity', 'sum'),
    num_transactions=('TransactionID', 'nunique'),
    unique_products=('ProductID', 'nunique'),
    most_purchased_category=('Category', _most_common)
).reset_index()

In [6]:
# Add each customer's Region, then one-hot encode the two categorical columns.
# drop_first=True omits the first level of each encoded column.
# NOTE: this cell rebinds `customer_features` from its own previous value, so it
# is meant to be run once after the aggregation cell, not re-run on its own.
region_lookup = customers[['CustomerID', 'Region']]
features_with_region = customer_features.merge(
    region_lookup, on='CustomerID', how='left'
)
customer_features = pd.get_dummies(
    features_with_region,
    columns=['Region', 'most_purchased_category'],
    drop_first=True,
)

In [7]:
# Standardise every feature column; CustomerID is an identifier, not a feature,
# so it is left out of the matrix handed to the scaler.
feature_frame = customer_features.drop(columns=['CustomerID'])
scaler = StandardScaler()
scaled_features = scaler.fit_transform(feature_frame)

In [8]:
# Pairwise cosine similarity between all rows of the scaled feature matrix;
# entry [i, j] compares row i with row j.
similarity_matrix = cosine_similarity(X=scaled_features)

In [9]:
# Start from an empty result mapping: CustomerID -> list of lookalike entries.
lookalike_map = dict()
# Customer identifiers, in the same row order as the feature table.
customer_ids = customer_features['CustomerID']

In [10]:
# How many customers (taken from the top of `customer_ids`) get recommendations,
# and how many lookalikes are kept for each of them.
NUM_TARGET_CUSTOMERS = 20
TOP_K = 3

for idx, cust_id in enumerate(customer_ids.iloc[:NUM_TARGET_CUSTOMERS]):
    # Pair every *other* customer's row position with its similarity score.
    # The customer itself is excluded by position rather than by assuming it
    # sorts first: with tied scores (e.g. duplicate feature vectors) it might
    # not, and slicing [1:4] would then keep the customer as its own lookalike.
    # float() turns NumPy scalars into plain Python floats so that a later
    # str()/repr() of the list shows bare numbers.
    candidates = [
        (other_idx, float(score))
        for other_idx, score in enumerate(similarity_matrix[idx])
        if other_idx != idx
    ]
    # Highest similarity first; the sort is stable, so ties keep row order.
    candidates.sort(key=lambda pair: pair[1], reverse=True)
    # .iloc looks ids up by position, matching the row positions of the
    # similarity matrix regardless of the Series' index labels.
    lookalike_map[cust_id] = [
        (customer_ids.iloc[other_idx], score)
        for other_idx, score in candidates[:TOP_K]
    ]

In [11]:
# Turn the mapping into a two-column table: one row per target customer, with
# that customer's lookalike list stored as a single cell value.
lookalike_rows = []
for target_id, matches in lookalike_map.items():
    lookalike_rows.append({'CustomerID': target_id, 'Lookalikes': matches})
lookalike_df = pd.DataFrame(lookalike_rows)

In [12]:
# Serialise each lookalike list to its string form so it fits in one CSV cell,
# then write the table without the DataFrame index.
lookalike_df['Lookalikes'] = lookalike_df['Lookalikes'].map(str)
lookalike_df.to_csv('Lookalike.csv', index=False)

print("Lookalike model completed. Output saved as Lookalike.csv.")

Lookalike model completed. Output saved as Lookalike.csv.
