In [1]:
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import MinMaxScaler

In [3]:
# Read the three input tables from CSV files in the current working directory,
# in the order customers -> transactions -> products.
customers, transactions, products = (
    pd.read_csv(csv_name)
    for csv_name in ("Customers.csv", "Transactions.csv", "Products.csv")
)

In [5]:
# Attach product columns and then customer columns to every transaction.
# Both joins use pandas' default inner join, so transactions whose ProductID
# or CustomerID has no match in the lookup tables are dropped.
data = pd.merge(
    pd.merge(transactions, products, on='ProductID'),
    customers,
    on='CustomerID',
)


In [7]:
# Per-customer behavioural features derived from the joined transaction table:
# the sum and mean of TotalValue, and the number of non-null TransactionID
# entries for each CustomerID.
customer_features = (
    data
    .groupby('CustomerID')
    .agg(
        TotalSpend=pd.NamedAgg(column='TotalValue', aggfunc='sum'),
        NumTransactions=pd.NamedAgg(column='TransactionID', aggfunc='count'),
        AvgTransactionValue=pd.NamedAgg(column='TotalValue', aggfunc='mean'),
    )
    .reset_index()  # turn the CustomerID group key back into a regular column
)

In [9]:
# Left join keeps every row of `customers`; customers that have no row in
# `customer_features` get NaN in the three feature columns.
customer_data = pd.merge(customers, customer_features, how='left', on='CustomerID')


In [11]:
# Customers left without features by the left join get zeros instead of NaN
# in each of the three feature columns.
customer_data = customer_data.fillna(
    value={'TotalSpend': 0, 'NumTransactions': 0, 'AvgTransactionValue': 0}
)


In [13]:
# Columns that feed the similarity computation.
numeric_features = ['TotalSpend', 'NumTransactions', 'AvgTransactionValue']

# Fit a min-max scaler on those columns, then overwrite them with the scaled
# values so each feature lies in [0, 1] (MinMaxScaler's default range).
# NOTE: this replaces the raw values in `customer_data`, so re-running this
# cell alone rescales already-scaled data.
scaler = MinMaxScaler().fit(customer_data[numeric_features])
customer_data[numeric_features] = scaler.transform(customer_data[numeric_features])


In [15]:
# Pairwise cosine similarity between every pair of rows of the scaled feature
# columns; the result is square, with one row and one column per row of
# `customer_data`, in the same order.
similarity_matrix = cosine_similarity(X=customer_data[numeric_features])


In [17]:
# Label both axes of the similarity matrix with CustomerID so scores can be
# looked up by customer rather than by row position.
similarity_df = pd.DataFrame(
    data=similarity_matrix,
    index=customer_data['CustomerID'],
    columns=customer_data['CustomerID'],
)


In [19]:
# Build {CustomerID: [(lookalike CustomerID, similarity score), ...]} holding
# the three most similar other customers for each of the first 20 customers
# listed in `customer_data`.
lookalike_results = {}
for customer_id in customer_data['CustomerID'][:20]:
    # Remove the customer's own entry by label instead of assuming it sorts
    # first: another customer can tie at the top (identical feature rows), and
    # an all-zero feature row scores 0 against itself under cosine similarity,
    # so a positional `[1:4]` slice can keep the customer as their own
    # lookalike and drop a genuine neighbour.
    other_scores = similarity_df[customer_id].drop(labels=customer_id)
    # nlargest returns the top scores in descending order and resolves ties by
    # first occurrence, which keeps the output deterministic.
    similar_customers = other_scores.nlargest(3)
    lookalike_results[customer_id] = list(similar_customers.items())

In [21]:
# One record per customer: the customer's ID and their list of
# (lookalike ID, score) pairs, written to CSV without the row index.
# The list is serialised into a single CSV cell as its Python string form.
lookalike_output = pd.DataFrame([
    dict(zip(('CustomerID', 'Lookalikes'), entry))
    for entry in lookalike_results.items()
])
lookalike_output.to_csv(path_or_buf="Lookalike.csv", index=False)

In [23]:
# Report completion and name the output file written by the previous cell.
print("Lookalike model recommendations saved to 'Lookalike.csv'.")

Lookalike model recommendations saved to 'Lookalike.csv'.
