In [2]:
import numpy as np
import pandas as pd

In [4]:
# Load the three input tables from CSV files in the working directory.
# The files are read in the order listed: customers, products, transactions.
customers, products, transactions = map(
    pd.read_csv,
    ('Customers.csv', 'Products.csv', 'Transactions.csv'),
)

In [6]:
# Enrich every transaction with customer attributes, then product attributes.
# Left joins keep each transaction row even when its lookup key has no match.
#
# validate='many_to_one' makes pandas raise MergeError when the key is not
# unique in the right-hand (lookup) table. Without it, a duplicated CustomerID
# or ProductID would silently duplicate transaction rows and inflate any
# per-customer totals computed from `data`.
transactions_customers = pd.merge(
    transactions,
    customers,
    on='CustomerID',
    how='left',
    validate='many_to_one',
)

# Both inputs carry a `Price` column, so the merged frame exposes it twice:
# Price_x comes from the transactions side, Price_y from the products side.
data = pd.merge(
    transactions_customers,
    products,
    on='ProductID',
    how='left',
    validate='many_to_one',
)
data.head()

Unnamed: 0,TransactionID,CustomerID,ProductID,TransactionDate,Quantity,TotalValue,Price_x,CustomerName,Region,SignupDate,ProductName,Category,Price_y
0,T00001,C0199,P067,2024-08-25 12:38:23,1,300.68,300.68,Andrea Jenkins,Europe,2022-12-03,ComfortLiving Bluetooth Speaker,Electronics,300.68
1,T00112,C0146,P067,2024-05-27 22:23:54,1,300.68,300.68,Brittany Harvey,Asia,2024-09-04,ComfortLiving Bluetooth Speaker,Electronics,300.68
2,T00166,C0127,P067,2024-04-25 07:38:55,1,300.68,300.68,Kathryn Stevens,Europe,2024-04-04,ComfortLiving Bluetooth Speaker,Electronics,300.68
3,T00272,C0087,P067,2024-03-26 22:55:37,2,601.36,300.68,Travis Campbell,South America,2024-04-11,ComfortLiving Bluetooth Speaker,Electronics,300.68
4,T00363,C0070,P067,2024-03-21 15:10:10,3,902.04,300.68,Timothy Perez,Europe,2022-03-15,ComfortLiving Bluetooth Speaker,Electronics,300.68


In [8]:
def _most_common(values):
    """Return the first mode of ``values``, or None when there is no mode."""
    modes = values.mode()
    return None if modes.empty else modes[0]


# Build one profile row per customer:
#   TotalValue - summed spend
#   Quantity   - summed quantity purchased
#   Category   - most frequently purchased product category
#   Region     - region taken from the customer's first row
customer_profiles = (
    data.groupby('CustomerID')
    .agg(
        TotalValue=('TotalValue', 'sum'),
        Quantity=('Quantity', 'sum'),
        Category=('Category', _most_common),
        Region=('Region', 'first'),
    )
    .reset_index()
)

In [10]:
# One-hot encode the categorical profile columns so they can be used as
# numeric features.
#
# drop_first=False keeps an indicator column for every level. Dropping the
# first level would encode that Region/Category as an all-zero vector, so
# customers in the dropped levels would contribute no categorical signal to a
# similarity measure while customers in every other level would.
#
# dtype=float keeps the indicators numeric, so converting the profile columns
# to an array yields a float matrix rather than a mixed bool/float object array.
#
# NOTE: this overwrites `customer_profiles`; re-running the cell without
# re-running the aggregation cell first fails because the 'Region' and
# 'Category' columns no longer exist.
customer_profiles = pd.get_dummies(
    customer_profiles,
    columns=['Region', 'Category'],
    drop_first=False,
    dtype=float,
)

In [12]:
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import MinMaxScaler

In [14]:
# Min-max scale the two numeric profile columns in place so that spend and
# quantity are on a comparable scale to the other feature columns.
numeric_columns = ['TotalValue', 'Quantity']
scaler = MinMaxScaler()
scaled_values = scaler.fit_transform(customer_profiles[numeric_columns])
customer_profiles[numeric_columns] = scaled_values

# Keep the identifiers aside; every remaining column is a model feature.
customer_ids = customer_profiles['CustomerID']
feature_frame = customer_profiles.drop(columns=['CustomerID'])
features = feature_frame.values

# Pairwise cosine similarity: row i / column j compares customer i with
# customer j, in the same order as `customer_ids`.
similarity_matrix = cosine_similarity(features)

# Filled by the lookalike search: CustomerID -> list of (CustomerID, score).
lookalike_map = dict()

In [18]:
# For each of the first 20 profile rows, rank every other customer by cosine
# similarity and keep the three highest-scoring ones.
for row_pos, customer_id in enumerate(customer_ids.iloc[:20]):
    # Similarity of this customer to everyone, labelled by CustomerID.
    scores = pd.Series(similarity_matrix[row_pos], index=customer_ids)
    # A customer must not be recommended as their own lookalike.
    other_scores = scores[scores.index != customer_id]
    best_matches = other_scores.nlargest(3)
    lookalike_map[customer_id] = list(best_matches.items())

# One output row per customer; the match list is stored as its string form.
lookalike_rows = [
    (cust, str(matches)) for cust, matches in lookalike_map.items()
]
lookalike_df = pd.DataFrame(lookalike_rows, columns=['CustomerID', 'Lookalikes'])
lookalike_df.to_csv('Lookalike.csv', index=False)

print("\nLookalike recommendations for the first 20 customers have been saved as 'Lookalike.csv'.")


Lookalike recommendations for the first 20 customers have been saved as 'Lookalike.csv'.
