In [1]:
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import StandardScaler

In [2]:
# Read the three source tables (paths are relative to the working directory).
customers, products, transactions = (
    pd.read_csv(csv_name)
    for csv_name in ('Customers.csv', 'Products.csv', 'Transactions.csv')
)


In [3]:
# Attach customer columns, then product columns, to every transaction row.
# Both joins use pandas' default (inner) merge, so a transaction whose
# CustomerID or ProductID has no match in the lookup table is dropped.
data = pd.merge(
    pd.merge(transactions, customers, on="CustomerID"),
    products,
    on="ProductID",
)

In [4]:
# Feature engineering: collapse the merged rows to one row per CustomerID.
#   TotalValue    -> sum of TotalValue across the customer's rows (total spending)
#   TransactionID -> count of non-null TransactionID values (number of transactions)
#   Region        -> first entry of mode(), i.e. the most frequent region
customer_features = (
    data.groupby('CustomerID')
    .agg(
        TotalValue=('TotalValue', 'sum'),
        TransactionID=('TransactionID', 'count'),
        Region=('Region', lambda regions: regions.mode()[0]),
    )
    .reset_index()  # bring CustomerID back as the first ordinary column
)

In [5]:
# Replace the categorical Region column with indicator columns named
# 'Region_<value>', appended after the remaining columns. drop_first=True
# leaves out the indicator for the first category.
customer_features = pd.concat(
    [
        customer_features.drop(columns='Region'),
        pd.get_dummies(customer_features['Region'], prefix='Region', drop_first=True),
    ],
    axis=1,
)

In [6]:
# Standardise every column after the first one (column 0 is CustomerID).
# Note that this covers the Region indicator columns as well as the two
# numeric aggregates.
scaler = StandardScaler()
feature_block = customer_features.iloc[:, 1:]
scaled_features = scaler.fit(feature_block).transform(feature_block)

In [7]:
# Pairwise cosine similarity between the scaled customer feature vectors.
# Row/column i of the matrix corresponds to row i of customer_features.
similarity_matrix = cosine_similarity(scaled_features)

# Generate lookalike recommendations for the first N_TARGET_CUSTOMERS customers,
# keeping the N_LOOKALIKES most similar *other* customers for each of them.
N_TARGET_CUSTOMERS = 20
N_LOOKALIKES = 3

customer_ids = customer_features['CustomerID']
lookalike_results = {}
for idx, customer in enumerate(customer_ids.iloc[:N_TARGET_CUSTOMERS]):
    # Exclude the customer's own entry by position rather than assuming it
    # sorts first: another customer can tie with (or, through floating-point
    # rounding, slightly exceed) the self-similarity, and an all-zero feature
    # vector has self-similarity 0. In those cases slicing off the first
    # sorted element would drop a real match and keep the customer itself.
    scores = [
        (other_idx, score)
        for other_idx, score in enumerate(similarity_matrix[idx])
        if other_idx != idx
    ]
    # sorted() is stable, so equal scores keep their original row order.
    sorted_scores = sorted(scores, key=lambda pair: pair[1], reverse=True)[:N_LOOKALIKES]
    # Map the customer ID to its top matches as (CustomerID, score) tuples.
    # .iloc is used because other_idx is a row position, not an index label;
    # float() stores plain Python numbers so the tuples render the same way
    # when written out, whatever the installed NumPy's scalar repr is.
    lookalike_results[customer] = [
        (customer_ids.iloc[other_idx], float(score))
        for other_idx, score in sorted_scores
    ]

In [8]:
# Turn the {CustomerID: [match1, match2, match3]} mapping into a table with
# one row per customer and one column per ranked match, then write it to disk.
lookalike_columns = ['Lookalike1', 'Lookalike2', 'Lookalike3']
lookalike_df = (
    pd.DataFrame.from_dict(lookalike_results, orient='index', columns=lookalike_columns)
    .rename_axis('CustomerID')  # label the index so it is written as a named column
)
lookalike_df.to_csv('bhavya_shree_Lookalike.csv')

print("Lookalike recommendations saved to 'bhavya_shree_Lookalike.csv'.")

Lookalike recommendations saved to 'bhavya_shree_Lookalike.csv'.


In [9]:
# Read the exported file back in and print it to inspect what was written.
looklike = pd.read_csv("bhavya_shree_Lookalike.csv")
print(looklike)

   CustomerID                     Lookalike1                     Lookalike2  \
0       C0001  ('C0137', 0.9999760744554356)  ('C0152', 0.9999512581210065)   
1       C0002  ('C0142', 0.9923237804484972)  ('C0177', 0.9744812852490708)   
2       C0003  ('C0133', 0.9989191532022407)  ('C0052', 0.9977152611110504)   
3       C0004  ('C0113', 0.9932742267970033)  ('C0102', 0.9893132131115234)   
4       C0005  ('C0159', 0.9999338335836799)  ('C0186', 0.9969706156703498)   
5       C0006  ('C0158', 0.9885088252201482)   ('C0168', 0.975341785210415)   
6       C0007  ('C0159', 0.9861479609355337)  ('C0005', 0.9841746631569637)   
7       C0008  ('C0109', 0.9816332981630587)  ('C0139', 0.9646953234916094)   
8       C0009  ('C0062', 0.9895764049212195)  ('C0198', 0.9876658949337963)   
9       C0010  ('C0199', 0.9981856783018741)  ('C0121', 0.9883480514072666)   
10      C0011  ('C0107', 0.9994455160430945)  ('C0048', 0.9992878943750295)   
11      C0012   ('C0155', 0.999473779145566)  ('C010