In [2]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import MinMaxScaler


In [3]:
# Load the three input tables.
# The paths are raw strings so the Windows backslashes stay literal: in an
# ordinary string "\E", "\C", "\P" and "\T" are invalid escape sequences, which
# Python warns about and intends to reject in a future version.
customers = pd.read_csv(r"E:\Ecommercedata\Customers.csv")
products = pd.read_csv(r"E:\Ecommercedata\Products.csv")
transactions = pd.read_csv(r"E:\Ecommercedata\Transactions.csv")


# Preview the first rows of each table.
print("Customers Head:\n", customers.head())
print("Products Head:\n", products.head())
print("Transactions Head:\n", transactions.head())

# Check for missing values (per-column null counts).

print("Missing Values in Customers:\n", customers.isnull().sum())
print("Missing Values in Products:\n", products.isnull().sum())
print("Missing Values in Transactions:\n", transactions.isnull().sum())


# Merge datasets for unified analysis.
# Both merges use pandas' default inner join, so a transaction whose CustomerID
# or ProductID has no match in the lookup tables is dropped from merged_data.
# NOTE(review): if transactions and products share a non-key column name,
# pandas keeps both with "_x"/"_y" suffixes instead of the plain name.

merged_data = (
    transactions
    .merge(customers, on="CustomerID")
    .merge(products, on="ProductID")
)


Customers Head:
   CustomerID        CustomerName         Region  SignupDate
0      C0001    Lawrence Carroll  South America  2022-07-10
1      C0002      Elizabeth Lutz           Asia  2022-02-13
2      C0003      Michael Rivera  South America  2024-03-07
3      C0004  Kathleen Rodriguez  South America  2022-10-09
4      C0005         Laura Weber           Asia  2022-08-15
Products Head:
   ProductID              ProductName     Category   Price
0      P001     ActiveWear Biography        Books  169.30
1      P002    ActiveWear Smartwatch  Electronics  346.30
2      P003  ComfortLiving Biography        Books   44.12
3      P004            BookWorld Rug   Home Decor   95.69
4      P005          TechPro T-Shirt     Clothing  429.31
Transactions Head:
   TransactionID CustomerID ProductID      TransactionDate  Quantity  \
0        T00001      C0199      P067  2024-08-25 12:38:23         1   
1        T00112      C0146      P067  2024-05-27 22:23:54         1   
2        T00166      C0127

In [4]:
# Ensure merged_data has a 'Price' column: when it is missing, derive a
# per-unit price from the transaction total and the quantity.

has_price = 'Price' in merged_data.columns
if not has_price:
    merged_data['Price'] = merged_data['TotalValue'].div(merged_data['Quantity'])


# Feature engineering for the lookalike model: one row per customer.

def _first_mode(values):
    """Return the first entry of ``values.mode()`` (the most frequent value)."""
    return values.mode()[0]


customer_features = merged_data.groupby("CustomerID").agg(
    TotalSpend=('TotalValue', 'sum'),             # total amount spent
    TransactionCount=('TransactionID', 'count'),  # number of transactions
    AvgPrice=('Price', 'mean'),                   # mean price across purchases
    FavoriteCategory=('Category', _first_mode),   # most frequent category
)


# One-hot encode FavoriteCategory so every feature column is numeric.

customer_features = pd.get_dummies(customer_features, columns=['FavoriteCategory'])


# Scale data for similarity calculation.
# MinMaxScaler (imported in the notebook's top import cell) rescales each
# feature column independently, so large-valued columns such as TotalSpend do
# not dominate the cosine similarity below.

scaler = MinMaxScaler()
scaled_features = scaler.fit_transform(customer_features)


# Calculate similarity: pairwise cosine similarity between customer rows,
# labelled on both axes with the CustomerID index of customer_features.

similarity_matrix = cosine_similarity(scaled_features)
similarity_df = pd.DataFrame(
    similarity_matrix,
    index=customer_features.index,
    columns=customer_features.index,
)


# Generate Lookalike Recommendations
# For each of the first 20 customers in `customers`, keep the 3 other customers
# with the highest similarity score as (CustomerID, score) pairs.

lookalikes = {}
for customer_id in customers['CustomerID'][:20]:
    # similarity_df only has customers that appear in merged_data; a customer
    # without any transaction has no feature row, so record no lookalikes
    # instead of failing with a KeyError.
    if customer_id not in similarity_df.columns:
        lookalikes[customer_id] = []
        continue

    # Remove the customer's own entry explicitly rather than assuming it sorts
    # first: a tie at the top score (or floating-point rounding) could otherwise
    # leave the customer in their own list and discard a real neighbour.
    similar_customers = (
        similarity_df[customer_id]
        .drop(labels=customer_id)
        .sort_values(ascending=False)
        .head(3)
    )
    lookalikes[customer_id] = list(zip(similar_customers.index, similar_customers.values))


# Save Lookalike Recommendations
# Flatten the {customer: [(similar_customer, score), ...]} mapping into one
# [customer, similar_customer, score] row per recommendation.

lookalike_data = [
    [customer, similar_customer, score]
    for customer, similar_list in lookalikes.items()
    for similar_customer, score in similar_list
]

output_columns = ['CustomerID', 'SimilarCustomerID', 'Score']
lookalike_df = pd.DataFrame(lookalike_data, columns=output_columns)
lookalike_df.to_csv("Lookalike.csv", index=False)


print("Lookalike Recommendations saved to Lookalike.csv")

# Read the file back and show its first ten rows as a check on what was written.
lookalike_results = pd.read_csv("Lookalike.csv")
print("Lookalike Recommendations:\n", lookalike_results.head(10))

Lookalike Recommendations saved to Lookalike.csv
Lookalike Recommendations:
   CustomerID SimilarCustomerID     Score
0      C0001             C0069  0.999329
1      C0001             C0154  0.997034
2      C0001             C0181  0.996712
3      C0002             C0029  0.999978
4      C0002             C0088  0.996020
5      C0002             C0062  0.995831
6      C0003             C0178  0.997929
7      C0003             C0038  0.996544
8      C0003             C0189  0.996037
9      C0004             C0017  0.999095
