In [2]:
# Import libraries
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.metrics.pairwise import cosine_similarity

# Load datasets
customers = pd.read_csv('Customers.csv')
products = pd.read_csv('Products.csv')
transactions = pd.read_csv('Transactions.csv')

# Merge datasets. pd.merge defaults to an inner join, so only transactions
# with a matching customer row and a matching product row are kept.
merged_data = pd.merge(transactions, customers, on='CustomerID')
merged_data = pd.merge(merged_data, products, on='ProductID')


def _most_frequent(values):
    """Return the most frequent non-null value of a Series, or None.

    When several values tie, the first one returned by ``Series.mode`` is
    used. ``None`` is returned when the Series has no non-null values, which
    would otherwise make ``mode()[0]`` raise.
    """
    modes = values.mode()
    return modes.iloc[0] if not modes.empty else None


# Feature engineering: one row per customer.
customer_features = merged_data.groupby('CustomerID').agg({
    'TotalValue': 'sum',
    'Quantity': 'mean',
    'Category': _most_frequent,  # Most frequent category
    'Region': 'first'  # Region is assumed to be the same for a customer
}).reset_index()

# One-hot encode categorical features
encoded_features = pd.get_dummies(customer_features, columns=['Category', 'Region'])

# Standardize every feature column. Note that this includes the one-hot
# columns, not only TotalValue and Quantity.
scaler = StandardScaler()
normalized_features = scaler.fit_transform(encoded_features.drop('CustomerID', axis=1))

# Compute pairwise similarity between customers (row i vs. row j).
similarity_matrix = cosine_similarity(normalized_features)

# Recommend the top TOP_K similar customers for the first
# NUM_TARGET_CUSTOMERS customers (adjust as needed).
NUM_TARGET_CUSTOMERS = 20
TOP_K = 3

# Native Python values, so the saved text does not depend on numpy's repr.
customer_ids = customer_features['CustomerID'].tolist()

similar_customers = {}
# Clamp the range so a dataset with fewer customers does not raise IndexError.
for i in range(min(NUM_TARGET_CUSTOMERS, len(customer_ids))):
    customer_id = customer_ids[i]
    # Exclude the customer itself by index rather than by sorted position:
    # ties or rounding can put another customer ahead of the self-match.
    similarities = [
        (j, float(score))
        for j, score in enumerate(similarity_matrix[i])
        if j != i
    ]
    # The sort is stable, so tied scores keep their original row order.
    sorted_similarities = sorted(similarities, key=lambda x: x[1], reverse=True)[:TOP_K]
    similar_customers[customer_id] = [
        (customer_ids[j], score) for j, score in sorted_similarities
    ]

# Save recommendations to a CSV
lookalike_df = pd.DataFrame({
    'CustomerID': list(similar_customers.keys()),
    'Similar_Customers': [str(v) for v in similar_customers.values()]
})
lookalike_df.to_csv('Lookalike.csv', index=False)

print("Lookalike model completed. Results saved to Lookalike.csv.")


Lookalike model completed. Results saved to Lookalike.csv.
