In [6]:
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import StandardScaler

# Lookalike model: describe each customer by aggregate purchase behaviour plus
# region, then recommend the most similar other customers by cosine similarity.

# Number of lookalikes reported per customer and number of customers reported.
N_LOOKALIKES = 3
N_TARGET_CUSTOMERS = 20

# Load datasets
customers = pd.read_csv('Customers.csv')
products = pd.read_csv('Products.csv')
transactions = pd.read_csv('Transactions.csv')

# Merge data to create a comprehensive dataset.
# Both merges are inner joins (the pandas default), so transactions whose
# ProductID or CustomerID has no match in the lookup tables are dropped.
transactions = transactions.merge(products, on='ProductID')
transactions = transactions.merge(customers, on='CustomerID')

# Feature engineering: aggregate transaction data by customer.
# Only customers with at least one (merged) transaction get a row here.
customer_features = transactions.groupby('CustomerID').agg(
    total_spent=('TotalValue', 'sum'),
    total_quantity=('Quantity', 'sum'),
    unique_categories=('Category', 'nunique'),
    unique_products=('ProductID', 'nunique'),
).reset_index()

# Add region as a one-hot encoded categorical feature. drop_first=True removes
# one dummy column, which then acts as the implicit baseline region.
customer_features = customer_features.merge(customers[['CustomerID', 'Region']], on='CustomerID')
customer_features = pd.get_dummies(customer_features, columns=['Region'], drop_first=True)

# Normalize numerical features (zero mean, unit variance) so that no single
# aggregate dominates the similarity purely because of its scale.
scaler = StandardScaler()
numeric_features = ['total_spent', 'total_quantity', 'unique_categories', 'unique_products']
customer_features[numeric_features] = scaler.fit_transform(customer_features[numeric_features])

# Compute similarity matrix.
# Force a float matrix: the dummy columns may be bool/uint8, and mixing them
# with float columns would otherwise produce an object-dtype array.
feature_matrix = customer_features.drop(columns=['CustomerID']).to_numpy(dtype=float)
similarity_matrix = cosine_similarity(feature_matrix)

# Recommend the top lookalikes for the first N_TARGET_CUSTOMERS customers.
# NOTE: "first" means the first rows of customer_features (groupby sorts by
# CustomerID by default); customers without transactions are not present.
customer_ids = customer_features['CustomerID'].values
lookalike_results = []

for idx, customer_id in enumerate(customer_ids[:N_TARGET_CUSTOMERS]):
    similarities = similarity_matrix[idx]

    # Rank all customers by descending similarity. A stable sort on the negated
    # scores keeps tied customers in their original order, so results are
    # deterministic.
    ranked_indices = (-similarities).argsort(kind='stable')

    # Exclude the customer itself by index rather than by assuming it ranks
    # first: ties or floating-point rounding can place another customer ahead.
    similar_indices = ranked_indices[ranked_indices != idx][:N_LOOKALIKES]
    lookalikes = [(customer_ids[i], similarities[i]) for i in similar_indices]

    # Pad with empty slots when fewer than N_LOOKALIKES other customers exist,
    # so the output always has the same columns.
    lookalikes += [(None, None)] * (N_LOOKALIKES - len(lookalikes))

    row = {'CustomerID': customer_id}
    for rank, (lookalike_id, score) in enumerate(lookalikes, start=1):
        row[f'Lookalike{rank}'] = lookalike_id
        row[f'Score{rank}'] = score
    lookalike_results.append(row)

# Save results to Lookalike.csv
lookalike_df = pd.DataFrame(lookalike_results)
lookalike_df.to_csv('Lookalike.csv', index=False)

print("Lookalike Model completed. Results saved to 'Lookalike.csv'.")

Lookalike Model completed. Results saved to 'Lookalike.csv'.
