# In [None]:

import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np

# Lookalike model: for each of the first NUM_TARGET_CUSTOMERS customers, find
# the TOP_N_LOOKALIKES most similar other customers by cosine similarity over
# scaled spend features and one-hot encoded region.
NUM_TARGET_CUSTOMERS = 20
TOP_N_LOOKALIKES = 3


def summarize_transactions(transactions):
    """Aggregate transactions into one row per customer.

    Args:
        transactions: DataFrame with 'CustomerID', 'TotalValue' and
            'TransactionID' columns.

    Returns:
        DataFrame with columns 'CustomerID', 'TotalValue' (summed spend) and
        'NumTransactions' (transaction count).
    """
    return (
        transactions.groupby('CustomerID')
        .agg({
            'TotalValue': 'sum',  # Total spend per customer
            'TransactionID': 'count',  # Number of transactions per customer
        })
        .rename(columns={'TransactionID': 'NumTransactions'})
        .reset_index()
    )


def build_customer_profiles(customers, customer_transactions):
    """Join customer records with their spend summary and encode the region.

    Args:
        customers: DataFrame with at least 'CustomerID' and 'Region' columns.
        customer_transactions: output of ``summarize_transactions``.

    Returns:
        DataFrame with one row per customer, the spend columns filled with 0
        for customers without transactions, and one float indicator column
        per region ('Region_<name>').
    """
    profiles = pd.merge(customers, customer_transactions, on='CustomerID', how='left')

    # Fill only the aggregated columns: a customer with no transactions spent
    # 0, but a blanket fillna(0) would also turn a missing Region or name into
    # the value 0 and produce a bogus 'Region_0' category.
    spend_columns = ['TotalValue', 'NumTransactions']
    profiles[spend_columns] = profiles[spend_columns].fillna(0)

    # Keep every region indicator (no drop_first): with a dropped baseline the
    # customers of that region would carry no region signal at all in the
    # cosine similarity. dtype=float keeps the feature matrix purely numeric.
    return pd.get_dummies(profiles, columns=['Region'], dtype=float)


def find_lookalikes(customer_ids, similarity_matrix,
                    n_customers=NUM_TARGET_CUSTOMERS, top_n=TOP_N_LOOKALIKES):
    """Return the most similar customers for the first ``n_customers`` customers.

    Args:
        customer_ids: sequence of customer IDs; position ``i`` must correspond
            to row/column ``i`` of ``similarity_matrix``.
        similarity_matrix: square array of pairwise similarity scores.
        n_customers: how many leading customers to produce results for.
        top_n: how many lookalikes to keep per customer.

    Returns:
        Dict mapping each target customer ID to a list of
        ``(other_customer_id, score)`` tuples, best match first. Scores are
        plain Python floats.
    """
    customer_ids = list(customer_ids)
    similarity_matrix = np.asarray(similarity_matrix)

    lookalikes = {}
    for idx, customer_id in enumerate(customer_ids[:n_customers]):
        # Descending order; the stable sort keeps tied scores in row order so
        # repeated runs give the same ranking.
        ranked = np.argsort(-similarity_matrix[idx], kind='stable')
        # Exclude the customer itself by position, then keep the best top_n.
        neighbours = ranked[ranked != idx][:top_n].tolist()
        # float() so str() of the result does not depend on NumPy's scalar
        # repr (which prints 'np.float64(...)' from NumPy 2 onwards).
        lookalikes[customer_id] = [
            (customer_ids[i], float(similarity_matrix[idx, i]))
            for i in neighbours
        ]
    return lookalikes


if __name__ == "__main__":
    # Load datasets
    customers = pd.read_csv('Customers.csv')
    products = pd.read_csv('Products.csv')  # loaded but not used by this model
    transactions = pd.read_csv('Transactions.csv')

    # Preparing data for the Lookalike Model
    customer_transactions = summarize_transactions(transactions)
    customer_profiles = build_customer_profiles(customers, customer_transactions)

    # Normalize numerical features
    scaler = StandardScaler()
    numerical_features = ['TotalValue', 'NumTransactions']
    customer_profiles[numerical_features] = scaler.fit_transform(customer_profiles[numerical_features])

    # Calculate cosine similarity on the feature columns only
    customer_features = customer_profiles.drop(['CustomerID', 'CustomerName', 'SignupDate'], axis=1)
    similarity_matrix = cosine_similarity(customer_features)

    # Find top 3 similar customers for the first 20 customers
    lookalike_map = find_lookalikes(
        customer_profiles['CustomerID'].tolist(),
        similarity_matrix,
        n_customers=NUM_TARGET_CUSTOMERS,
        top_n=TOP_N_LOOKALIKES,
    )

    # Convert to DataFrame
    lookalike_df = pd.DataFrame({
        'CustomerID': list(lookalike_map.keys()),
        'SimilarCustomers': [str(val) for val in lookalike_map.values()]
    })

    # Save Lookalike Model results to CSV
    lookalike_df.to_csv('Lookalike_Model_Results.csv', index=False)

    # Display results
    print(lookalike_df.head())
