# Customer Lookalike Model

## 1. Feature Engineering

In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.metrics.pairwise import cosine_similarity

# Read the customer and transaction tables from CSV files.
# Relative paths: both files are resolved against the current working directory.
customers_df = pd.read_csv(filepath_or_buffer='Customers.csv')
transactions_df = pd.read_csv(filepath_or_buffer='Transactions.csv')

# Create customer features
def create_customer_features(customers_df, transactions_df):
    """Aggregate the transaction table into one feature row per customer.

    Args:
        customers_df: Customer table. Not read by this function.
        transactions_df: Transaction table with ``CustomerID``,
            ``TotalValue`` and ``Quantity`` columns.

    Returns:
        DataFrame indexed by ``CustomerID`` with the columns
        ``total_spend``, ``avg_transaction``, ``transaction_count``,
        ``total_quantity`` and ``avg_quantity``. Only customers that have at
        least one row in ``transactions_df`` appear in the result.
    """
    per_customer = transactions_df.groupby('CustomerID')

    # Named aggregation yields the final flat column names directly, in the
    # order the keywords are listed.
    return per_customer.agg(
        total_spend=('TotalValue', 'sum'),
        avg_transaction=('TotalValue', 'mean'),
        transaction_count=('TotalValue', 'count'),
        total_quantity=('Quantity', 'sum'),
        avg_quantity=('Quantity', 'mean'),
    )

# Build the per-customer feature table used by the similarity search.
customer_features = create_customer_features(
    customers_df=customers_df,
    transactions_df=transactions_df,
)

In [None]:
# Find similar customers
def find_similar_customers(customer_id, features_df, n_similar=3):
    """Return the customers whose feature rows are most similar to one customer.

    Similarity is the cosine similarity between the row of ``features_df``
    labelled ``customer_id`` and every row of ``features_df``.

    Args:
        customer_id: Index label of the reference customer in ``features_df``.
        features_df: DataFrame of numeric features, one row per customer,
            indexed by customer id.
        n_similar: Maximum number of similar customers to return.

    Returns:
        A list of ``(customer_id, score)`` tuples sorted by descending score.
        The reference customer itself is never included. Scores are built-in
        ``float`` values.

    Raises:
        KeyError: If ``customer_id`` is not a label of ``features_df.index``.
    """
    # Only the reference customer's row of the similarity matrix is needed,
    # so compare that single row against the table instead of building the
    # full N x N matrix on every call.
    target_row = features_df.loc[[customer_id]]
    # tolist() converts NumPy scalars to built-in floats so the returned
    # tuples print cleanly regardless of the installed NumPy version.
    scores = cosine_similarity(target_row, features_df)[0].tolist()

    # Drop the reference customer by label rather than by sorted position:
    # another customer can tie with it at the top score, in which case the
    # reference customer is not guaranteed to sort first.
    candidates = [
        (other_id, score)
        for other_id, score in zip(features_df.index, scores)
        if other_id != customer_id
    ]

    # list.sort is stable, so equally scored customers keep index order.
    candidates.sort(key=lambda pair: pair[1], reverse=True)

    # Clamp so a non-positive n_similar yields an empty list.
    return candidates[:max(n_similar, 0)]

# Generate lookalike recommendations for the first 20 customers.
# customer_features only has rows for customers that appear in the
# transactions table, so a customer without transactions cannot be looked up;
# record an empty match list for them instead of aborting the whole run with
# a KeyError.
lookalike_results = {}
for customer_id in customers_df['CustomerID'][:20]:
    if customer_id in customer_features.index:
        similar = find_similar_customers(customer_id, customer_features)
    else:
        similar = []
    lookalike_results[customer_id] = similar

In [None]:
# Save results to CSV: one row per customer, with the list of
# (similar customer id, score) pairs stored as a string.
# Each score is cast to a built-in float before stringifying: under
# NumPy >= 2 the repr of a NumPy scalar is "np.float64(...)", which would
# otherwise be written into the SimilarCustomers column.
results_df = pd.DataFrame({
    'CustomerID': list(lookalike_results.keys()),
    'SimilarCustomers': [
        str([(similar_id, float(score)) for similar_id, score in matches])
        for matches in lookalike_results.values()
    ],
})
results_df.to_csv('FirstName_LastName_Lookalike.csv', index=False)