# Lookalike Model Development
This notebook explains the development of a lookalike model that recommends similar customers based on their profile and transaction history.

In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.metrics.pairwise import cosine_similarity

## Load Data
Load the customer, product, and transaction datasets from CSV and normalize the transaction ID column names.

In [2]:
def load_data():
    """Read the three source CSVs from the current working directory.

    The transactions frame has its ``customer_id`` / ``product_id`` columns
    renamed to ``CustomerID`` / ``ProductID`` (a no-op when those columns are
    absent), matching the key names used in the other frames.

    Returns:
        ``(customers, products, transactions)`` as DataFrames. If anything
        goes wrong while reading, the error is printed and
        ``(None, None, None)`` is returned instead.
    """
    id_column_aliases = {'customer_id': 'CustomerID', 'product_id': 'ProductID'}
    source_files = ("Customers.csv", "Products.csv", "Transactions.csv")

    try:
        # Files are read in the listed order; the first failure aborts the load.
        customers, products, transactions = (
            pd.read_csv(path) for path in source_files
        )
        transactions = transactions.rename(columns=id_column_aliases)
    except Exception as e:
        print(f"Error loading data: {e}")
        return None, None, None
    else:
        return customers, products, transactions

customers, products, transactions = load_data()
# load_data() signals failure by returning None for every dataset. Stop here
# with a clear error instead of printing a false success message and then
# failing on `None.shape` below.
if customers is None or products is None or transactions is None:
    raise RuntimeError("Data loading failed; see the error message above")
print("Data loaded successfully")
print("Transactions shape:", transactions.shape)
print("Products shape:", products.shape)
print("Customers shape:", customers.shape)

## Create Customer Features
Create feature vectors for each customer based on their transaction history and profile.

In [3]:
def create_customer_features(customers, transactions, products):
    """Build one feature row per customer that appears in ``transactions``.

    The following feature groups are joined on ``CustomerID``:

    * purchase aggregates: ``TotalValue_sum``, ``TotalValue_mean``,
      ``TotalValue_count``, ``Quantity_sum``, ``Quantity_mean``;
    * category preferences: one column per product ``Category`` holding the
      fraction of the customer's transactions in that category;
    * ``recency``: whole days between the customer's most recent transaction
      and the most recent transaction in the whole dataset;
    * region: one-hot ``region_*`` columns derived from ``customers['Region']``.

    Purchase aggregates are the left side of every join, so customers without
    any transaction are not part of the result. Values missing after the
    joins are filled with 0.

    Args:
        customers: DataFrame with ``CustomerID`` and ``Region`` columns.
        transactions: DataFrame with ``CustomerID``, ``ProductID``,
            ``TotalValue``, ``Quantity`` and ``TransactionDate`` columns.
        products: DataFrame with ``ProductID`` and ``Category`` columns.

    Returns:
        The merged feature DataFrame, or ``None`` if any step fails (the
        error and the columns of the intermediates built so far are printed).
    """
    # Pre-bind the intermediates: the error handler reports on them, and a
    # failure in an early step would otherwise raise UnboundLocalError inside
    # the handler, hiding the real error and skipping the `return None`.
    purchase_stats = category_pivoted = region_dummies = None

    try:
        # Create basic purchase statistics
        purchase_stats = transactions.groupby('CustomerID').agg({
            'TotalValue': ['sum', 'mean', 'count'],
            'Quantity': ['sum', 'mean']
        }).reset_index()

        # Flatten the (column, aggregation) MultiIndex into "column_agg" names
        purchase_stats.columns = ['CustomerID'] + [
            f'{col[0]}_{col[1]}' for col in purchase_stats.columns[1:]
        ]

        # Create category preferences (row-normalized transaction counts)
        product_categories = transactions.merge(
            products[['ProductID', 'Category']], on='ProductID'
        )
        category_pivoted = pd.crosstab(product_categories['CustomerID'],
                                       product_categories['Category'],
                                       normalize='index')

        # Create recency feature. Dates are parsed once for the whole column
        # rather than once per customer group.
        transaction_dates = pd.to_datetime(transactions['TransactionDate'])
        latest_date = transaction_dates.max()
        last_purchase = transaction_dates.groupby(transactions['CustomerID']).max()
        recency = (latest_date - last_purchase).dt.days.rename('recency')

        # Create region features
        region_dummies = pd.get_dummies(customers[['CustomerID', 'Region']],
                                        prefix='region',
                                        columns=['Region'])

        # Merge all features
        final_features = (purchase_stats
                          .merge(category_pivoted.reset_index(), on='CustomerID', how='left')
                          .merge(recency.reset_index(), on='CustomerID', how='left')
                          .merge(region_dummies, on='CustomerID', how='left'))

        # Fill missing values
        return final_features.fillna(0)

    except Exception as e:
        print(f"Error creating features: {e}")
        print("\nDebugging info:")
        debug_frames = (
            ("Purchase stats columns:", purchase_stats),
            ("Category pivot columns:", category_pivoted),
            ("Region dummy columns:", region_dummies),
        )
        for label, frame in debug_frames:
            # Intermediates after the failing step were never built.
            columns = frame.columns.tolist() if frame is not None else "not created"
            print(label, columns)
        return None

features = create_customer_features(customers, transactions, products)
# create_customer_features() returns None on failure. Stop here with a clear
# error instead of printing a false success message and failing on
# `None.shape` below.
if features is None:
    raise RuntimeError("Feature creation failed; see the error message above")
print("\nFeatures created successfully")
print("Feature matrix shape:", features.shape)

## Calculate Similarity
Standardize the customer features, compute pairwise cosine similarity, and pick the top 3 most similar customers for each target customer.

In [4]:
def calculate_similarity(features, target_customers, top_n=3):
    """Find the most similar customers for each target customer.

    Every column of ``features`` except ``CustomerID`` is standardized with
    ``StandardScaler`` and compared with pairwise cosine similarity.

    Args:
        features: DataFrame with a ``CustomerID`` column plus numeric feature
            columns, one row per customer.
        target_customers: Iterable of customer IDs to produce recommendations
            for. IDs not present in ``features`` are skipped.
        top_n: Maximum number of lookalikes returned per target (default 3).
            Fewer are returned when there are not enough other customers.

    Returns:
        Dict mapping each found target ID to a list of
        ``(customer_id, similarity_score)`` tuples ordered from most to least
        similar, or ``None`` if an error occurs (the error is printed).
    """
    try:
        # Remove CustomerID for scaling
        feature_cols = features.columns.difference(['CustomerID'])

        # Scale features so no single feature dominates the cosine similarity
        scaled_features = StandardScaler().fit_transform(features[feature_cols])

        # Calculate similarity matrix
        similarity_matrix = cosine_similarity(scaled_features)

        customer_ids = features['CustomerID'].to_numpy()
        customer_indices = {cid: idx for idx, cid in enumerate(customer_ids)}
        limit = max(top_n, 0)

        recommendations = {}
        for target_id in target_customers:
            target_idx = customer_indices.get(target_id)
            if target_idx is None:
                continue

            scores = similarity_matrix[target_idx]
            # Rank every customer from most to least similar, then drop the
            # target by index. Dropping "whoever ranks first" is not safe:
            # an identical customer ties with the target at 1.0, and a
            # customer whose scaled vector is all zeros has self-similarity 0.
            ranked = np.argsort(scores, kind='stable')[::-1]
            similar_indices = ranked[ranked != target_idx][:limit]

            recommendations[target_id] = [
                (customer_ids[idx], float(scores[idx]))
                for idx in similar_indices
            ]

        return recommendations

    except Exception as e:
        print(f"Error calculating similarity: {e}")
        return None

# Targets are the first 20 customer IDs: C0001 .. C0020.
target_customers = [f'C{str(i).zfill(4)}' for i in range(1, 21)]
recommendations = calculate_similarity(features, target_customers)

# Nothing is printed when the similarity step failed (None) or found no targets.
if recommendations:
    print("\nTop 3 Lookalike Recommendations:")
    for cust_id in recommendations:
        print(f"\nCustomer {cust_id}:")
        for rec_id, score in recommendations[cust_id]:
            print(f"  {rec_id}: {score:.4f}")

## Save Recommendations
Save the recommendations to a CSV file.

In [5]:
def save_recommendations(recommendations, output_file):
    """Write lookalike recommendations to a two-column CSV file.

    Each output row holds a ``CustomerID`` and a ``Recommendations`` string of
    the form ``"<id>,<score>;<id>,<score>;..."`` with scores rounded to four
    decimal places.

    Args:
        recommendations: Dict mapping a customer ID to a list of
            ``(customer_id, score)`` tuples. ``None`` (an upstream failure)
            is reported and nothing is written.
        output_file: Path of the CSV file to create or overwrite.

    Returns:
        None. Errors are printed rather than raised.
    """
    if recommendations is None:
        print("Error saving recommendations: no recommendations to save")
        return

    try:
        rows = [
            {
                'CustomerID': cust_id,
                'Recommendations': ';'.join(
                    f"{cid},{score:.4f}" for cid, score in recs
                ),
            }
            for cust_id, recs in recommendations.items()
        ]

        # Pin the columns so an empty result still produces a CSV with a
        # header row instead of a header-less file that cannot be read back.
        output = pd.DataFrame(rows, columns=['CustomerID', 'Recommendations'])
        output.to_csv(output_file, index=False)
        print(f"Recommendations saved to {output_file}")

    except Exception as e:
        print(f"Error saving recommendations: {e}")

# Persist the lookalike results next to the notebook.
save_recommendations(recommendations, output_file="Lookalike.csv")