# TASK 2

Insights:
1. Recommendation Approach:
   - Comprehensive feature engineering using:
     * Transaction history
     * Spending patterns
     * Product category preferences
   - Cosine similarity for customer matching

2. Key Methodology:
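   - Standardized feature scaling (each feature rescaled to zero mean and unit variance)
   - Considers multiple customer attributes: account age, transaction metrics, and per-category purchase quantities
   - Provides personalized recommendations: the top 3 most similar customers for each target customer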

3. Potential Business Applications:
   - Targeted marketing
   - Personalized customer engagement
   - Customer base expansion


In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.metrics.pairwise import cosine_similarity

# Load the three input tables (customers, products, transactions) in one pass
customers, products, transactions = map(
    pd.read_csv,
    ('/content/Customers.csv', '/content/Products.csv', '/content/Transactions.csv'),
)

def build_customer_features(customers, transactions, products, reference_date=None):
    """Build a per-customer feature matrix indexed by CustomerID.

    Features are the account age in days, transaction aggregates (count,
    total/mean spend, total/mean quantity, mean product price) and the
    quantity purchased in each product category.

    Args:
        customers: DataFrame with 'CustomerID' and 'SignupDate' columns.
        transactions: DataFrame with 'TransactionID', 'CustomerID',
            'ProductID', 'Quantity' and 'TotalValue' columns.
        products: DataFrame with 'ProductID', 'Category' and 'Price' columns.
        reference_date: Date the account age is measured against. Defaults
            to the current time; pass a fixed value for reproducible output.

    Returns:
        DataFrame indexed by CustomerID with one column per feature.
        Features that are missing for a customer (e.g. no transactions)
        are filled with 0.
    """
    # Merge transactions with products
    tx_products = transactions.merge(products, on='ProductID')

    # When both inputs carry a 'Price' column the merge suffixes the product
    # price as 'Price_y'; otherwise it keeps its plain name.
    price_col = 'Price_y' if 'Price_y' in tx_products.columns else 'Price'

    # Basic customer info. The frame must be indexed by CustomerID so that it
    # aligns with the CustomerID-indexed aggregates joined below; a default
    # RangeIndex would leave account_age on separate, unmatched rows.
    now = pd.Timestamp.now() if reference_date is None else pd.Timestamp(reference_date)
    signup_dates = pd.to_datetime(customers['SignupDate'])
    customer_features = pd.DataFrame(
        {'account_age': (now - signup_dates).dt.days.to_numpy()},
        index=pd.Index(customers['CustomerID'], name='CustomerID'),
    )

    # Transaction patterns
    tx_metrics = tx_products.groupby('CustomerID').agg(
        num_transactions=('TransactionID', 'count'),
        total_spend=('TotalValue', 'sum'),
        avg_transaction_value=('TotalValue', 'mean'),
        total_items=('Quantity', 'sum'),
        avg_items_per_tx=('Quantity', 'mean'),
        avg_price_preference=(price_col, 'mean'),
    ).fillna(0)

    # Category preferences: total quantity bought per product category
    category_pivot = pd.pivot_table(
        tx_products,
        values='Quantity',
        index='CustomerID',
        columns='Category',
        aggfunc='sum',
        fill_value=0
    )

    # Combine features; outer joins keep customers that appear in only one
    # of the inputs.
    customer_features = customer_features.join(tx_metrics, how='outer')
    customer_features = customer_features.join(category_pivot, how='outer')

    # Customers without transactions have no aggregates: treat as zero
    customer_features = customer_features.fillna(0)

    return customer_features

def find_lookalikes(customer_id, feature_matrix, n_recommendations=3):
    """Return the customers most similar to ``customer_id`` by cosine similarity.

    Args:
        customer_id: Index label of the target customer in ``feature_matrix``.
        feature_matrix: DataFrame of numeric features, one row per customer.
        n_recommendations: Maximum number of lookalikes to return.

    Returns:
        DataFrame with 'customer_id' and 'similarity_score' columns, ordered
        by descending similarity. The target customer is never included.

    Raises:
        KeyError: If ``customer_id`` is not in ``feature_matrix.index``.
    """
    # Get customer vector
    customer_vector = feature_matrix.loc[customer_id].values.reshape(1, -1)

    # Calculate similarity scores against every row of the matrix
    similarity_scores = cosine_similarity(customer_vector, feature_matrix)[0]

    # Exclude the target by label instead of assuming it sorts first: with
    # tied scores (identical rows) or an all-zero vector (self-similarity 0)
    # the target is not guaranteed to be the top-ranked entry.
    candidate_positions = np.flatnonzero(feature_matrix.index != customer_id)
    ranking = np.argsort(-similarity_scores[candidate_positions], kind='stable')
    similar_indices = candidate_positions[ranking[:n_recommendations]]

    similar_customers = pd.DataFrame({
        'customer_id': feature_matrix.index[similar_indices],
        'similarity_score': similarity_scores[similar_indices]
    })

    return similar_customers

# Main execution
def generate_lookalikes(customers, transactions, products, target_customers):
    """Compute lookalike customers for each requested customer ID.

    Builds the customer feature matrix, standardizes every feature column,
    and looks up the most similar customers for each entry of
    ``target_customers``.

    Returns:
        Dict mapping each target customer ID to a list of record dicts with
        'customer_id' and 'similarity_score' keys.
    """
    raw_features = build_customer_features(customers, transactions, products)

    # Standardize so that no single feature dominates the similarity measure
    standardized = StandardScaler().fit_transform(raw_features)
    scaled_features = pd.DataFrame(
        standardized,
        index=raw_features.index,
        columns=raw_features.columns,
    )

    return {
        target: find_lookalikes(target, scaled_features).to_dict('records')
        for target in target_customers
    }

# Targets are the first twenty customer IDs, C0001 through C0020
target_customers = [f'C{str(i).zfill(4)}' for i in range(1, 21)]
lookalike_results = generate_lookalikes(customers, transactions, products, target_customers)

# One output row per target customer; its matches are flattened into a
# single "id:score|id:score|..." string with scores rounded to 3 decimals
output_data = [
    {
        'customer_id': cust_id,
        'lookalikes': '|'.join(
            f"{rec['customer_id']}:{rec['similarity_score']:.3f}"
            for rec in recommendations
        ),
    }
    for cust_id, recommendations in lookalike_results.items()
]

# Save results to CSV
pd.DataFrame(output_data).to_csv('Lookalike.csv', index=False)