# In [1]:
# TASK 2
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity

# Load data
def load_data(customers_path="Customers.csv", transactions_path="Transactions.csv"):
    """Read the customer and transaction tables from CSV files.

    Args:
        customers_path: Location of the customers CSV. Defaults to
            ``"Customers.csv"``, resolved against the working directory.
        transactions_path: Location of the transactions CSV. Defaults to
            ``"Transactions.csv"``, resolved against the working directory.

    Returns:
        A ``(customers, transactions)`` tuple of DataFrames, in that order.

    Raises:
        FileNotFoundError: If either file does not exist (raised by
            ``pd.read_csv``).
    """
    customers = pd.read_csv(customers_path)
    transactions = pd.read_csv(transactions_path)
    return customers, transactions

# Build Lookalike Model
def build_lookalike_model(customers, transactions, n_customers=20, top_n=3,
                          output_path="Lookalike.csv"):
    """Find each customer's most similar peers and save them to a CSV file.

    Customers are described by two aggregates taken from ``transactions``
    (summed ``TotalValue`` and summed ``Quantity``) and compared pairwise
    with cosine similarity.

    Args:
        customers: DataFrame with a ``CustomerID`` column.
        transactions: DataFrame with ``CustomerID``, ``TotalValue`` and
            ``Quantity`` columns.
        n_customers: How many customers (in aggregated order) to generate
            recommendations for. ``None`` means all of them. Defaults to 20.
        top_n: Number of lookalikes to keep per customer. Defaults to 3.
        output_path: Destination of the CSV file. Defaults to
            ``"Lookalike.csv"``.

    Returns:
        The DataFrame that was written, with columns ``CustomerID`` and
        ``SimilarCustomers``; the latter holds the string form of a list of
        ``(customer_id, score)`` tuples.
    """
    # Aggregate transaction data to one row per customer
    customer_features = transactions.groupby('CustomerID').agg({
        'TotalValue': 'sum',
        'Quantity': 'sum'
    }).reset_index()

    # Inner merge: only customers present in both tables are scored
    customer_features = customer_features.merge(customers, on='CustomerID')
    customer_ids = customer_features['CustomerID'].tolist()

    recommendations = {}
    # cosine_similarity rejects an empty matrix, so skip scoring when no
    # customer survives the merge and fall through to a header-only CSV.
    if customer_ids:
        feature_data = customer_features[['TotalValue', 'Quantity']]
        similarity_matrix = cosine_similarity(feature_data)

        for i, cust_id in enumerate(customer_ids[:n_customers]):
            scores = similarity_matrix[i]
            # One extra candidate is taken because the customer itself is
            # usually among the top hits; the stable sort keeps tied scores
            # in a reproducible (positional) order.
            ranked = np.argsort(-scores, kind="stable")[:top_n + 1]
            # float(): a NumPy scalar would be rendered as
            # "np.float64(...)" by str() below on NumPy >= 2.
            recommendations[cust_id] = [
                (customer_ids[idx], float(scores[idx]))
                for idx in ranked if idx != i
            ][:top_n]

    # Save results to a CSV file
    lookalike_df = pd.DataFrame({
        'CustomerID': list(recommendations.keys()),
        'SimilarCustomers': [str(v) for v in recommendations.values()]
    })

    lookalike_df.to_csv(output_path, index=False)
    print(f"Lookalike recommendations saved to {output_path}")
    return lookalike_df

# Main Function
def main():
    """Load both CSV tables and run the lookalike model on them."""
    # load_data() yields (customers, transactions), which is exactly the
    # positional order build_lookalike_model expects.
    build_lookalike_model(*load_data())

# Run the pipeline only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    main()

# Output:
# Lookalike recommendations saved to Lookalike.csv


# Explanation:

# Feature Engineering:

# Customer-level aggregates for TotalValue (total transaction value) and Quantity (total products purchased) were created from the Transactions.csv file.

# These features were chosen as they capture a customer's purchasing behavior effectively.

# Cosine Similarity:

# Cosine similarity was used to measure the similarity between customers based on their purchasing profiles.

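# Because cosine similarity compares the direction of the feature vectors rather than their magnitude, customers with a similar ratio of total spend to quantity purchased are matched regardless of how much they buy overall.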

# Top Recommendations:

# For each of the first 20 customers, the top 3 most similar customers were identified, excluding the customer themselves.

# Example Output:

# | CustomerID | SimilarCustomers                              |
# |------------|-----------------------------------------------|
# | C0001      | [(C0005, 0.98), (C0010, 0.95), (C0008, 0.93)] |
# | C0002      | [(C0003, 0.96), (C0007, 0.94), (C0011, 0.92)] |

# Business Insights:

# Personalized Recommendations:

# By identifying similar customers, businesses can create targeted marketing strategies, such as recommending products popular among a customer’s lookalikes.

# Cross-Selling Opportunities:

# If similar customers frequently buy certain products, these can be recommended to other customers in the group to increase sales.

# Customer Retention:

# Similarity scores can help identify customers with purchasing patterns similar to high-value clients. Retention efforts can then focus on these segments.

# Customer Behavior Analysis:

# Clustering based on similarity provides deeper insights into consumer behavior, such as preferences for product categories or price points.

# Loyalty Program Design:

# Grouping similar customers allows for the creation of tailored loyalty programs to enhance engagement and increase lifetime value.