In [11]:
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from collections import defaultdict

# Read the three source tables from the shared data directory.
customers_df = pd.read_csv('../data/Customers.csv')
products_df = pd.read_csv('../data/Products.csv')
transactions_df = pd.read_csv('../data/Transactions.csv')

# Left-join the customer keys and the product names onto every transaction row.
transactions_df = (
    transactions_df
    .merge(customers_df[['CustomerID']], on='CustomerID', how='left')
    .merge(products_df[['ProductID', 'ProductName']], on='ProductID', how='left')
)

# Interaction matrix: one row per CustomerID, one column per ProductID, each
# cell holding the summed Quantity (0 where that pair never occurs).
customer_product_matrix = transactions_df.pivot_table(
    index='CustomerID',
    columns='ProductID',
    values='Quantity',
    aggfunc='sum',
    fill_value=0,
)

# Pairwise cosine similarity between the customers' purchase-quantity rows.
cosine_sim_matrix = cosine_similarity(customer_product_matrix)

# Label both axes with the customer IDs so scores can be looked up by ID.
cosine_sim_df = pd.DataFrame(
    cosine_sim_matrix,
    index=customer_product_matrix.index,
    columns=customer_product_matrix.index,
)

# Function to get the most similar customers (top 3 by default) for a given customer
def get_top_3_lookalikes(customer_id, top_n=3, similarity_df=None):
    """Return the customers most similar to ``customer_id``.

    Args:
        customer_id: Label of the customer to look up. It must be a column
            label (and row label) of the similarity matrix.
        top_n: Maximum number of lookalikes to return. Defaults to 3.
        similarity_df: Square customer-by-customer similarity DataFrame whose
            index and columns are customer IDs. When omitted, the
            module-level ``cosine_sim_df`` is used.

    Returns:
        A list of at most ``top_n`` ``(customer_id, score)`` tuples ordered
        from most to least similar. The queried customer itself is excluded.
        Customers with equal scores keep the order they have in the matrix.

    Raises:
        KeyError: If ``customer_id`` is not present in the similarity matrix
            (for example, a customer that never made it into the matrix).
        ValueError: If ``top_n`` is negative.
    """
    if top_n < 0:
        raise ValueError(f"top_n must be non-negative, got {top_n}")

    if similarity_df is None:
        similarity_df = cosine_sim_df

    if customer_id not in similarity_df.columns:
        raise KeyError(
            f"No similarity scores for customer {customer_id!r}: "
            "it is not present in the similarity matrix"
        )

    # Remove the customer's own entry (self-similarity) before ranking.
    scores = similarity_df[customer_id].drop(customer_id)

    # A stable sort makes tie-breaking deterministic: equal scores stay in
    # the order they appear in the matrix.
    top_matches = scores.sort_values(ascending=False, kind='stable').head(top_n)

    return list(zip(top_matches.index, top_matches.values))

# Collect the top-3 lookalikes for customers C0001 through C0020.
lookalike_recommendations = defaultdict(list)
target_ids = [f'C{number:04d}' for number in range(1, 21)]
for target_id in target_ids:
    lookalike_recommendations[target_id] = get_top_3_lookalikes(target_id)

# Flatten the mapping into one [customer, lookalike, score] row per recommendation.
lookalike_data = [
    [target_id, match_id, match_score]
    for target_id, matches in lookalike_recommendations.items()
    for match_id, match_score in matches
]

lookalike_df = pd.DataFrame(
    lookalike_data,
    columns=['CustomerID', 'RecommendedCustomerID', 'SimilarityScore'],
)

# Write the table to CSV (without the row index), then report the output file.
lookalike_df.to_csv('Brijesh_Vadaliya_Lookalike.csv', index=False)

print("Lookalike recommendations have been saved to 'Brijesh_Vadaliya_Lookalike.csv'")


Lookalike recommendations have been saved to 'Brijesh_Vadaliya_Lookalike.csv'
