In [78]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import MinMaxScaler

In [79]:
# Load the three input tables (customers, products, transactions) from the
# Kaggle dataset directory, in that order.
customers, products, transactions = (
    pd.read_csv(csv_path)
    for csv_path in (
        '/kaggle/input/zeotap-1/Customers.csv',
        '/kaggle/input/zeotap-1/Products.csv',
        '/kaggle/input/zeotap-1/Transactions.csv',
    )
)


In [80]:
# Customer x product matrix: each cell is the summed TotalValue for that
# (CustomerID, ProductID) pair.
spend_pivot = transactions.pivot_table(
    values="TotalValue",
    index="CustomerID",
    columns="ProductID",
    aggfunc="sum",
)
# Pairs with no transactions come out of the pivot as NaN; store them as 0.
interaction_matrix = spend_pivot.fillna(0)

# Rescale every product column with MinMaxScaler, then restore the
# customer/product labels that fit_transform's array output does not carry.
scaler = MinMaxScaler()
interaction_matrix_scaled = scaler.fit_transform(interaction_matrix)
interaction_matrix_norm = pd.DataFrame(
    interaction_matrix_scaled,
    columns=interaction_matrix.columns,
    index=interaction_matrix.index,
)

In [81]:
# Pairwise cosine similarity between the rows of the normalized interaction
# matrix, labelled on both axes with the interaction matrix's customer index.
customer_ids = interaction_matrix.index
cosine_sim = cosine_similarity(interaction_matrix_norm)
cosine_sim_df = pd.DataFrame(cosine_sim, index=customer_ids, columns=customer_ids)

In [82]:
# Relative weights of the two similarity signals in the hybrid score.
PRODUCT_SIM_WEIGHT = 0.6
RFM_SIM_WEIGHT = 0.4

# Parse the dates once, *before* taking the maximum. Taking max() on the raw
# column compares strings lexicographically when the CSV dates were read as
# text, which only matches chronological order for ISO-style formats.
transaction_dates = pd.to_datetime(transactions["TransactionDate"])
latest_date = transaction_dates.max()

# Days between each transaction and the most recent transaction in the data.
# This adds/overwrites a "Recency" column on `transactions`.
transactions["Recency"] = (latest_date - transaction_dates).dt.days

# Per-customer RFM features.
rfm = transactions.groupby("CustomerID").agg(
    Recency=("Recency", "min"),            # days since the customer's latest transaction
    Frequency=("TransactionID", "count"),  # number of transactions
    Monetary=("TotalValue", "sum"),        # total spend
)

# NOTE: this re-fits the existing `scaler` object, so from here on its fitted
# state describes the RFM columns rather than whatever it was fitted on before.
rfm_scaled = scaler.fit_transform(rfm)
rfm_norm = pd.DataFrame(rfm_scaled, index=rfm.index, columns=rfm.columns)

# Cosine similarity between customers' scaled RFM vectors.
rfm_sim = cosine_similarity(rfm_norm)
rfm_sim_df = pd.DataFrame(rfm_sim, index=rfm.index, columns=rfm.index)

# Weighted blend of product-based and RFM-based similarity. pandas aligns the
# two frames on their CustomerID labels before adding.
hybrid_sim = (cosine_sim_df * PRODUCT_SIM_WEIGHT) + (rfm_sim_df * RFM_SIM_WEIGHT)

In [83]:
def get_top_lookalikes(customer_id, similarity_matrix, top_n=3):
    """Return the ``top_n`` customers most similar to ``customer_id``.

    Parameters
    ----------
    customer_id : hashable
        Column label in ``similarity_matrix`` identifying the query customer.
    similarity_matrix : pandas.DataFrame
        Pairwise similarity scores; the column ``customer_id`` is read and its
        row labels are treated as the candidate customers.
    top_n : int, default 3
        Maximum number of lookalikes to return. Values <= 0 yield an empty list.

    Returns
    -------
    list[tuple]
        ``(candidate_id, score)`` pairs ordered from highest to lowest score.
        Scores are plain Python scalars (not NumPy scalars), so the list has a
        clean text representation when written to CSV.

    Raises
    ------
    KeyError
        If ``customer_id`` is not a column of ``similarity_matrix``.
    """
    scores = similarity_matrix[customer_id]

    # Exclude the query customer by label instead of skipping the first sorted
    # position: on ties or float round-off the customer's own score is not
    # guaranteed to sort first, which would put the customer in its own
    # lookalike list and drop a real match.
    candidates = scores.drop(labels=customer_id, errors="ignore")

    # A stable sort keeps tied candidates in their original label order.
    ranked = candidates.sort_values(ascending=False, kind="mergesort")
    top_lookalikes = ranked.iloc[:max(top_n, 0)]

    return list(zip(top_lookalikes.index.tolist(), top_lookalikes.tolist()))

In [84]:
# Collect the top lookalikes (from the hybrid similarity matrix) for the
# first 20 customer IDs in the interaction matrix's index.
first_customers = interaction_matrix.index[:20]

lookalikes = {}
for customer_id in first_customers:
    lookalikes[customer_id] = get_top_lookalikes(customer_id, hybrid_sim)

In [87]:
# One record per customer: the ID as a string plus its list of
# (lookalike_id, score) pairs.
lookalike_records = []
for cust, matches in lookalikes.items():
    lookalike_records.append({"cust_id": str(cust), "lookalikes": matches})

lookalike_df = pd.DataFrame(lookalike_records)

# Write the table to Lookalike.csv without the positional row index.
lookalike_df.to_csv("Lookalike.csv", index=False)