In [1]:
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np

In [2]:
# Read the three raw input tables (customers, products, transactions), in
# that order, from the local Downloads folder.
# NOTE(review): these are absolute, machine-specific Windows paths; the
# notebook cannot be re-run on another machine without editing them.
customers_df, products_df, transactions_df = (
    pd.read_csv(csv_path)
    for csv_path in (
        "C:\\Users\\ASUS\\Downloads\\Customers.csv",
        "C:\\Users\\ASUS\\Downloads\\Products.csv",
        "C:\\Users\\ASUS\\Downloads\\Transactions.csv",
    )
)

In [3]:
# Attach the matching product row to every transaction. A left join keeps
# every transaction row even when its ProductID is absent from products_df.
transactions_products = pd.merge(
    transactions_df,
    products_df,
    how="left",
    on="ProductID",
)

In [4]:
# Attach the matching customer row to every (transaction + product) row.
# A left join keeps every transaction row even when its CustomerID is absent
# from customers_df.
full_data = pd.merge(
    transactions_products,
    customers_df,
    how="left",
    on="CustomerID",
)

In [5]:
def _most_frequent(values):
    """Return the first mode of ``values``, or ``None`` when there is none.

    ``Series.mode()`` is evaluated exactly once per call (the previous inline
    lambda evaluated it twice for every group).

    Parameters
    ----------
    values : pandas.Series
        The values of one group.

    Returns
    -------
    object or None
        The first entry of ``values.mode()``, or ``None`` if ``mode()``
        returns an empty Series.
    """
    modes = values.mode()
    return None if modes.empty else modes.iloc[0]


# Collapse the transaction-level table to one row per CustomerID:
#   total_spending          - sum of TotalValue
#   total_transactions      - count of TransactionID values
#   avg_transaction_value   - mean of TotalValue
#   most_purchased_category - first mode of Category (None if no mode)
#   region / signup_date    - first Region / SignupDate value in the group
# Only customers that appear in full_data (i.e. in the transactions table)
# get a row here.
customer_features = (
    full_data.groupby("CustomerID")
    .agg(
        total_spending=("TotalValue", "sum"),
        total_transactions=("TransactionID", "count"),
        avg_transaction_value=("TotalValue", "mean"),
        most_purchased_category=("Category", _most_frequent),
        region=("Region", "first"),
        signup_date=("SignupDate", "first"),
    )
    .reset_index()
)

In [6]:
# One-hot encode the two categorical features; every other column of
# customer_features is carried over unchanged.
categorical_columns = ["region", "most_purchased_category"]
customer_features_encoded = pd.get_dummies(
    data=customer_features,
    columns=categorical_columns,
)

In [7]:
# Every column except the identifier and the raw signup date is a model
# feature. Index.difference returns the remaining labels in sorted order.
feature_columns = customer_features_encoded.columns.difference(["CustomerID", "signup_date"])

# Convert explicitly to a float matrix. With boolean dummy columns (the
# pandas >= 2 default for get_dummies) next to numeric columns, `.values`
# would yield an object-dtype array and leave the numeric conversion to
# whatever consumes it.
# NOTE(review): the columns are used on their raw scales (spending totals
# next to 0/1 indicator columns), so a cosine similarity built on this matrix
# is presumably dominated by the large-magnitude columns - consider
# standardising the features first; confirm what is intended.
feature_matrix = customer_features_encoded[feature_columns].to_numpy(dtype=float)

In [8]:
# Pairwise cosine similarity between every pair of rows of feature_matrix
# (Y=None compares the rows of X with each other); entry [i, j] is the
# similarity between row i and row j.
similarity_matrix = cosine_similarity(feature_matrix, Y=None)

In [10]:
def get_top_similar(customers_df, similarity_matrix, top_n=3, n_customers=20):
    """Find the most similar *other* customers for the leading customers.

    Parameters
    ----------
    customers_df : pandas.DataFrame
        Frame with a ``CustomerID`` column. Row position ``i`` must correspond
        to row/column ``i`` of ``similarity_matrix``.
    similarity_matrix : numpy.ndarray
        Square matrix of pairwise similarity scores.
    top_n : int, default 3
        Maximum number of lookalikes to return per customer.
    n_customers : int or None, default 20
        Number of leading rows of ``customers_df`` to compute lookalikes for.
        ``None`` processes every row.

    Returns
    -------
    dict
        Maps each processed CustomerID to a list of
        ``(other_customer_id, score)`` tuples, highest score first. The list
        holds fewer than ``top_n`` entries when fewer other customers exist.
    """
    customer_ids = customers_df["CustomerID"].values
    top_similarities = {}
    for i, customer_id in enumerate(customers_df["CustomerID"][:n_customers]):
        sim_scores = similarity_matrix[i]
        # Rank all customers from most to least similar (stable sort so that
        # ties are resolved deterministically).
        ranked = np.argsort(sim_scores, kind="stable")[::-1]
        # Drop the customer itself by index rather than assuming it is ranked
        # first: when another customer ties with the self-similarity score,
        # skipping position 0 could discard a real lookalike and report the
        # customer as its own match.
        top_indices = ranked[ranked != i][:top_n]
        top_customers = customer_ids[top_indices]
        top_scores = sim_scores[top_indices]
        top_similarities[customer_id] = list(zip(top_customers, top_scores))
    return top_similarities

In [11]:
# Compute the lookalike lists from the encoded feature table; its rows are
# the same rows, in the same order, that feature_matrix (and therefore
# similarity_matrix) was built from. top_n=3 is the function's default,
# spelled out here.
lookalike_results = get_top_similar(
    customers_df=customer_features_encoded,
    similarity_matrix=similarity_matrix,
    top_n=3,
)

In [12]:
# One row per customer; the Lookalikes cell holds that customer's list of
# (other_customer_id, score) tuples.
# Scores are converted to plain Python floats: the cell is written to CSV via
# the list's repr, and NumPy scalar reprs (NumPy >= 2 renders them as
# "np.float64(...)") would otherwise leak into the output file.
lookalike_df = pd.DataFrame(
    [
        {
            "CustomerID": cust_id,
            "Lookalikes": [(other_id, float(score)) for other_id, score in lookalikes],
        }
        for cust_id, lookalikes in lookalike_results.items()
    ]
)

In [14]:
# Write the lookalike table to disk without the DataFrame index, then print
# a confirmation message.
# NOTE(review): absolute, machine-specific output path.
output_path = "C:\\Users\\ASUS\\Downloads\\Lookalike_Results.csv"
lookalike_df.to_csv(output_path, index=False)

print("Lookalike model results saved to Lookalike_Results.csv")


Lookalike model results saved to Lookalike_Results.csv
