In [None]:
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import TfidfVectorizer

In [None]:
# Load the three input tables from CSV files in the current working directory.
# The file names are listed in the same order as the variables they are
# unpacked into, so the files are read in that order as well.
customers_df, products_df, transactions_df = (
    pd.read_csv(csv_path)
    for csv_path in ("Customers.csv", "Products.csv", "Transactions.csv")
)

In [None]:
# Attach customer and product columns to every transaction row.
# Both joins are inner joins (the pandas default, spelled out here), so a
# transaction whose CustomerID or ProductID has no match in the corresponding
# lookup table is dropped from merged_df.
merged_df = (
    transactions_df
    .merge(customers_df, how='inner', on='CustomerID')
    .merge(products_df, how='inner', on='ProductID')
)


In [None]:
# Build a customer-by-product table: one row per CustomerID, one column per
# ProductName, each cell holding the summed Quantity for that pair.
# Pairs that never occur in merged_df are filled with 0.
# NOTE(review): columns are keyed by ProductName, so distinct products that
# share a name are summed into one column — confirm this is intended rather
# than pivoting on ProductID.
customer_product_matrix = pd.pivot_table(
    merged_df,
    values='Quantity',
    index='CustomerID',
    columns='ProductName',
    aggfunc='sum',
    fill_value=0,
)

In [None]:
# Pairwise cosine similarity between the rows (customers) of the purchase
# matrix. With Y=None the rows of X are compared against each other, giving a
# square array whose row/column order follows customer_product_matrix.index.
similarity_matrix = cosine_similarity(X=customer_product_matrix, Y=None)


In [None]:
# Wrap the raw similarity array in a DataFrame labelled by CustomerID on both
# axes, so scores can be looked up by customer label instead of by position.
customer_ids = customer_product_matrix.index
similarity_df = pd.DataFrame(
    data=similarity_matrix,
    index=customer_ids,
    columns=customer_ids,
)


In [None]:
# For each of the first 20 customers listed in customers_df, collect the
# 3 most similar *other* customers as (CustomerID, similarity) pairs.
lookalike_results = {}

for customer_id in customers_df['CustomerID'].head(20):
    # Customers that have no row in similarity_df are skipped.
    if customer_id not in similarity_df.index:
        continue

    similar_customers = (
        similarity_df[customer_id]
        # Remove the customer's own entry by label. Skipping the first sorted
        # position instead is only correct when the self-score is the unique
        # maximum; if another customer ties with it (or rounds above it), the
        # customer would be listed as their own lookalike and a genuine
        # neighbour would be lost.
        .drop(labels=customer_id)
        # Top 3 by score, highest first; ties keep the first occurrence in
        # index order, so the result is reproducible.
        .nlargest(3)
    )
    lookalike_results[customer_id] = list(similar_customers.items())


In [None]:
# Flatten lookalike_results into one record per customer. The 'Lookalikes'
# cell is the string form of a list of single-entry {CustomerID: score} dicts,
# built from the (CustomerID, score) pairs stored for that customer.
lookalike_rows = [
    {
        'CustomerID': cust_id,
        'Lookalikes': str([{other_id: score} for other_id, score in lookalikes]),
    }
    for cust_id, lookalikes in lookalike_results.items()
]
lookalike_df = pd.DataFrame(lookalike_rows)

# Write the table to disk without the DataFrame's positional index column.
lookalike_df.to_csv(path_or_buf="Anshul_Meshram_Lookalike.csv", index=False)

In [None]:
# Report completion and name the output file written by the previous cell.
completion_message = "Lookalike recommendations generated and saved to Anshul_Meshram_Lookalike.csv."
print(completion_message)

Lookalike recommendations generated and saved to Anshul_Meshram_Lookalike.csv.
