In [1]:
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import StandardScaler

In [2]:
# Load the three input tables; the relative paths are resolved against the
# notebook's current working directory.
customers, products, transactions = map(
    pd.read_csv,
    ('Customers.csv', 'Products.csv', 'Transactions.csv'),
)

In [10]:
# Attach customer and product columns to every transaction row.
# merge() defaults to an inner join, so a transaction whose CustomerID or
# ProductID has no match in the lookup tables is dropped.
merged_data = pd.merge(
    pd.merge(transactions, customers, on='CustomerID'),
    products,
    on='ProductID',
)

In [13]:
# Customer x product matrix of total units purchased (0 where the customer
# never bought the product).
# aggfunc must be explicit: pivot_table defaults to 'mean', which would average
# Quantity over a customer's repeat purchases of the same product instead of
# adding them up, hiding how much of each product was actually bought.
user_item_matrix = merged_data.pivot_table(
    index='CustomerID',
    columns='ProductID',
    values='Quantity',
    aggfunc='sum',
    fill_value=0,
)

In [20]:
# Standardize the user-item matrix column by column (StandardScaler learns a
# mean and scale per feature) and keep the fitted scaler around for reuse.
scaler = StandardScaler()
user_item_matrix_scaled = scaler.fit(user_item_matrix).transform(user_item_matrix)

In [24]:
# Pairwise cosine similarity between every pair of rows of the scaled matrix.
similarity_matrix = cosine_similarity(user_item_matrix_scaled)

# Wrap the raw array in a DataFrame whose rows and columns both carry the
# labels of user_item_matrix.index, so scores can be looked up by label.
similarity_df = pd.DataFrame(
    data=similarity_matrix,
    index=user_item_matrix.index,
    columns=user_item_matrix.index,
)

In [38]:
def get_top_lookalikes(customer_id, n=3, similarity=None):
    """Return the ``n`` customers most similar to ``customer_id``.

    Parameters
    ----------
    customer_id : label
        Label of the target customer in the similarity frame.
    n : int, default 3
        Number of lookalikes to return (fewer are returned if the frame
        holds fewer than ``n`` other customers).
    similarity : pandas.DataFrame, optional
        Square similarity frame whose rows and columns share the same labels.
        Defaults to the notebook-level ``similarity_df``.

    Returns
    -------
    tuple
        ``(ids, scores)``: a pandas Index of lookalike labels and a NumPy
        array of their similarity scores, most similar first.

    Raises
    ------
    KeyError
        If ``customer_id`` is not a label of the similarity frame.
    """
    if similarity is None:
        similarity = similarity_df

    # Remove the customer's own entry by label before ranking. Taking
    # nlargest(n) and slicing off the first row returned only n - 1 results
    # and, when another customer tied with the self-similarity score, could
    # keep the customer itself while discarding a genuine match.
    candidates = similarity[customer_id].drop(labels=customer_id)
    similar_customers = candidates.nlargest(n)
    return similar_customers.index, similar_customers.values

In [42]:
# Map each of the first 20 customers in the user-item matrix to a list of
# (lookalike id as string, similarity rounded to 2 decimals) pairs.
lookalike_map = {}
for cust_id in user_item_matrix.index[:20]:
    lookalikes, scores = get_top_lookalikes(cust_id)
    rounded_pairs = []
    for cust, score in zip(lookalikes, scores):
        rounded_pairs.append((str(cust), round(score, 2)))
    lookalike_map[cust_id] = rounded_pairs

In [48]:
# Flatten the mapping into one record per (customer, lookalike) pair.
lookalike_records = []
for cust_id, lookalikes in lookalike_map.items():
    lookalike_records.extend(
        {'CustomerID': cust_id, 'LookalikeID': lookalike, 'SimilarityScore': score}
        for lookalike, score in lookalikes
    )

# Tabulate the records and export them without the positional index column.
lookalike_df = pd.DataFrame(lookalike_records)
lookalike_df.to_csv('Bhagyashree_Kalambe_Lookalike.csv', index=False)