In [3]:
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import StandardScaler

### Feature engineering

In [4]:
# Load the raw transaction log from the working directory. Later cells read
# its CustomerID, Quantity and TotalValue columns.
transactions = pd.read_csv(filepath_or_buffer='Transactions.csv')

In [None]:
# Collapse the transaction log to one row per customer with two behavioural
# features: total units purchased and total amount spent.
customer_features = transactions.groupby('CustomerID').agg({
    'Quantity': 'sum',
    'TotalValue': 'sum'
}).reset_index()

# Normalize the features so neither column dominates the similarity score
# purely because of its scale.
scaler = StandardScaler()
features_scaled = scaler.fit_transform(customer_features[['Quantity', 'TotalValue']])

# Compute cosine similarity between every pair of customers.
similarity_matrix = cosine_similarity(features_scaled)

# Label the matrix by CustomerID so lookups are by identity, not by row position.
customer_ids = customer_features['CustomerID'].tolist()
similarity_df = pd.DataFrame(similarity_matrix, index=customer_ids, columns=customer_ids)

# Recommend top 3 similar customers for C0001-C0020
TOP_N = 3
target_ids = [f'C{n:04d}' for n in range(1, 21)]

lookalikes = {}
for customer_id in target_ids:
    if customer_id not in similarity_df.index:
        # This customer has no transactions, so there are no features to compare.
        continue
    # Remove the customer's own entry explicitly. Relying on "self sorts first"
    # breaks when another customer ties at similarity 1.0, which is common
    # with only two features.
    scores = similarity_df.loc[customer_id].drop(labels=customer_id)
    top_matches = scores.nlargest(TOP_N)
    # Cast to built-in float so the CSV holds plain numbers rather than
    # NumPy scalar reprs such as "np.float64(0.99)".
    lookalikes[customer_id] = [
        (other_id, float(score)) for other_id, score in top_matches.items()
    ]

# Save to CSV: one row per target customer, with its (CustomerID, score) pairs.
lookalike_df = pd.DataFrame([
    {'CustomerID': cust, 'Lookalikes': matches} for cust, matches in lookalikes.items()
])
lookalike_df.to_csv('Yash_Sonar_Lookalike.csv', index=False)
