In [29]:
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import MinMaxScaler

In [30]:
# Read the three raw tables from the working directory, in a fixed order.
customers, products, transactions = (
    pd.read_csv(csv_name)
    for csv_name in ('customers.csv', 'products.csv', 'transactions.csv')
)

## Convert dates to datetime format

In [31]:
# Parse each table's date column into datetime64, overwriting the column
# on the existing frame.
for frame, date_column in (
    (customers, 'SignupDate'),
    (transactions, 'TransactionDate'),
):
    frame[date_column] = pd.to_datetime(frame[date_column])

In [32]:
# Enrich every transaction with its customer and product attributes.
# Both joins are left joins, so each transaction row is kept even when the
# customer or product lookup finds no match.
merged_data = (
    transactions
    .merge(customers, how='left', on='CustomerID')
    .merge(products, how='left', on='ProductID')
)

## Feature Engineering

In [33]:
def _most_purchased_category(categories):
    """Return the most frequent non-null category, or None if there is none.

    ``value_counts`` ignores missing values, so a customer whose transactions
    carry no category (possible because products are attached with a left
    join) produces an empty count table, and calling ``idxmax`` on that raises
    ``ValueError``. Returning None instead leaves the feature missing for that
    customer rather than aborting the whole aggregation.
    """
    counts = categories.value_counts()
    return counts.idxmax() if not counts.empty else None


# One row per customer: total spend, mean transaction value, number of
# transactions, and the category that appears most often in their purchases.
customer_features = merged_data.groupby('CustomerID').agg(
    TotalSpend=('TotalValue', 'sum'),
    AvgTransactionValue=('TotalValue', 'mean'),
    TotalTransactions=('TransactionID', 'count'),
    MostPurchasedCategory=('Category', _most_purchased_category)
).reset_index()


## Encode categorical features

In [34]:
# One-hot encode the favourite category, keeping an indicator column for
# EVERY category. Dropping the first level (as is done for regression to avoid
# collinearity) would encode that category as all zeros, so customers sharing
# it would contribute nothing category-related to the cosine similarity while
# customers in any other category would -- an asymmetry that skews lookalikes.
# Rows with a missing category get all-zero indicators (dummy_na defaults to
# False).
customer_features = pd.get_dummies(
    customer_features,
    columns=['MostPurchasedCategory'],
    drop_first=False,
)

## Normalize features

In [35]:
# Rescale every feature column to [0, 1] so that no single feature dominates
# the similarity purely because of its magnitude. The identifier column is
# excluded first: it is a label, not a feature.
feature_columns = customer_features.drop(columns='CustomerID')
scaler = MinMaxScaler()
scaled_features = scaler.fit_transform(feature_columns)


## Compute similarity

In [36]:
# Pairwise cosine similarity between all customers; entry [i, j] compares
# row i with row j of the scaled feature matrix.
similarity_matrix = cosine_similarity(X=scaled_features)

## Generate Lookalike Recommendations for the first 20 customers

In [37]:
# Map each of the first 20 customers to their three most similar customers as
# a list of (CustomerID, similarity score) pairs, best match first.
lookalike_results = {}
first_20_customers = customer_features['CustomerID'][:20]
# Positional lookup table: row i of similarity_matrix belongs to all_customer_ids[i].
all_customer_ids = customer_features['CustomerID'].tolist()
for idx, customer_id in enumerate(first_20_customers):
    # Rank every customer by descending similarity. A stable sort keeps the
    # ordering of tied scores deterministic.
    ranked = (-similarity_matrix[idx]).argsort(kind='stable')
    # Remove the customer's own row explicitly instead of assuming it sorts
    # first: another customer with an identical feature vector ties at the top
    # score, and skipping "position 0" could then drop that genuine match and
    # list the customer as their own lookalike.
    similar_indices = ranked[ranked != idx][:3]
    # Store plain Python floats so the saved text is a bare number rather than
    # a NumPy scalar repr such as "np.float64(0.98)".
    lookalike_results[customer_id] = [
        (all_customer_ids[i], float(similarity_matrix[idx, i]))
        for i in similar_indices
    ]


In [40]:
# Assemble one row per customer: the ID plus its list of
# (lookalike ID, score) pairs, in the same order as first_20_customers.
matches_per_customer = [lookalike_results[cust_id] for cust_id in first_20_customers]
lookalike_df = first_20_customers.to_frame(name='CustomerID').assign(
    Lookalikes=matches_per_customer
)

# Persist without the index column, then confirm completion.
lookalike_df.to_csv('Ankit_Kumar_Lookalike.csv', index=False)

print("Results saved")


Results saved
