In [4]:
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import StandardScaler

# Read the three raw tables from the working directory
customers_df = pd.read_csv('Customers.csv')
products_df = pd.read_csv('Products.csv')
transactions_df = pd.read_csv('Transactions.csv')

# Parse the transaction timestamp into a proper datetime column
transactions_df['TransactionDate'] = pd.to_datetime(transactions_df['TransactionDate'])

# One row per transaction, enriched with product attributes and the customer's region.
# Left joins keep every transaction even when the product/customer lookup has no match.
customer_regions = customers_df[['CustomerID', 'Region']]
merged_data = (
    transactions_df
    .merge(products_df, on='ProductID', how='left')
    .merge(customer_regions, on='CustomerID', how='left')
)

# Feature Engineering
# Builds one feature row per CustomerID that appears in the transactions.
# 1. Total Spending, Number of Transactions, and Quantity per Customer
customer_features = merged_data.groupby('CustomerID').agg({
    'TotalValue': 'sum',  # Total spending
    'TransactionID': 'count',  # Number of transactions
    'Quantity': 'sum'  # Total quantity purchased
}).rename(columns={'TransactionID': 'NumTransactions', 'TotalValue': 'TotalSpending'})

# 2. Preferred Product Categories (quantity bought per category, 0 where none)
category_pivot = pd.pivot_table(
    merged_data, values='Quantity', index='CustomerID', columns='Category', aggfunc='sum', fill_value=0
)
customer_features = customer_features.join(category_pivot)

# 3. Encode Region
# dtype=float keeps the indicator columns numeric even after a join introduces
# missing rows (bool + NaN would otherwise degrade to object dtype).
region_encoded = pd.get_dummies(customers_df.set_index('CustomerID')['Region'], dtype=float)
customer_features = customer_features.join(region_encoded)

# Both joins above are left joins on the index, so a customer that is absent from
# the category pivot (e.g. all purchased products lack a Category) or from
# Customers.csv ends up with NaN. NaN survives StandardScaler and makes
# cosine_similarity raise, so treat "no information" as 0 for those features.
customer_features = customer_features.fillna(0)

# Put every feature column on a comparable scale before measuring similarity:
# learn the per-column statistics first, then apply them to the same matrix.
scaler = StandardScaler()
scaler.fit(customer_features)
scaled_features = scaler.transform(customer_features)

# Pairwise customer-to-customer cosine similarity; row i / column j follow the
# row order of customer_features.
similarity_matrix = cosine_similarity(scaled_features)

# Build Recommendations
# Maps each CustomerID to a list of (other CustomerID, similarity score) tuples,
# best match first, holding at most TOP_N entries.
TOP_N = 3  # number of lookalikes kept per customer
customer_ids = customer_features.index.tolist()
recommendations = {}

for i, customer_id in enumerate(customer_ids):
    # Get similarity scores against every *other* customer. The customer's own
    # entry is removed by position: relying on it sorting first is unsafe, because
    # another customer with an identical feature vector (or float round-off) can
    # tie or edge ahead, which would list the customer as its own lookalike.
    similarity_scores = [
        (j, score) for j, score in enumerate(similarity_matrix[i]) if j != i
    ]
    # Sort by score (highest first); the sort is stable, so ties keep index order
    sorted_scores = sorted(similarity_scores, key=lambda x: x[1], reverse=True)[:TOP_N]
    # Save the top matches. float() turns the NumPy scalar into a builtin float so
    # that stringifying this list later yields plain numbers rather than
    # 'np.float64(...)' on NumPy >= 2.
    recommendations[customer_id] = [
        (customer_ids[j], round(float(score), 4)) for j, score in sorted_scores
    ]

# Flatten the recommendations mapping into a two-column table for Lookalike.csv:
# the customer, and the string form of that customer's list of matches.
recommendations_df = pd.DataFrame({
    'CustomerID': list(recommendations),
    'Recommendations': list(map(str, recommendations.values())),
})

# Write the table without the positional index column
recommendations_df.to_csv('Reemasen_Tungala_Lookalike.csv', index=False)

print("Lookalike Model Completed. Recommendations saved to 'Reemasen_Tungala_Lookalike.csv'.")


Lookalike Model Completed. Recommendations saved to 'Reemasen_Tungala_Lookalike.csv'.
