# In [8]:
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import StandardScaler

# Read the three raw input tables
customers, transactions, products = (
    pd.read_csv(csv_path)
    for csv_path in ("Customers.csv", "Transactions.csv", "Products.csv")
)

# Attach customer and product attributes to every transaction row
merged_data = (
    transactions
    .merge(customers, on="CustomerID", how="left")
    .merge(products, on="ProductID", how="left")
)

# Feature engineering: one row per customer summarising their purchases.
# NOTE: the grouping is driven by the transaction rows, so a customer with no
# transactions gets no row here and is absent from the similarity matrix.
customer_features = merged_data.groupby("CustomerID").agg(
    TotalSpent=("TotalValue", "sum"),              # total value of all transactions
    TotalQuantity=("Quantity", "sum"),             # total quantity of products purchased
    TransactionCount=("TransactionID", "count"),   # number of transactions
)

# Bring in the profile columns from the customer table
customer_profiles = customers.set_index("CustomerID")
customer_features = customer_features.join(customer_profiles, on="CustomerID")

# One-hot encode Region (the first category is dropped and acts as baseline)
customer_features = pd.get_dummies(
    customer_features,
    columns=["Region"],
    drop_first=True,
)

# Standardise the remaining columns; name and signup date are left out of
# the feature matrix
non_feature_columns = ["CustomerName", "SignupDate"]
scaler = StandardScaler()
features_scaled = scaler.fit_transform(
    customer_features.drop(columns=non_feature_columns)
)

# Pairwise cosine similarity between all customers, labelled by CustomerID
# on both axes
similarity_matrix = cosine_similarity(features_scaled)
customer_ids = customer_features.index
similarity_df = pd.DataFrame(
    similarity_matrix,
    index=customer_ids,
    columns=customer_ids,
)

# Function to get top 3 similar customers
def get_top_similar(customer_id, top_n=3, similarity=None):
    """Return the customers most similar to ``customer_id``.

    Args:
        customer_id: Label of the customer to look up in the similarity matrix.
        top_n: Maximum number of lookalikes to return. Values below 1 yield
            an empty list.
        similarity: Square similarity DataFrame labelled by customer ID on
            both axes. Defaults to the module-level ``similarity_df``.

    Returns:
        A list of ``(other_customer_id, score)`` tuples ordered from most to
        least similar. Empty when ``customer_id`` is not in the matrix.
    """
    if similarity is None:
        similarity = similarity_df
    if customer_id not in similarity.index:
        return []

    # Exclude the customer by label, not by position: when another customer
    # ties with the self-similarity score (identical feature vectors, or
    # floating-point rounding) the customer is not guaranteed to sort first,
    # so slicing off position 0 could drop a real match and return the
    # customer as their own lookalike.
    scores = similarity.loc[customer_id].drop(labels=customer_id, errors="ignore")

    # A stable sort keeps tied scores in matrix order, so results are
    # reproducible from run to run.
    ranked = scores.sort_values(ascending=False, kind="mergesort")
    return list(ranked.iloc[:max(top_n, 0)].items())

# Generate lookalike data for the first 20 customers in the feature table
lookalike_results = {
    customer_id: get_top_similar(customer_id)
    for customer_id in customer_features.index[:20]
}

# Flatten to one record per (customer, lookalike) pair
lookalike_output = []
for cust_id, lookalikes in lookalike_results.items():
    lookalike_output.extend(
        {'CustomerID': cust_id, 'SimilarCustomerID': similar_id, 'SimilarityScore': score}
        for similar_id, score in lookalikes
    )

# Save lookalike results to CSV. The column names are given explicitly so the
# file still gets its header row when no pairs were produced; a DataFrame
# built from an empty list would otherwise have no columns at all.
lookalike_columns = ['CustomerID', 'SimilarCustomerID', 'SimilarityScore']
lookalike_df = pd.DataFrame(lookalike_output, columns=lookalike_columns)
lookalike_df.to_csv("Pardhiv_Varma_Lookalike.csv", index=False)

print("Lookalike model completed. Results saved to Pardhiv_Varma_Lookalike.csv.")

# Output: Lookalike model completed. Results saved to Pardhiv_Varma_Lookalike.csv.
