In [11]:
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import StandardScaler
import csv

# Load data
# NOTE(review): these are absolute, machine-specific paths; the script only
# runs where both files exist at exactly these locations.
customers = pd.read_csv(r"C:\Users\ajayc\Downloads\Customers.csv")
transactions = pd.read_csv(r"C:\Users\ajayc\Downloads\Transactions.csv")

# Prepare data for similarity calculation
# Aggregate transaction data: one row per CustomerID holding the summed
# TotalValue and Quantity across all of that customer's transactions.
transaction_summary = transactions.groupby('CustomerID').agg({
    'TotalValue': 'sum',
    'Quantity': 'sum'
}).reset_index()

# Merge with customer data. The left join keeps every customer, including
# those that have no rows in the transaction summary.
customer_data = pd.merge(customers, transaction_summary, on='CustomerID', how='left')

# Customers without transactions come out of the left join with NaN
# aggregates; treat those as zero. Only the aggregate columns are filled so
# that missing values in other customer columns are not overwritten with 0.
feature_columns = ['TotalValue', 'Quantity']
customer_data[feature_columns] = customer_data[feature_columns].fillna(0)

# Select features and standardize
features = customer_data[feature_columns]
scaler = StandardScaler()
scaled_features = scaler.fit_transform(features)

# Calculate similarity matrix: entry [i, j] is the cosine similarity between
# the standardized feature vectors of customer row i and customer row j.
similarity_matrix = cosine_similarity(scaled_features)

# Define function to get top 3 similar customers
def get_top_n(sim_matrix, customer_ids, n=3):
    """Return the ``n`` most similar *other* customers for every customer.

    Args:
        sim_matrix: Square array of pairwise similarity scores. Iterating it
            must yield NumPy rows, where row ``i`` holds the scores of
            customer ``i`` against every customer (including itself).
        customer_ids: Sequence of customer identifiers, in the same order as
            the rows/columns of ``sim_matrix``.
        n: Maximum number of lookalikes to return per customer. Fewer are
            returned when there are fewer than ``n`` other customers.

    Returns:
        A dict mapping each customer id to a list of
        ``(similar_customer_id, score)`` tuples, ordered from most to least
        similar. A customer never appears in its own list.
    """
    results = {}
    for idx, row in enumerate(sim_matrix):
        # Sorting the negated scores yields a descending ranking; the stable
        # sort breaks ties by the lower index so the output is deterministic.
        ranked = (-row).argsort(kind="stable")
        # Drop the customer's own index explicitly. Relying on the self-score
        # being the unique maximum is unsafe: other customers can tie with
        # (or, through rounding, slightly exceed) the self-similarity.
        similar_indices = [i for i in ranked if i != idx][:n]
        results[customer_ids[idx]] = [
            (customer_ids[i], float(row[i])) for i in similar_indices
        ]
    return results

# Customer IDs, taken in the row order of customer_data so that they line up
# with the rows of the similarity matrix.
customer_ids = customer_data['CustomerID'].tolist()

# Map every customer to its list of (similar customer, score) pairs.
lookalikes = get_top_n(similarity_matrix, customer_ids)

# Write one CSV row per (customer, lookalike) pair, preceded by a header row.
with open('FirstName_LastName_Lookalike.csv', 'w', newline='') as csv_file:
    writer = csv.writer(csv_file)
    writer.writerow(['CustomerID', 'SimilarCustomerID', 'Score'])
    writer.writerows(
        [cust_id, similar_cust_id, score]
        for cust_id, similars in lookalikes.items()
        for similar_cust_id, score in similars
    )

print("Lookalike.csv created successfully!")



Lookalike.csv created successfully!
