# Task 2: Lookalike Model 

In [1]:
import pandas as pd

# Read the three input tables (customers, products, transactions) from CSV
# files in the current working directory, in that order.
customers_df, products_df, transactions_df = (
    pd.read_csv(csv_path)
    for csv_path in ('Customers.csv', 'Products.csv', 'Transactions.csv')
)

In [3]:
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import StandardScaler
import csv

# Collapse the transaction log to one row per customer, then attach the
# customer attributes. After aggregation the 'TransactionDate' column no
# longer holds dates: it holds the number of transactions for that customer.
aggregation_spec = {
    'TotalValue': 'sum',
    'Quantity': 'sum',
    'TransactionDate': 'count',
}
customer_transactions = (
    transactions_df
    .groupby('CustomerID')
    .agg(aggregation_spec)
    .reset_index()
    .merge(customers_df, on='CustomerID')
)

# Feature matrix: total spend, total quantity and transaction count,
# standardised column-wise so no single feature dominates by scale.
feature_columns = ['TotalValue', 'Quantity', 'TransactionDate']
features = customer_transactions[feature_columns]
scaler = StandardScaler()
scaled_features = scaler.fit_transform(features)

# Pairwise cosine similarity between customers; row/column i corresponds to
# row i of customer_transactions.
similarity_matrix = cosine_similarity(scaled_features)

# Function to get top 3 similar customers
def get_top_similar_customers(customer_index, similarity_matrix, top_n=3):
    """Return the ``top_n`` most similar other customers for one customer.

    Args:
        customer_index: Positional row index of the query customer in
            ``similarity_matrix``. Negative indices count from the end.
        similarity_matrix: Square, row-indexable collection of pairwise
            similarity scores (for example a 2-D array or a list of lists).
        top_n: Maximum number of neighbours to return.

    Returns:
        A list of ``(index, score)`` tuples sorted by descending score,
        never containing the query customer itself. Ties keep ascending
        index order. Fewer than ``top_n`` entries are returned when the
        matrix does not contain enough other customers.
    """
    if top_n <= 0:
        return []

    row = similarity_matrix[customer_index]
    # Resolve a negative index to its position so the self-entry can be
    # recognised while enumerating the row.
    own_position = customer_index if customer_index >= 0 else customer_index + len(row)

    # Drop the query customer explicitly. Skipping the first sorted element
    # instead is wrong whenever another customer ties with (or, through
    # rounding, exceeds) the self-similarity score.
    candidates = [
        (position, score)
        for position, score in enumerate(row)
        if position != own_position
    ]
    # list.sort is stable, so equally scored customers stay in index order.
    candidates.sort(key=lambda pair: pair[1], reverse=True)
    return candidates[:top_n]

# Example: get the top 3 similar customers for the first 20 customers
# (or for every customer when the table has fewer than 20 rows).
# Keys of lookalike_results are CustomerIDs; each value is a list of
# (similar CustomerID, similarity score) pairs.
# Pull the ID column out once: positions in this list line up with the rows
# and columns of similarity_matrix, and it avoids building a full row
# Series for every single lookup.
customer_ids = customer_transactions['CustomerID'].tolist()
lookalike_results = {}
for i, customer_id in enumerate(customer_ids[:20]):
    similar_customers = get_top_similar_customers(i, similarity_matrix)
    # Convert np.float64 to regular float so the CSV shows bare numbers
    similar_customers_cleaned = [(customer_ids[j], float(score)) for j, score in similar_customers]
    lookalike_results[customer_id] = similar_customers_cleaned

# Write the lookalike map to disk: a header row, then one row per customer
# whose second cell is the string form of that customer's list of
# (similar CustomerID, score) pairs.
output_header = ['CustomerID', 'SimilarCustomers']
with open('Ayushman_Saxena_Lookalike.csv', 'w', newline='') as out_handle:
    csv_out = csv.writer(out_handle)
    csv_out.writerow(output_header)
    csv_out.writerows(
        [cust_id, matches] for cust_id, matches in lookalike_results.items()
    )
