# Task 2: Lookalike Model

# In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.metrics.pairwise import cosine_similarity


# Join every transaction with its product row, then with the purchasing
# customer's row. Both joins are inner joins, so only fully matched rows remain.
merged_data = pd.merge(
    pd.merge(transactions, products, on='ProductID', how='inner'),
    customers,
    on='CustomerID',
    how='inner',
)

# One profile row per customer: total spend, total quantity, mean price, and
# the first Region / SignupDate value seen for that customer.
customer_profiles = (
    merged_data
    .groupby('CustomerID')
    .agg(
        TotalValue=('TotalValue', 'sum'),
        Quantity=('Quantity', 'sum'),
        Price=('Price', 'mean'),
        Region=('Region', 'first'),
        SignupDate=('SignupDate', 'first'),
    )
    .reset_index()
)

# Only the three numeric columns feed the similarity model; Region and
# SignupDate are kept on the profile but are not scaled or compared.
scaler = StandardScaler()
customer_profiles_scaled = scaler.fit_transform(
    customer_profiles.loc[:, ['TotalValue', 'Quantity', 'Price']]
)

# Pairwise cosine similarity between the scaled profile rows; entry [i, j]
# compares the customers in rows i and j of customer_profiles.
similarity_matrix = cosine_similarity(customer_profiles_scaled)

def get_lookalikes(customer_profiles, similarity_matrix, top_n=3):
    """Return the ``top_n`` most similar *other* customers for each customer.

    Args:
        customer_profiles: DataFrame with a 'CustomerID' column. The row at
            position ``i`` must correspond to row ``i`` of
            ``similarity_matrix``.
        similarity_matrix: Square array-like of pairwise similarity scores,
            where entry ``[i][j]`` compares the customers at positions ``i``
            and ``j``.
        top_n: Maximum number of lookalikes to return per customer.

    Returns:
        A dict mapping each customer ID to a list of
        ``(lookalike_customer_id, similarity_score)`` tuples, ordered from
        most to least similar. A list is shorter than ``top_n`` when fewer
        other customers exist.
    """
    # Pull the IDs out once instead of doing a row lookup per lookalike.
    customer_ids = customer_profiles['CustomerID'].to_numpy()
    all_scores = np.asarray(similarity_matrix)
    limit = max(top_n, 0)
    lookalike_map = {}

    for i, customer_id in enumerate(customer_ids):
        similarity_scores = all_scores[i]

        # Rank every row from most to least similar. The stable sort keeps
        # tied scores in row order, so the output is deterministic.
        ranked = np.argsort(-similarity_scores, kind='stable')

        # Remove the customer's own row by position. Relying on "self has the
        # highest score" is wrong when another customer ties with it or when
        # the self-similarity is not the maximum (e.g. an all-zero vector).
        ranked = ranked[ranked != i][:limit]

        lookalike_map[customer_id] = [
            (customer_ids[j], similarity_scores[j]) for j in ranked
        ]

    return lookalike_map

# Compute the top 3 lookalikes for every customer in customer_profiles.
# NOTE(review): no filter to the first 20 customers (C0001 to C0020) is applied
# here; if only those rows are wanted in the CSV, filter lookalike_df before
# saving.
lookalike_map = get_lookalikes(customer_profiles, similarity_matrix)

# Output layout: the customer ID followed by three (lookalike ID, score) pairs.
lookalike_columns = ['CustomerID', 'Lookalike1', 'Score1', 'Lookalike2', 'Score2', 'Lookalike3', 'Score3']

# Flatten each customer's list of (lookalike ID, score) pairs into one row.
lookalike_data = []
for customer_id, similar_customers in lookalike_map.items():
    row = [customer_id] + [item for pair in similar_customers for item in pair]
    # Pad rows that have fewer than three lookalikes (fewer than four
    # customers overall) so the row width always matches the column list;
    # otherwise the DataFrame constructor raises on the length mismatch.
    row.extend([None] * (len(lookalike_columns) - len(row)))
    lookalike_data.append(row)

lookalike_df = pd.DataFrame(lookalike_data, columns=lookalike_columns)

# Save the lookalike map to a CSV file
lookalike_df.to_csv('Lookalike.csv', index=False)

print("Lookalike model has been created and saved to Lookalike.csv")