# In [9]:
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import StandardScaler

def build_lookalike_model(transactions_path, customers_path, products_path, output_path,
                          *, n_customers=20, top_n=3):
    """Build customer lookalike recommendations and write them to a CSV file.

    Transactions are joined to customers (on ``CustomerID``) and products
    (on ``ProductID``), aggregated per customer, standardized, and compared
    with cosine similarity. For each of the first ``n_customers`` customers
    (in ``CustomerID`` group order) the ``top_n`` most similar *other*
    customers are recorded.

    Args:
        transactions_path: Path to the transactions CSV. Must contain
            ``CustomerID``, ``ProductID``, ``TotalValue`` and ``Quantity``.
        customers_path: Path to the customers CSV. Must contain ``CustomerID``.
        products_path: Path to the products CSV. Must contain ``ProductID``
            and ``Price``.
        output_path: Destination CSV. It gets two columns: ``CustomerID`` and
            ``Lookalikes`` (a list of ``(customer_id, score)`` tuples, written
            in its string form).
        n_customers: How many customers to produce recommendations for.
            ``None`` means all customers.
        top_n: How many lookalikes to keep per customer.

    Returns:
        None. The result is written to ``output_path`` and a confirmation
        line is printed.
    """
    # Load datasets from the caller-supplied paths.
    customers = pd.read_csv(customers_path)
    products = pd.read_csv(products_path)
    transactions = pd.read_csv(transactions_path)

    # Inner joins: only transactions with a known customer and product survive.
    data = transactions.merge(customers, on='CustomerID').merge(products, on='ProductID')

    # When both the transactions and products files carry a ``Price`` column,
    # pandas suffixes them ``Price_x`` / ``Price_y`` and the product price is
    # ``Price_y``. If only one side has it, the column keeps its plain name.
    price_col = 'Price_y' if 'Price_y' in data.columns else 'Price'
    feature_cols = ['TotalValue', 'Quantity', price_col]

    # Aggregate to one feature row per customer.
    customer_data = data.groupby('CustomerID').agg({
        'TotalValue': 'sum',
        'Quantity': 'sum',
        price_col: 'mean',
    }).reset_index()

    # Standardize so no single feature dominates the similarity score.
    normalized_data = StandardScaler().fit_transform(customer_data[feature_cols])

    # Pairwise customer-to-customer similarity; row i matches customer_ids[i].
    similarity_matrix = cosine_similarity(normalized_data)

    customer_ids = customer_data['CustomerID'].tolist()
    top_lookalikes = {}
    for i, customer_id in enumerate(customer_ids[:n_customers]):
        # Exclude the customer by position rather than assuming they sort
        # first: a tie at similarity 1.0 could otherwise list a customer as
        # their own lookalike.
        candidates = [
            (j, score) for j, score in enumerate(similarity_matrix[i]) if j != i
        ]
        candidates.sort(key=lambda pair: pair[1], reverse=True)
        # float() keeps NumPy scalar reprs (e.g. ``np.float64(0.99)``) out of
        # the serialized list.
        top_lookalikes[customer_id] = [
            (customer_ids[j], round(float(score), 2)) for j, score in candidates[:top_n]
        ]

    # Save recommendations.
    lookalike_df = pd.DataFrame({
        'CustomerID': list(top_lookalikes.keys()),
        'Lookalikes': list(top_lookalikes.values()),
    })
    lookalike_df.to_csv(output_path, index=False)
    print(f"Lookalike recommendations saved to {output_path}")

# Example usage: build lookalikes from the three CSV files in the working
# directory and write the result to Lookalike.csv.
build_lookalike_model(
    'Transactions.csv',
    'Customers.csv',
    'Products.csv',
    'Lookalike.csv',
)


# Output: Lookalike recommendations saved to Lookalike.csv
