# **Preparing features for similarity calculation, then calculating the category preferences and combining all features**

In [14]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.metrics.pairwise import cosine_similarity
from datetime import datetime

class CustomerLookalikeModel:
    """Recommend similar ("lookalike") customers via cosine similarity.

    ``preprocess_features`` builds one standardized numeric feature row per
    customer; ``find_lookalikes`` ranks the other customers by cosine
    similarity to a given customer's row.
    """

    # Date that ``SignupDate`` is subtracted from to get ``SignupDays``.
    DEFAULT_REFERENCE_DATE = datetime(2024, 1, 1)
    # The CSV export always contains at least this many lookalike/score pairs.
    MIN_LOOKALIKE_COLUMNS = 3

    def __init__(self):
        self.scaler = StandardScaler()
        self.feature_matrix = None
        self.customers_df = None

    def preprocess_features(self, customers_df, transactions_df, products_df,
                            reference_date=None):
        """Build and standardize the per-customer feature matrix.

        Args:
            customers_df: Frame with ``CustomerID``, ``SignupDate`` and
                ``Region`` columns.
            transactions_df: Frame with ``CustomerID``, ``ProductID``,
                ``TotalValue`` and ``Quantity`` columns.
            products_df: Frame with ``ProductID`` and ``Category`` columns.
            reference_date: Date that signup dates are subtracted from to
                compute ``SignupDays``. Defaults to
                ``DEFAULT_REFERENCE_DATE``.

        Returns:
            ``self``, with ``feature_matrix`` set to a DataFrame indexed by
            ``CustomerID``.
        """
        if reference_date is None:
            reference_date = self.DEFAULT_REFERENCE_DATE
        reference_date = pd.Timestamp(reference_date)

        self.customers_df = customers_df.copy()
        signup_dates = pd.to_datetime(self.customers_df['SignupDate'])
        self.customers_df['SignupDays'] = (reference_date - signup_dates).dt.days

        customers = self.customers_df.set_index('CustomerID')

        # Build the dummies from the CustomerID-indexed frame so the join
        # below aligns on customer, and force a float dtype so the columns
        # survive the numeric-only selection (get_dummies may return bools).
        region_dummies = pd.get_dummies(
            customers['Region'], prefix='Region', dtype=float
        )

        # Named aggregation ties each output column to its source explicitly
        # instead of relying on the order of a flattened MultiIndex.
        customer_stats = transactions_df.groupby('CustomerID').agg(
            total_spend=('TotalValue', 'sum'),
            avg_transaction_value=('TotalValue', 'mean'),
            transaction_count=('TotalValue', 'count'),
            total_items=('Quantity', 'sum'),
            avg_items_per_transaction=('Quantity', 'mean'),
        ).fillna(0)

        # Total spend per customer and product category, one column per
        # category.
        customer_categories = (
            transactions_df
            .merge(products_df, on='ProductID')
            .groupby(['CustomerID', 'Category'])
            .agg({'TotalValue': 'sum'})
            .reset_index()
            .pivot(index='CustomerID', columns='Category', values='TotalValue')
            .fillna(0)
        )
        customer_categories.columns = [
            f'spend_{str(col).lower()}' for col in customer_categories.columns
        ]

        # Left joins keep every customer; customers without transactions get
        # zeros for all transaction-derived features.
        feature_df = (
            customers
            .join(region_dummies)
            .join(customer_stats)
            .join(customer_categories)
            .fillna(0)
        )

        # Only numeric columns can be scaled and compared.
        feature_df = feature_df.select_dtypes(include=[np.number])

        scaled = self.scaler.fit_transform(feature_df)
        self.feature_matrix = pd.DataFrame(
            scaled,
            index=feature_df.index,
            columns=feature_df.columns
        )

        return self

    def find_lookalikes(self, customer_id, n_recommendations=3):
        """Return the customers most similar to ``customer_id``.

        Args:
            customer_id: Index label of the customer in ``feature_matrix``.
            n_recommendations: Maximum number of lookalikes to return.

        Returns:
            A list of ``(customer_id, similarity_score)`` tuples ordered by
            descending score, excluding the customer itself. Empty when the
            customer is unknown or ``n_recommendations`` is not positive.

        Raises:
            RuntimeError: If ``preprocess_features`` has not been called.
        """
        if self.feature_matrix is None:
            raise RuntimeError(
                'preprocess_features() must be called before find_lookalikes()'
            )

        if customer_id not in self.feature_matrix.index:
            return []

        # A negative count would otherwise slice from the end of the ranking.
        if n_recommendations <= 0:
            return []

        customer_vector = self.feature_matrix.loc[customer_id].values.reshape(1, -1)
        similarity_scores = cosine_similarity(customer_vector, self.feature_matrix)[0]

        similar_customers = pd.Series(
            similarity_scores,
            index=self.feature_matrix.index
        ).sort_values(ascending=False)

        # Drop the customer itself, then keep the top entries by position.
        others = similar_customers[similar_customers.index != customer_id]
        top_matches = others.iloc[:n_recommendations]

        return [(cust_id, score) for cust_id, score in top_matches.items()]

    def generate_lookalike_recommendations(self, customer_ids, n_recommendations=3):
        """Map each ID in ``customer_ids`` to its ``find_lookalikes`` result."""
        recommendations = {}

        for customer_id in customer_ids:
            lookalikes = self.find_lookalikes(customer_id, n_recommendations)
            recommendations[customer_id] = lookalikes

        return recommendations
        #Copyright to Vastav Bishnoi

    def save_recommendations_to_csv(self, recommendations, output_file):
        """Write recommendations to CSV, one row per customer.

        Columns are ``customer_id`` followed by ``lookalike_k``/``score_k``
        pairs. There are at least ``MIN_LOOKALIKE_COLUMNS`` pairs, and more
        when some customer has a longer lookalike list, so no recommendation
        is dropped. Missing slots are written as empty strings.

        Args:
            recommendations: Mapping of customer ID to a list of
                ``(lookalike_id, score)`` tuples.
            output_file: Path or writable text buffer passed to
                ``DataFrame.to_csv``.
        """
        longest = max(
            (len(lookalikes) for lookalikes in recommendations.values()),
            default=0,
        )
        n_slots = max(self.MIN_LOOKALIKE_COLUMNS, longest)

        columns = ['customer_id']
        for rank in range(1, n_slots + 1):
            columns.extend([f'lookalike_{rank}', f'score_{rank}'])

        rows = []
        for customer_id, lookalikes in recommendations.items():
            row = {'customer_id': customer_id}
            for rank in range(1, n_slots + 1):
                if rank <= len(lookalikes):
                    lookalike_id, score = lookalikes[rank - 1]
                else:
                    lookalike_id, score = '', ''
                row[f'lookalike_{rank}'] = lookalike_id
                row[f'score_{rank}'] = score
            rows.append(row)

        # Passing explicit columns keeps the header stable even with no rows.
        recommendations_df = pd.DataFrame(rows, columns=columns)
        recommendations_df.to_csv(output_file, index=False)

# **Initialization and training of the model, then saving the results as a .csv file**

In [53]:

# Fit the lookalike model on the customer, transaction and product tables.
model = CustomerLookalikeModel()
model.preprocess_features(
    customers_df,
    transactions_df,
    products_df,
)

# Lookalikes are requested for customer IDs C0001 through C0020.
target_customers = [f'C{number:04d}' for number in range(1, 21)]
recommendations = model.generate_lookalike_recommendations(target_customers)

# Write the recommendations out as a CSV file.
model.save_recommendations_to_csv(
    recommendations,
    'Vastav_Bishnoi_Lookalike.csv',
)