<a href="https://colab.research.google.com/github/deepbairagya/eCommerce-Transactions-Dataset-for-Data-Science/blob/main/Lookalike_Model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Importing Libraries

In [1]:
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import StandardScaler
import numpy as np

# Load datasets

In [2]:
# Read the three input tables from CSV files located in the working directory.
# The order of the file names matches the order of the target variables.
customers_df, products_df, transactions_df = (
    pd.read_csv(csv_name)
    for csv_name in ('Customers.csv', 'Products.csv', 'Transactions.csv')
)

# Preprocessing
# Attach product details (such as category) to each transaction so that total spend and product categories bought can be aggregated per customer

In [3]:
# Left-join each transaction row with the product row sharing its ProductID;
# transactions without a matching product are kept.
transactions_agg = pd.merge(
    left=transactions_df,
    right=products_df,
    how='left',
    on='ProductID',
)


# Aggregate customer spending and product category preference

In [4]:
# Collapse the joined transactions to one row per CustomerID:
#   TotalSpent - sum of TotalValue over the customer's transactions
#   Category   - list of the Category value of every transaction (repeats kept)
customer_transactions = transactions_agg.groupby('CustomerID').agg(
    TotalSpent=('TotalValue', 'sum'),
    Category=('Category', lambda purchased: list(purchased)),
)

# Add one column per product category holding the number of purchases the customer made in that category

In [5]:
# For every distinct category in the product table, add a column that counts
# how many times that category occurs in each customer's purchase list.
purchase_lists = customer_transactions['Category']
for category in products_df['Category'].unique():
    customer_transactions[category] = purchase_lists.map(
        lambda bought: bought.count(category)
    )

# Merge customer profiles with transaction data

In [6]:
# Attach the per-customer aggregates to the customer profiles. The left join keeps
# customers that have no transactions; their aggregate columns come back as NaN.
customers_merged = customers_df.merge(customer_transactions, on='CustomerID', how='left')

# Zero-fill only the numeric aggregates (total spend and per-category counts).
# A blanket fillna(0) would also write integer 0 into non-numeric columns with
# missing values (profile fields such as Region, and the list-valued Category
# column), silently turning "missing" into a bogus value.
numeric_agg_columns = [
    col
    for col in customer_transactions.select_dtypes(include='number').columns
    if col in customers_merged.columns
]
customers_merged[numeric_agg_columns] = customers_merged[numeric_agg_columns].fillna(0)


# Standardizing numerical data

In [7]:
# Columns to standardize: total spend plus one count column per product category.
numerical_columns = ['TotalSpent', *products_df['Category'].unique()]

# Fit the scaler on those columns and write the standardized values back
# into the same columns of customers_merged.
scaler = StandardScaler()
unscaled_block = customers_merged[numerical_columns]
customers_merged[numerical_columns] = scaler.fit(unscaled_block).transform(unscaled_block)

# Encoding Region as numerical values

In [8]:
# Replace each Region value with its integer category code (overwrites the column).
# NOTE(review): the codes are arbitrary integers and the column is later used,
# unscaled, as a similarity feature - if Region is a nominal label, one-hot
# encoding may be more appropriate; confirm intent.
customers_merged['Region'] = pd.Categorical(customers_merged['Region']).codes

# Selecting features for similarity computation

In [9]:
# Feature columns: the Region code followed by the standardized numeric columns.
features = ['Region', *numerical_columns]

# Plain array of the feature columns, one row per row of customers_merged.
customer_features = customers_merged.loc[:, features].to_numpy()


# Compute cosine similarity between customers

In [10]:
# Pairwise cosine similarity between all rows of customer_features:
# entry [i, j] compares feature row i with feature row j.
similarity_matrix = cosine_similarity(X=customer_features, Y=None)


# Build a lookalike recommendation map

In [18]:
# Number of lookalikes kept per customer.
TOP_N_LOOKALIKES = 3

# CustomerID values in row order; row i of similarity_matrix was computed from
# row i of customers_merged, so it belongs to customer_ids[i].
customer_ids = customers_merged['CustomerID'].to_numpy()

# Maps each CustomerID to a list of (other CustomerID, similarity score) tuples,
# ordered from most to least similar.
lookalike_map = {}

for idx, customer_id in enumerate(customer_ids):
    # Similarity of this customer to every customer (itself included)
    scores = similarity_matrix[idx]

    # Row indices ordered by descending similarity. A stable sort on the negated
    # scores makes tie-breaking deterministic (earlier rows first).
    ranked_customers = np.argsort(-scores, kind='stable')

    # Drop the customer's own row by index instead of assuming it is ranked first:
    # with tied scores (e.g. identical feature vectors) the customer can land at
    # rank 1-3 and would otherwise be listed as its own lookalike. Slicing also
    # copes with fewer than TOP_N_LOOKALIKES + 1 customers.
    ranked_customers = ranked_customers[ranked_customers != idx][:TOP_N_LOOKALIKES]

    lookalike_map[customer_id] = [
        (customer_ids[other], scores[other]) for other in ranked_customers
    ]

# Filter for the first 20 customers (C0001 - C0020)

In [19]:
# Keep the lookalike entries of the first 20 rows of customers_merged.
# NOTE(review): this selects by row position, not by ID value - it matches
# C0001-C0020 only if the customer table is ordered that way; confirm.
first_twenty_ids = customers_merged['CustomerID'].iloc[:20]
filtered_lookalike_map = {
    cust_id: lookalike_map[cust_id] for cust_id in first_twenty_ids
}

# Convert to DataFrame for export

In [20]:
# Two-column export table: one row per customer, with the customer's lookalikes
# as a list of (CustomerID, score) tuples, scores rounded to 3 decimals.
# Scores are converted to built-in floats before rounding: rounding a NumPy
# float keeps the NumPy scalar type, whose repr under NumPy >= 2 is
# "np.float64(0.987)" and would leak into the stringified lists in the CSV.
lookalike_df = pd.DataFrame({
    'CustomerID': list(filtered_lookalike_map),
    'Lookalikes': [
        [(other_id, round(float(score), 3)) for other_id, score in lookalikes]
        for lookalikes in filtered_lookalike_map.values()
    ],
})

# Save to CSV

In [21]:
# Write the lookalike table to disk without the row index, then confirm on stdout.
lookalike_df.to_csv(path_or_buf='Lookalike.csv', index=False)
print(
    "Lookalike recommendations saved to 'Lookalike.csv'."
)

Lookalike recommendations saved to 'Lookalike.csv'.
