# Import necessary libraries

In [17]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.metrics.pairwise import cosine_similarity
import warnings
warnings.filterwarnings('ignore')

# Load datasets

In [18]:
# Read the three raw tables. Date columns are converted with pd.to_datetime
# as part of the load so each frame is ready to use once it is bound.
customers_df = pd.read_csv('Customers.csv').assign(
    SignupDate=lambda frame: pd.to_datetime(frame['SignupDate'])
)
products_df = pd.read_csv('Products.csv')
transactions_df = pd.read_csv('Transactions.csv').assign(
    TransactionDate=lambda frame: pd.to_datetime(frame['TransactionDate'])
)

# Create customer features
# Start from the customer table so every customer keeps a row; the left
# merges below leave NaN for customers with no matching transactions.
customer_features = customers_df.copy()

# Add transaction-based features
# Named aggregation binds each output column to its source column and
# function explicitly instead of relying on a positional rename afterwards.
trans_agg = (
    transactions_df
    .groupby('CustomerID')
    .agg(
        transaction_count=('TransactionID', 'count'),
        total_spend=('TotalValue', 'sum'),
        avg_transaction_value=('TotalValue', 'mean'),
        total_quantity=('Quantity', 'sum'),
        avg_quantity=('Quantity', 'mean'),
    )
    .reset_index()
)
customer_features = customer_features.merge(trans_agg, on='CustomerID', how='left')

# Add category preferences
# One column per product category holding the customer's summed TotalValue;
# customer/category pairs with no transactions are filled with 0 by the pivot.
category_data = transactions_df.merge(products_df, on='ProductID')
category_pivot = pd.pivot_table(
    category_data,
    values='TotalValue',
    index='CustomerID',
    columns='Category',
    aggfunc='sum',
    fill_value=0,
)
customer_features = customer_features.merge(category_pivot, on='CustomerID', how='left')

# Fill NaN values
# Only the engineered columns are filled: a NaN there means "no transactions",
# so 0 is the correct value. Columns that came from customers_df (for example
# SignupDate) are left untouched, because writing 0 into them would corrupt
# their dtype and meaning.
engineered_cols = [col for col in trans_agg.columns if col != 'CustomerID'] + list(category_pivot.columns)
customer_features = customer_features.fillna({col: 0 for col in engineered_cols})

# Prepare features for similarity calculation
# Transaction aggregates come first, followed by one spend column per
# product category (taken from the pivot's column labels).
behaviour_cols = [
    'total_spend',
    'transaction_count',
    'avg_transaction_value',
    'total_quantity',
    'avg_quantity',
]
feature_cols = [*behaviour_cols, *category_pivot.columns]
X = customer_features.loc[:, feature_cols]

# Scale features
# Standardise every column so that no single feature dominates the cosine
# similarity purely because of its scale.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Calculate similarity matrix
# Pairwise cosine similarity between all rows of X_scaled; entry [i, j]
# compares row i with row j of customer_features.
similarity_matrix = cosine_similarity(X_scaled)


# Create lookalike recommendations

In [19]:
# Number of lookalikes to keep per customer.
TOP_N_LOOKALIKES = 3

# Positional lookup of customer IDs. similarity_matrix is indexed by row
# position, and it is assumed to be row-aligned with customer_features.
customer_ids = customer_features['CustomerID'].tolist()
all_indices = np.arange(len(customer_ids))

lookalike_recommendations = {}
for idx, customer_id in enumerate(customer_ids):
    # Get similarity scores for current customer
    customer_similarities = similarity_matrix[idx]

    # Exclude the customer's own row explicitly by index. Sorting and then
    # skipping the first entry is not safe: when another customer ties at the
    # top (identical feature vector, or floating-point rounding), self is not
    # guaranteed to sort first and could be returned as its own lookalike.
    candidate_indices = all_indices[all_indices != idx]

    # Stable descending sort, so ties are broken by original row order and
    # the result is reproducible from run to run.
    ranking = np.argsort(-customer_similarities[candidate_indices], kind='stable')
    similar_indices = candidate_indices[ranking[:TOP_N_LOOKALIKES]]

    # Store recommendations as (customer ID, similarity rounded to 3 decimals)
    lookalike_recommendations[customer_id] = [
        (customer_ids[similar_idx], round(customer_similarities[similar_idx], 3))
        for similar_idx in similar_indices
    ]

# Prepare data for CSV output

In [20]:
# Number of leading customers (in customers_df row order) to export.
N_EXPORT_CUSTOMERS = 20

# Build the membership set once; it does not change between loop iterations.
export_ids = set(customers_df['CustomerID'].iloc[:N_EXPORT_CUSTOMERS])

csv_data = []
for cust_id, recommendations in lookalike_recommendations.items():
    if cust_id not in export_ids:
        continue

    # One Lookalike<k>/Score<k> column pair per available recommendation.
    # Building the row from the list avoids an IndexError when a customer has
    # fewer than three lookalikes; pandas fills any missing pairs with NaN.
    row = {'CustomerID': cust_id}
    for rank, (lookalike_id, score) in enumerate(recommendations, start=1):
        row[f'Lookalike{rank}'] = lookalike_id
        row[f'Score{rank}'] = score
    csv_data.append(row)

# Create DataFrame and save to CSV
recommendations_df = pd.DataFrame(csv_data)
recommendations_df.to_csv('John_Joshua_Lookalike.csv', index=False)

print(f"Lookalike recommendations for the first {N_EXPORT_CUSTOMERS} customers:")
print(recommendations_df)
print("\nLookalike recommendations saved to John_Joshua_Lookalike.csv")

Lookalike recommendations for the first 20 customers:
   CustomerID Lookalike1  Score1 Lookalike2  Score2 Lookalike3  Score3
0       C0001      C0069   0.936      C0072   0.862      C0183   0.823
1       C0002      C0036   0.862      C0062   0.849      C0055   0.846
2       C0003      C0144   0.786      C0197   0.784      C0166   0.771
3       C0004      C0075   0.977      C0065   0.929      C0113   0.868
4       C0005      C0085   0.899      C0130   0.868      C0007   0.856
5       C0006      C0185   0.914      C0196   0.897      C0079   0.864
6       C0007      C0085   0.943      C0005   0.856      C0197   0.824
7       C0008      C0024   0.831      C0109   0.818      C0194   0.774
8       C0009      C0180   0.979      C0083   0.978      C0097   0.972
9       C0010      C0111   0.928      C0029   0.920      C0142   0.892
10      C0011      C0153   0.823      C0099   0.759      C0013   0.750
11      C0012      C0104   0.890      C0059   0.885      C0163   0.883
12      C0013      C004