In [None]:
!pip install scikit-learn


In [4]:
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import StandardScaler

# Step 1: Load the Data
# Read the three input tables from CSV files in the working directory.
customers = pd.read_csv('Customers.csv')
products = pd.read_csv('Products.csv')
transactions = pd.read_csv('Transactions.csv')

# Step 2: Merge Data
# Join every transaction to its customer row on CustomerID (pandas' default
# inner join, so transactions without a matching customer row are dropped).
merged_data = pd.merge(transactions, customers, on='CustomerID')

# Collapse to one row per customer: summed TotalValue, summed Quantity, and
# the count of non-null ProductID entries (kept under the name 'ProductID').
summary_aggregations = {
    'TotalValue': 'sum',
    'Quantity': 'sum',
    'ProductID': 'count',
}
customer_summary = (
    merged_data
    .groupby('CustomerID')
    .agg(summary_aggregations)
    .reset_index()
)

# Step 3: Feature Engineering
# Standardize the three per-customer aggregates with StandardScaler so they
# are on a comparable scale before measuring similarity.
feature_columns = ['TotalValue', 'Quantity', 'ProductID']
scaler = StandardScaler()
features = scaler.fit_transform(customer_summary[feature_columns])

# Step 4: Calculate Similarity
# Pairwise cosine similarity between all rows of the scaled feature matrix;
# row i / column j compares the i-th and j-th rows of customer_summary.
similarity_matrix = cosine_similarity(features)

# Step 5: Build Lookalike Model
# For every customer, keep the NUM_LOOKALIKES other customers with the highest
# cosine similarity, stored as a list of (CustomerID, score) tuples.
NUM_LOOKALIKES = 3
lookalike_results = {}
# Iterate by position: rows of similarity_matrix follow the row order of
# customer_summary, so the positional counter (not an index label) is the
# correct key into the matrix.
for position, customer_id in enumerate(customer_summary['CustomerID']):
    # Similarity of the current customer to every customer (including itself)
    scores = similarity_matrix[position]
    # Rank all positions from most to least similar.
    ranked_positions = scores.argsort()[::-1]
    # Drop the customer's own position explicitly. Relying on "self is always
    # the single highest score" breaks when another customer ties at 1.0,
    # when rounding puts a neighbour marginally above the self-score, or when
    # the scaled vector is all zeros (self-similarity 0); in those cases the
    # customer would be listed as its own lookalike.
    similar_indices = ranked_positions[ranked_positions != position][:NUM_LOOKALIKES]
    similar_customers = customer_summary.iloc[similar_indices]
    # Store results
    lookalike_results[customer_id] = list(zip(similar_customers['CustomerID'], scores[similar_indices]))

# Step 6: Create DataFrame for Output
# Flatten the {customer: [(lookalike, score), ...]} mapping into one record
# per (customer, lookalike) pair.
lookalike_list = [
    {
        'CustomerID': source_id,
        'SimilarCustomerID': match_id,
        'SimilarityScore': match_score,
    }
    for source_id, matches in lookalike_results.items()
    for match_id, match_score in matches
]

lookalike_df = pd.DataFrame(lookalike_list)

# Step 7: Save to CSV
# Write the long-format table without the DataFrame index column.
lookalike_df.to_csv('Lookalike.csv', index=False)

# Display the first 20 rows of the lookalike table
preview = lookalike_df.head(20)
print(preview)

   CustomerID SimilarCustomerID  SimilarityScore
0       C0001             C0164         0.997598
1       C0001             C0103         0.995394
2       C0001             C0069         0.986073
3       C0002             C0029         0.999754
4       C0002             C0031         0.998986
5       C0002             C0077         0.994313
6       C0003             C0176         0.902950
7       C0003             C0027         0.875121
8       C0003             C0010         0.832965
9       C0004             C0075         0.997789
10      C0004             C0165         0.994442
11      C0004             C0113         0.993976
12      C0005             C0123         0.999781
13      C0005             C0131         0.999628
14      C0005             C0058         0.999561
15      C0006             C0079         0.999882
16      C0006             C0117         0.989525
17      C0006             C0196         0.945252
18      C0007             C0125         0.998032
19      C0007       