In [1]:
import pandas as pd
import numpy as np
from datetime import datetime



In [2]:
def create_customer_profiles(customers, products, transactions):
    """Build a per-customer spending profile from the three input tables.

    Parameters
    ----------
    customers : pandas.DataFrame
        Needs the columns ``CustomerID``, ``CustomerName``, ``Region`` and
        ``SignupDate`` (a string in ``'%Y-%m-%d'`` format).
    products : pandas.DataFrame
        Needs the columns ``ProductID`` and ``Category``.
    transactions : pandas.DataFrame
        Needs the columns ``CustomerID``, ``ProductID`` and ``TotalValue``.

    Returns
    -------
    dict
        Maps each ``CustomerID`` to a dict with the keys ``name``, ``region``,
        ``signupDate`` (``datetime``), ``totalSpent``, ``categories``
        (category -> number of transactions), ``transactionCount`` and
        ``avgOrderValue`` (``totalSpent / transactionCount``, or 0 when the
        customer has no transactions).

    Notes
    -----
    Transactions whose ``CustomerID`` is not present in ``customers`` are
    skipped because there is no profile to attribute them to. Transactions
    whose ``ProductID`` is not present in ``products`` still count towards
    spend and transaction count, but contribute no category.
    """
    profiles = {}

    for _, customer in customers.iterrows():
        profiles[customer['CustomerID']] = {
            'name': customer['CustomerName'],
            'region': customer['Region'],
            'signupDate': datetime.strptime(customer['SignupDate'], '%Y-%m-%d'),
            'totalSpent': 0,
            'categories': {},
            'transactionCount': 0,
            'avgOrderValue': 0
        }

    # Resolve ProductID -> Category once instead of filtering the whole
    # products frame for every transaction. drop_duplicates keeps the first
    # row per ProductID, matching the previous `.iloc[0]` choice.
    category_by_product = (
        products.drop_duplicates(subset='ProductID')
        .set_index('ProductID')['Category']
        .to_dict()
    )

    for customer_id, product_id, total_value in zip(
        transactions['CustomerID'],
        transactions['ProductID'],
        transactions['TotalValue'],
    ):
        profile = profiles.get(customer_id)
        if profile is None:
            # Orphan transaction: no matching customer row.
            continue

        profile['totalSpent'] += float(total_value)
        profile['transactionCount'] += 1

        # Membership test (rather than .get) so that a category value of
        # None/NaN in the products table is still counted as before.
        if product_id in category_by_product:
            category = category_by_product[product_id]
            counts = profile['categories']
            counts[category] = counts.get(category, 0) + 1

    for profile in profiles.values():
        if profile['transactionCount'] > 0:
            profile['avgOrderValue'] = profile['totalSpent'] / profile['transactionCount']

    return profiles



In [3]:
def _min_max_ratio(a, b):
    """Return ``min(a, b) / max(a, b)``, or 0 when the larger value is not positive."""
    larger = max(a, b)
    return min(a, b) / larger if larger > 0 else 0


def calculate_similarity(profile1, profile2):
    """Score how alike two customer profiles are, rounded to 3 decimals.

    The score is a weighted sum of four components, each in ``[0, 1]`` for
    non-negative inputs:

    * region match (weight 0.25): 1 if both ``region`` values are equal, else 0
    * total-spend ratio (weight 0.25): smaller ``totalSpent`` / larger
    * average-order-value ratio (weight 0.20): smaller ``avgOrderValue`` / larger
    * category overlap (weight 0.30): mean, over every category present in
      either profile, of smaller count / larger count

    Parameters
    ----------
    profile1, profile2 : dict
        Each must provide the keys ``region``, ``totalSpent``,
        ``avgOrderValue`` and ``categories`` (a dict of category -> count).

    Returns
    -------
    float
        The weighted similarity, rounded to three decimal places.
    """
    region_sim = 1 if profile1['region'] == profile2['region'] else 0

    # Guarded ratio: two customers without any transactions both have
    # totalSpent == 0, which previously raised ZeroDivisionError here.
    spending_ratio = _min_max_ratio(profile1['totalSpent'], profile2['totalSpent'])
    aov_ratio = _min_max_ratio(profile1['avgOrderValue'], profile2['avgOrderValue'])

    cats1 = profile1['categories']
    cats2 = profile2['categories']
    categories = set(cats1).union(cats2)

    if categories:
        overlap = sum(
            _min_max_ratio(cats1.get(category, 0), cats2.get(category, 0))
            for category in categories
        )
        category_sim = overlap / len(categories)
    else:
        # Neither customer bought anything: no category evidence either way.
        category_sim = 0

    return round(
        region_sim * 0.25 +
        spending_ratio * 0.25 +
        aov_ratio * 0.20 +
        category_sim * 0.30,
        3
    )



In [4]:
def create_profiles_and_lookalikes(
    customers_path='Customers.csv',
    products_path='Products.csv',
    transactions_path='Transactions.csv',
    output_path='Lookalike.csv',
    last_target_id='C0020',
    top_n=3,
):
    """Build customer profiles and find the most similar customers for each target.

    All parameters are optional; the defaults reproduce the original
    hard-coded behaviour (three CSVs in the working directory, targets up to
    ``'C0020'``, three lookalikes each, output written to ``Lookalike.csv``).

    Parameters
    ----------
    customers_path, products_path, transactions_path : str
        CSV files passed to ``pandas.read_csv``.
    output_path : str
        Destination CSV. It receives a single ``customer_map`` column holding
        one cell with the whole result mapping.
    last_target_id : str
        Customers whose ID compares ``<=`` this value become targets. The
        comparison is whatever ``<=`` means for the ID type; for strings that
        is lexicographic, so IDs are presumably zero-padded to equal width —
        verify against the data.
    top_n : int
        Number of lookalikes kept per target customer.

    Returns
    -------
    dict
        Maps each target customer ID to a list of ``[cust_id, score]`` pairs,
        highest score first.
    """
    customers = pd.read_csv(customers_path)
    products = pd.read_csv(products_path)
    transactions = pd.read_csv(transactions_path)

    profiles = create_customer_profiles(customers, products, transactions)

    results = {}
    target_customers = sorted(cid for cid in profiles if cid <= last_target_id)

    for target_id in target_customers:
        target_profile = profiles[target_id]

        # Score the target against every other customer.
        similarities = [
            {'cust_id': cust_id, 'score': calculate_similarity(target_profile, profile)}
            for cust_id, profile in profiles.items()
            if cust_id != target_id
        ]

        # sorted() is stable, so tied scores keep profile insertion order.
        best = sorted(similarities, key=lambda x: x['score'], reverse=True)[:top_n]
        results[target_id] = [[m['cust_id'], m['score']] for m in best]

    df = pd.DataFrame({'customer_map': [results]})
    df.to_csv(output_path, index=False)

    return results



In [5]:
# Execute the full pipeline: reads the three input CSVs, writes Lookalike.csv,
# and keeps the returned target-customer -> top-matches mapping for inspection.
lookalike_map = create_profiles_and_lookalikes()

