In [1]:
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import StandardScaler

In [2]:
# Load the two input tables in one pass; the tuple order fixes which file
# lands in which variable.
customers, transactions = (
    pd.read_csv(csv_name)
    for csv_name in ('Customers.csv', 'Transactions.csv')
)


In [3]:
# Collapse the transaction log to one row per customer: total spend and
# total units. `as_index=False` keeps CustomerID as an ordinary column so
# the result can be merged on it directly.
customer_transactions = (
    transactions
    .groupby('CustomerID', as_index=False)
    .agg(
        TotalValue=('TotalValue', 'sum'),
        Quantity=('Quantity', 'sum'),
    )
)

In [4]:
# Merge customer profiles with transaction data.
# The left join keeps every customer; those absent from the transaction
# aggregate come back with NaN in TotalValue/Quantity, which we treat as
# "no purchases" (0). The fill is restricted to those two columns so that
# missing values in any other customer column are not silently replaced
# by the number 0.
customer_profile = (
    customers
    .merge(customer_transactions, on='CustomerID', how='left')
    .fillna({'TotalValue': 0, 'Quantity': 0})
)

In [5]:
# Put both transaction totals on a common scale before comparing customers.
# The scaler is fitted on, and then applied to, the same two columns.
spend_columns = customer_profile[['TotalValue', 'Quantity']]
scaler = StandardScaler()
scaler.fit(spend_columns)
customer_features = scaler.transform(spend_columns)

In [6]:
# Pairwise cosine similarity between every pair of customers' scaled
# feature vectors, wrapped in a DataFrame labelled by CustomerID on both
# axes so a customer's scores can be looked up by ID.
customer_ids = customer_profile['CustomerID']
similarity_matrix = cosine_similarity(customer_features)
similarity_df = pd.DataFrame(
    data=similarity_matrix,
    index=customer_ids,
    columns=customer_ids,
)


In [7]:
# Generate the top 3 lookalike customers for each of the first 20 customers.
# Result: {customer_id: [(lookalike_id, similarity_score), ...]} with the
# highest-scoring lookalike first.
lookalikes = {}
for customer_id in customer_profile['CustomerID'].head(20):
    similar_customers = (
        similarity_df[customer_id]
        # Remove the customer's own entry by label instead of assuming it
        # sorts first: when another customer ties at the top score, the
        # self entry is not guaranteed to be in position 0, and skipping
        # position 0 could then list the customer as their own lookalike.
        .drop(labels=customer_id)
        .nlargest(3)
    )
    # Series.items() yields (label, value) pairs with plain Python floats,
    # so the list's text form stays "(id, 0.99...)" when written to CSV
    # regardless of how the installed NumPy prints its scalar types.
    lookalikes[customer_id] = list(similar_customers.items())


In [8]:
# Turn the {customer_id: lookalike list} mapping into a two-column table,
# one row per customer, for inspection and export.
lookalike_rows = [
    dict(CustomerID=cust_id, Lookalikes=matches)
    for cust_id, matches in lookalikes.items()
]
lookalike_df = pd.DataFrame(lookalike_rows)

In [9]:
# Write the recommendations to disk without the positional row index.
lookalike_df.to_csv('Lookalike.csv', index=False)

# Show the leading rows as a quick sanity check of what was written.
lookalike_preview = lookalike_df.head()
print(lookalike_preview)

  CustomerID                                         Lookalikes
0      C0001  [(C0164, 0.9999247622065323), (C0085, 0.999596...
1      C0002  [(C0157, 0.9999942123711493), (C0094, 0.999827...
2      C0003  [(C0111, 0.9956161658685471), (C0160, 0.989198...
3      C0004  [(C0162, 0.9999998954114413), (C0165, 0.999964...
4      C0005  [(C0080, 0.9999807052404533), (C0167, 0.999973...
