# In [3]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import NearestNeighbors

# Read the customer and transaction tables from CSV files in the working directory.
customers, transactions = (
    pd.read_csv(csv_name) for csv_name in ('Customers.csv', 'Transactions.csv')
)

# Prepare customer features
def lookalike_model(customers: pd.DataFrame, transactions: pd.DataFrame, products=None,
                    *, n_customers=20, top_n=3) -> dict:
    """Find the most similar customers based on total spend and quantity.

    Each customer is described by two features, the sum of ``TotalValue`` and
    the sum of ``Quantity`` over their rows in ``transactions``. Customers
    with no transactions get 0 for both. The features are standardised and
    neighbours are ranked by Euclidean distance in that scaled space.

    Args:
        customers: Table with a ``CustomerID`` column; its row order decides
            which customers are reported.
        transactions: Table with ``CustomerID``, ``TotalValue`` and
            ``Quantity`` columns.
        products: Unused; accepted only so existing three-argument calls
            keep working.
        n_customers: Number of leading rows of ``customers`` to report
            lookalikes for, or ``None`` for all of them.
        top_n: Maximum number of lookalikes per customer.

    Returns:
        A dict mapping each reported ``CustomerID`` to a list of
        ``(lookalike_customer_id, distance)`` tuples, nearest first, with the
        distance rounded to two decimals. A customer is never listed as its
        own lookalike, and the list is shorter than ``top_n`` only when there
        are not enough other customers.

    Raises:
        ValueError: If ``top_n`` is negative.
    """
    if top_n < 0:
        raise ValueError('top_n must be non-negative')

    feature_cols = ['TotalValue', 'Quantity']
    customer_features = (
        transactions.groupby('CustomerID')
        .agg({'TotalValue': 'sum', 'Quantity': 'sum'})
        .reset_index()
    )
    customer_data = customers.merge(customer_features, on='CustomerID', how='left')
    # Only the aggregated features need a default; leave other columns untouched.
    customer_data[feature_cols] = customer_data[feature_cols].fillna(0)

    # Native Python values: cheap positional lookup and clean text when exported.
    ids = customer_data['CustomerID'].tolist()
    query_ids = ids[:n_customers]
    if not query_ids:
        return {}

    scaled_data = StandardScaler().fit_transform(customer_data[feature_cols])

    # Ask for one extra neighbour because the query point itself is normally
    # among the results; never ask for more neighbours than there are rows.
    k = min(top_n + 1, len(ids))
    nn = NearestNeighbors(n_neighbors=k, metric='euclidean')
    nn.fit(scaled_data)
    # Only the reported customers need to be queried.
    distances, indices = nn.kneighbors(scaled_data[:n_customers])

    lookalikes = {}
    for row, cust_id in enumerate(query_ids):
        matches = [
            (ids[neighbor], round(float(dist), 2))
            for neighbor, dist in zip(indices[row], distances[row])
            if neighbor != row
        ]
        # With tied distances the query row may be missing from its own
        # neighbour list, leaving one match too many; cap the list explicitly.
        lookalikes[cust_id] = matches[:top_n]

    return lookalikes

# No products table is loaded in this script and lookalike_model never reads
# its third argument, so pass None rather than an undefined name.
lookalike_results = lookalike_model(customers, transactions, None)
# One row per customer: the CustomerID first, then one (lookalike_id, distance)
# tuple per column; no header row is written.
pd.DataFrame.from_dict(lookalike_results, orient='index').to_csv('Lookalike.csv', header=False)