In [1]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import NearestNeighbors
import numpy as np
from warnings import filterwarnings
filterwarnings('ignore')

## Load the Data

In [2]:
# Load data
customers = pd.read_csv('Customers.csv')
products = pd.read_csv('Products.csv')
transactions = pd.read_csv('Transactions.csv')

In [3]:
customers.head(3)

Unnamed: 0,CustomerID,CustomerName,Region,SignupDate
0,C0001,Lawrence Carroll,South America,2022-07-10
1,C0002,Elizabeth Lutz,Asia,2022-02-13
2,C0003,Michael Rivera,South America,2024-03-07


In [4]:
products.head(3)

Unnamed: 0,ProductID,ProductName,Category,Price
0,P001,ActiveWear Biography,Books,169.3
1,P002,ActiveWear Smartwatch,Electronics,346.3
2,P003,ComfortLiving Biography,Books,44.12


In [5]:
transactions.head(5)

Unnamed: 0,TransactionID,CustomerID,ProductID,TransactionDate,Quantity,TotalValue,Price
0,T00001,C0199,P067,2024-08-25 12:38:23,1,300.68,300.68
1,T00112,C0146,P067,2024-05-27 22:23:54,1,300.68,300.68
2,T00166,C0127,P067,2024-04-25 07:38:55,1,300.68,300.68
3,T00272,C0087,P067,2024-03-26 22:55:37,2,601.36,300.68
4,T00363,C0070,P067,2024-03-21 15:10:10,3,902.04,300.68


## Merge tables

In [6]:
data = transactions.merge(customers, on='CustomerID').merge(products, on='ProductID')
data.drop('Price_x',axis=1,inplace=True)
data.rename({'Price_y':'Price'},axis=1,inplace=True)

In [7]:
df = data.groupby('CustomerID').agg({
    'TotalValue': 'sum',
    'Quantity': 'sum',
    'Price': 'mean',
    'ProductID': 'count',  # Purchase frequency
    'Category': lambda x: x.mode()[0],  # Most common category
    'Region': 'first',
}).reset_index()

## Encode categorical features

In [8]:
df = pd.get_dummies(df, columns=['Region', 'Category'])

## Scale numerical features

In [9]:
scaler = StandardScaler()
scaled_features = scaler.fit_transform(df.drop('CustomerID', axis=1))

# Train k-NN model

In [10]:
knn = NearestNeighbors(n_neighbors=3, metric='cosine')
knn.fit(scaled_features)

## Function to find similar customers

In [11]:
def find_similar_customers(input_features, n=3):
    input_scaled = scaler.transform([input_features])
    distances, indices = knn.kneighbors(input_scaled, n_neighbors=n)
    similar_customers = df.iloc[indices[0]].copy()
    similar_customers['SimilarityScore'] = 1 - distances[0]
    return similar_customers[['CustomerID', 'SimilarityScore']]

In [12]:
# Example usage
input_customer = scaled_features[0]
recommendations = find_similar_customers(input_customer)
print(recommendations)

    CustomerID  SimilarityScore
111      C0112         0.962488
94       C0095         0.934705
188      C0190         0.844713
