KNN

In [55]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score

# Load data (product and purchase files are semicolon-separated)
customer_interactions = pd.read_csv("dataset/customer_interactions.csv")
product_details = pd.read_csv("dataset/product_details.csv", sep=";")
purchase_history = pd.read_csv("dataset/purchase_history_extend.csv", sep=";")

# Merge data: interactions -> purchases (by customer) -> product details (by product)
merged_data = pd.merge(customer_interactions, purchase_history, on="customer_id")
merged_data = pd.merge(merged_data, product_details, on="product_id")

# Select features and target
X = merged_data[['page_views', 'time_spent']]
y = merged_data['category']

# Split BEFORE scaling so that the held-out rows do not influence the
# scaler's min/max (fitting on the full data leaks test information).
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Normalize data: fit on the training rows only, then apply to the test rows
scaler = MinMaxScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Kept for backward compatibility with code that expects the fully scaled
# feature matrix; it is NOT used for fitting or evaluation.
X_scaled = scaler.transform(X)

# Train KNN model
knn = KNeighborsClassifier(n_neighbors=3)
knn.fit(X_train, y_train)

# Predict and evaluate on the held-out rows
y_pred = knn.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)
print(y_pred)

# Function to recommend products for a given customer_id
def recommend_products(customer_id):
    """Recommend products from the category predicted for a customer.

    Looks up the customer's rows in the module-level ``customer_interactions``
    frame, scales their ``page_views`` / ``time_spent`` with the fitted
    ``scaler``, asks ``knn`` for a category, and collects the ``product_id``
    of every ``merged_data`` row in that category.

    Args:
        customer_id: Value matched against
            ``customer_interactions['customer_id']``.

    Returns:
        Array of ``product_id`` values taken from ``merged_data`` for the
        predicted category (one entry per matching merged row, so ids may
        repeat).

    Raises:
        ValueError: If ``customer_interactions`` has no row for
            ``customer_id``.
    """
    customer_data = customer_interactions[customer_interactions['customer_id'] == customer_id]
    # Fail early with a clear message instead of letting scaler.transform
    # reject an empty feature matrix with an unrelated-looking error.
    if customer_data.empty:
        raise ValueError(f"No interaction data found for customer_id {customer_id}")

    customer_data_scaled = scaler.transform(customer_data[['page_views', 'time_spent']])
    prediction = knn.predict(customer_data_scaled)
    print(prediction)

    # Only the prediction for the customer's first interaction row is used.
    predicted_category = prediction[0]
    product_ids = merged_data[merged_data['category'] == predicted_category]['product_id'].values
    print(f"Recommended products for customer_id {customer_id}: {product_ids[0]}")
    return product_ids

# Example recommendation for customer_id 5
recommended_products = recommend_products(5)


Accuracy: 1.0
['Beauty' 'Clothing' 'Home & Kitchen' 'Clothing' 'Electronics'
 'Electronics']
['Electronics']
Recommended products for customer_id 5: 101
