In [1]:
# Data manipulation and analysis
import pandas as pd
import numpy as np

# Data visualization
import matplotlib.pyplot as plt

# Machine learning and data preprocessing
from sklearn.neighbors import NearestNeighbors
from scipy.sparse import csr_matrix

# Import Dataset

In [2]:
users = pd.read_csv('//Users/andi/Project/NEXT x Telkom/Dataset/users.csv')
products = pd.read_csv('//Users/andi/Project/NEXT x Telkom/Dataset/products.csv')
transaction = pd.read_csv('//Users/andi/Project/NEXT x Telkom/Dataset/order_transactions.csv')
details = pd.read_csv('//Users/andi/Project/NEXT x Telkom/Dataset/order_details.csv')

In [3]:
# Merge order details and transaction data on order_id
order_data = pd.merge(details, transaction, on='order_id')

# Merge with product data to get product names and prices
order_data = pd.merge(order_data, products[['product_id', 'product_name']], on='product_id')

# Group by user and product to calculate total quantity purchased or interacted with
user_product_interactions = order_data.groupby(['buyer_id', 'product_id'])['quantity'].sum().unstack(fill_value=0)

# Convert the user-product interaction matrix to a sparse matrix
user_product_sparse = csr_matrix(user_product_interactions.values)

  user_product_interactions = order_data.groupby(['buyer_id', 'product_id'])['quantity'].sum().unstack(fill_value=0)


# KNN - User Base

In [4]:
# Fit the KNN model for users
user_knn = NearestNeighbors(metric='cosine', algorithm='brute')
user_knn.fit(user_product_sparse)

# Finding the nearest neighbors for a specific user
user_id = '631e52cf049b5bbe31460940'
user_index = user_product_interactions.index.get_loc(user_id)

# Get the distances and indices of nearest neighbors
distances_user, indices_user = user_knn.kneighbors(user_product_sparse[user_index, :], n_neighbors=6)

# Get similar users and their similarity rates
similar_users = user_product_interactions.index[indices_user.flatten()].tolist()[1:]
similarity_rates_user = 1 - distances_user.flatten()[1:]

# Retrieve products interacted with by the similar users
similar_users_interactions = user_product_interactions.loc[similar_users]

# Sum interactions across the similar users
product_recommendations = similar_users_interactions.sum(axis=0).sort_values(ascending=False).head(5)

recommended_product_names = products.set_index('product_id').loc[product_recommendations.index]['product_name'].tolist()

# ------------------------ Output Results ------------------------

print("Similar Users (User-Based KNN):", similar_users)
print("Similarity Rates (User-Based KNN):", similarity_rates_user)
print("\nRecommended Products for the User (Product Names):", recommended_product_names)

Similar Users (User-Based KNN): ['631e674f516d7c2adcec194d', '631e4e82e5e161518eff5369', '631e543d049b5bbe31461892', '631e6717516d7c2adcec1688', '631e67f6516d7c2adcec1f9d']
Similarity Rates (User-Based KNN): [0.39508388 0.35943285 0.30510452 0.28733138 0.27209676]

Recommended Products for the User (Product Names): ['Mie Kremezz Rasa Keju Manis', 'Mie kremezz Ayam Panggang', 'Beng Beng Coklat 25gram', 'Richeese Nabati Ahh Keju 10gram', 'Cokelat Superstar']


In [12]:
# Finding the nearest neighbors for a specific user
user_id = '631e67e7516d7c2adcec1eca'
user_index = user_product_interactions.index.get_loc(user_id)

# Get the distances and indices of nearest neighbors
distances_user, indices_user = user_knn.kneighbors(user_product_sparse[user_index, :], n_neighbors=6)

# Get similar users and their similarity rates
similar_users = user_product_interactions.index[indices_user.flatten()].tolist()[1:]
similarity_rates_user = 1 - distances_user.flatten()[1:]

# Retrieve products interacted with by the similar users
similar_users_interactions = user_product_interactions.loc[similar_users]

# Sum interactions across the similar users
product_recommendations = similar_users_interactions.sum(axis=0).sort_values(ascending=False).head(5)

recommended_product_names = products.set_index('product_id').loc[product_recommendations.index]['product_name'].tolist()

# ------------------------ Output Results ------------------------

print("Similar Users (User-Based KNN):", similar_users)
print("Similarity Rates (User-Based KNN):", similarity_rates_user)
print("\nRecommended Products for the User (Product Names):", recommended_product_names)

Similar Users (User-Based KNN): ['631e67d7516d7c2adcec1e09', '631e2dccdf5e0b1702aff022', '631e508a049b5bbe3145f12b', '631e50bd049b5bbe3145f351', '631e5090049b5bbe3145f176']
Similarity Rates (User-Based KNN): [0.82659747 0.82403496 0.80594779 0.78414029 0.71111591]

Recommended Products for the User (Product Names): ['ULTRA TEH KOTAK LESSUGAR TP 200ML +100ML', 'ULTRA MILK UHT FULL CREAM', 'ULTRA MILK  STROBERY 250 ML', 'ULTRA MILK COKLAT 250 ML', 'ULTRA TEH KOTAK 200ML 200 ML - TEH KOTAK']


# KNN- Item Based

In [5]:
# Fit the KNN model for items
item_knn = NearestNeighbors(metric='cosine', algorithm='brute')
item_knn.fit(user_product_sparse.T)

# Finding the nearest neighbors for a specific product
product_id = '631d5039e68bbb0b8d654f28' 
product_index = user_product_interactions.columns.get_loc(product_id)

# Get the distances and indices of nearest neighbors for items
distances_item, indices_item = item_knn.kneighbors(user_product_sparse[:, product_index].T, n_neighbors=6)

# Get similar products (product IDs) and their similarity rates
similar_products_ids = user_product_interactions.columns[indices_item.flatten()].tolist()[1:6]  
similarity_rates_item = 1 - distances_item.flatten()[1:6]  

similar_products_names = products.set_index('product_id').loc[similar_products_ids]['product_name'].tolist()

# ------------------------ Output Results ------------------------

print("Similar Products (Item-Based KNN - Product IDs):", similar_products_ids)
print("Similar Products (Item-Based KNN - Product Names):", similar_products_names)
print("Similarity Rates (Item-Based KNN):", similarity_rates_item)

Similar Products (Item-Based KNN - Product IDs): ['631d5039e68bbb0b8d654f24', '631d5038e68bbb0b8d654f08', '631d5038e68bbb0b8d654f12', '631d5038e68bbb0b8d654f02', '631d5039e68bbb0b8d654f22']
Similar Products (Item-Based KNN - Product Names): ['AC Panasonic 1,5 PK', 'Kursi Tunggu Nasabah 4 Seat ( tanpa sandaran )', 'KURSI STAFF ', 'Lemari Arsip Kaca Modera', 'AC Panasonic 2Pk']
Similarity Rates (Item-Based KNN): [1. 1. 1. 1. 1.]


In [13]:
# Finding the nearest neighbors for a specific product
product_id = '631b2d6586073948b5b4beb6' 
product_index = user_product_interactions.columns.get_loc(product_id)

# Get the distances and indices of nearest neighbors for items
distances_item, indices_item = item_knn.kneighbors(user_product_sparse[:, product_index].T, n_neighbors=6)

# Get similar products (product IDs) and their similarity rates
similar_products_ids = user_product_interactions.columns[indices_item.flatten()].tolist()[1:6]  
similarity_rates_item = 1 - distances_item.flatten()[1:6]  

similar_products_names = products.set_index('product_id').loc[similar_products_ids]['product_name'].tolist()

# ------------------------ Output Results ------------------------

print("Similar Products (Item-Based KNN - Product IDs):", similar_products_ids)
print("Similar Products (Item-Based KNN - Product Names):", similar_products_names)
print("Similarity Rates (Item-Based KNN):", similarity_rates_item)

Similar Products (Item-Based KNN - Product IDs): ['64a5232d5ade0b8684a74e44', '631b2d6586073948b5b4beb6', '65f14da6c6360da5e97a73e1', '63a2d5feba6af2659da3c2ed', '64d218f84c821e602beb5600']
Similar Products (Item-Based KNN - Product Names): ['Pekerjaan Jasa Perbaikan Terminasi RCL (Reaktor Current Limiter) Pusri-1B\t', 'Jasa pekerjaan perbaikan line power Reaktor Synbus Pusri-4', 'Pencucian HVAC Area STG periode Maret 2024 ', 'Pekerjaan Jasa pemasangan  LBS-311\t', 'BATTERY N200 PREMIUM']
Similarity Rates (Item-Based KNN): [1. 1. 1. 1. 1.]
