In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA
from sklearn.metrics import silhouette_score

# Load the two product tables that are clustered side by side below.
# Paths are relative to the notebook's working directory.
df_with_price = pd.read_csv("../Data/products_with_price.csv")
df_without_price = pd.read_csv("../Data/products_without_price.csv")

# Report (rows, columns) for each table as a quick sanity check.
for caption, table in (
    ("With price:", df_with_price),
    ("Without price:", df_without_price),
):
    print(caption, table.shape)

In [None]:
def plot_elbow(data, title, k_values=range(2, 11), random_state=42):
    """Draw an elbow plot: K-Means inertia against the number of clusters.

    One ``KMeans`` model is fitted to ``data`` for every candidate cluster
    count, its ``inertia_`` is collected, and the resulting curve is drawn on
    a fresh figure that is then shown.

    Parameters
    ----------
    data : array-like
        Feature matrix handed unchanged to ``KMeans.fit``.
    title : str
        Title placed above the plot.
    k_values : iterable of int, optional
        Candidate cluster counts, in plotting order. Defaults to 2..10,
        which is the range this function used before it was configurable.
    random_state : int, optional
        Seed passed to every ``KMeans`` instance so curves are comparable
        between runs. Defaults to 42.

    Returns
    -------
    None
        The function only produces a figure.
    """
    # Materialise once: the values are consumed for fitting and again as the
    # x-axis, so a one-shot iterator must not be exhausted after the first use.
    k_values = list(k_values)

    inertias = [
        KMeans(n_clusters=k, random_state=random_state).fit(data).inertia_
        for k in k_values
    ]

    # Draw on an explicit Axes so the curve never lands on whatever figure
    # happens to be current in the pyplot state machine.
    _fig, ax = plt.subplots()
    ax.plot(k_values, inertias, marker='o')
    ax.set_title(title)
    ax.set_xlabel("Number of clusters (k)")
    ax.set_ylabel("Inertia")
    plt.show()

# One elbow curve per table, drawn in the same order as the tables were loaded.
for table, heading in (
    (df_with_price, "Elbow Method (With Price)"),
    (df_without_price, "Elbow Method (Without Price)"),
):
    plot_elbow(table, heading)

In [None]:
# Cluster count shared by both models; tune it after reading the elbow plots.
k = 4

# Model for the table that includes price: fit, then read the per-row labels.
kmeans_with = KMeans(n_clusters=k, random_state=42)
kmeans_with.fit(df_with_price)
clusters_with = kmeans_with.labels_

# Same configuration for the table without price.
kmeans_without = KMeans(n_clusters=k, random_state=42)
kmeans_without.fit(df_without_price)
clusters_without = kmeans_without.labels_

In [None]:
def plot_clusters(data, labels, title, random_state=42):
    """Scatter-plot cluster assignments on the first two principal components.

    ``data`` is projected to two dimensions with ``PCA`` and each point is
    coloured by its entry in ``labels``; a legend maps colours back to
    cluster ids.

    Parameters
    ----------
    data : array-like
        Feature matrix handed unchanged to ``PCA.fit_transform``.
    labels : array-like
        One numeric cluster id per row of ``data``; used as the colour value.
    title : str
        Title placed above the plot.
    random_state : int, optional
        Seed forwarded to ``PCA``. It only has an effect when PCA uses one of
        its randomized solvers; pinning it keeps the projection, and therefore
        the picture, reproducible between runs. Defaults to 42.

    Returns
    -------
    None
        The function only produces a figure.
    """
    projector = PCA(n_components=2, random_state=random_state)
    projected = projector.fit_transform(data)

    # Explicit Axes: the scatter never lands on a leftover current figure.
    _fig, ax = plt.subplots()
    points = ax.scatter(projected[:, 0], projected[:, 1], c=labels, cmap="tab10")
    # Without a key the colours cannot be tied back to cluster ids.
    ax.legend(*points.legend_elements(), title="Cluster")
    ax.set_title(title)
    ax.set_xlabel("PCA 1")
    ax.set_ylabel("PCA 2")
    plt.show()

# One PCA scatter per table, each coloured by that table's own K-Means labels.
for table, assignments, heading in (
    (df_with_price, clusters_with, "Clusters with Price"),
    (df_without_price, clusters_without, "Clusters without Price"),
):
    plot_clusters(table, assignments, heading)

In [None]:
# Score each clustering against the table it was fitted on.
score_with, score_without = (
    silhouette_score(df_with_price, clusters_with),
    silhouette_score(df_without_price, clusters_without),
)

# Print both scores to three decimals for a side-by-side comparison.
print(f"Silhouette Score (with price): {score_with:.3f}")
print(f"Silhouette Score (without price): {score_without:.3f}")