In [2]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import RobustScaler
from sklearn.cluster import DBSCAN
from sklearn.metrics import silhouette_score
import joblib

# --- Load and preprocess data ---
df = pd.read_csv("cleaned_dataset.csv")
X = df.drop("quality", axis=1)
y = df["quality"]

scaler = RobustScaler()
X_scaled = scaler.fit_transform(X)

# --- Fit DBSCAN (best choice) ---
dbscan = DBSCAN(eps=2, min_samples=5)
labels = dbscan.fit_predict(X_scaled)

# --- Silhouette Score ---
if len(set(labels)) > 1:
    sil = silhouette_score(X_scaled, labels)
    print("DBSCAN Silhouette Score:", sil)

# --- Build cluster → quality mapping ---
cluster_quality_map = {}
for cluster in np.unique(labels):
    if cluster == -1:  # Noise
        continue
    cluster_indices = np.where(labels == cluster)[0]
    cluster_qualities = y.iloc[cluster_indices]
    avg_quality = cluster_qualities.mean()
    if avg_quality >= 5:
        cluster_quality_map[cluster] = "🍷 Good Quality"
    elif avg_quality >= 3.5:
        cluster_quality_map[cluster] = "👌 Medium Quality"
    else:
        cluster_quality_map[cluster] = "⚠️ Low Quality"

print("Cluster → Quality Mapping:", cluster_quality_map)

# --- Save everything ---
joblib.dump(dbscan, "best_cluster_model.pkl")
joblib.dump(scaler, "scaler.pkl")
joblib.dump(labels, "cluster_labels.pkl")
joblib.dump(cluster_quality_map, "cluster_quality_map.pkl")
np.save("X_scaled.npy", X_scaled)

print("✅ Models & mapping saved successfully!")


DBSCAN Silhouette Score: 0.3162745246991103
Cluster → Quality Mapping: {0: '🍷 Good Quality', 1: '🍷 Good Quality', 2: '🍷 Good Quality', 3: '🍷 Good Quality'}
✅ Models & mapping saved successfully!
