<a href="https://colab.research.google.com/github/gowripreetham/SJSU_Pycaret/blob/main/CLustering.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# ==========================================
# 💻 Step 1: Install PyCaret (if needed)
# ==========================================
# Installs PyCaret straight from the GitHub `master` branch (not a tagged
# release), upgrading any copy that is already present. Because the branch tip
# moves, the installed code can differ from one run to the next.
# NOTE(review): the runtime may need a restart after this install — confirm.
!pip install git+https://github.com/pycaret/pycaret.git@master --upgrade


In [None]:
# ==========================================
# 📦 Step 2: Import and Load Dataset
# ==========================================
from sklearn.datasets import load_wine
import pandas as pd

# Load built-in dataset (safe and always available); as_frame=True exposes it
# as pandas objects, including the combined `frame` used below.
data = load_wine(as_frame=True)
# Clustering is unsupervised, so drop the class label and keep only the features.
df = data.frame.drop(columns=['target'])

# Status line with the shape of the feature table (rows, columns).
print("✅ Dataset loaded successfully:", df.shape)
# Last expression of the cell: the notebook renders the first rows.
df.head()


In [None]:
# ==========================================
# ⚙️ Step 3: Setup Clustering Environment
# ==========================================
from pycaret.clustering import *

# Experiment options, collected in one mapping and unpacked into setup() below.
setup_options = dict(
    data=df,          # feature table prepared in Step 2
    session_id=42,    # fixed session id so runs are repeatable
    normalize=True,   # turn on PyCaret's feature normalization
    use_gpu=True,     # 👈 ask PyCaret to use the GPU where supported
    html=False,       # turn off the HTML display
)

# Initialise the clustering experiment; the returned handle is kept in exp_clu.
exp_clu = setup(**setup_options)


In [None]:
# ==========================================
# 🤖 Step 4: Create and Compare Models
# ==========================================
# Train a K-Means clustering model on the experiment configured by setup();
# the fitted model is kept in `kmeans` for the following steps.
kmeans = create_model('kmeans')

# Check available clustering algorithms. As the last expression of the cell,
# the value returned by models() is what the notebook renders.
models()


In [None]:
# ==========================================
# 📊 Step 5: Assign Cluster Labels
# ==========================================
# Label the data with the K-Means clusters; the labels are read back from the
# 'Cluster' column of the returned frame.
clustered_df = assign_model(kmeans)

# Report the distinct cluster labels that were assigned.
print("✅ Cluster labels assigned:", clustered_df['Cluster'].unique())
# Last expression of the cell: the notebook renders the first labelled rows.
clustered_df.head()


In [None]:
# ==========================================
# 📈 Step 6: Analyze Model
# ==========================================
# Draw the diagnostic plots for the K-Means model one after another:
# elbow, then silhouette, then the cluster plot.
for plot_kind in ('elbow', 'silhouette', 'cluster'):
    plot_model(kmeans, plot=plot_kind)

# Optional: open the interactive evaluation dashboard for the same model.
evaluate_model(kmeans)


In [None]:
# ==========================================
# 🧩 Step 7: Try Another Model (e.g., MeanShift)
# ==========================================
# Train a second clustering model for comparison with K-Means.
meanshift = create_model('meanshift')

# Keep the labelled frame: assign_model() hands back a DataFrame, and a call
# whose result is neither bound nor the cell's last expression is thrown away.
meanshift_df = assign_model(meanshift)
print("MeanShift cluster labels:", meanshift_df['Cluster'].unique())

# Cluster plot for the MeanShift model.
plot_model(meanshift, plot='cluster')


In [None]:
# ==========================================
# 🔍 Step 8: Inspect Configuration
# ==========================================
# Access PyCaret internal configs.
# Print explicitly: a bare expression in the middle of a cell is evaluated but
# never displayed, so the preview of the transformed data would be lost.
print(get_config('X_train_transformed').head())

# Read the experiment seed, override it, and read it back to confirm the change.
# Note that the experiment keeps the new seed (123) for any cells run afterwards.
print("Current seed:", get_config('seed'))
set_config('seed', 123)
print("New seed:", get_config('seed'))


In [None]:
# ==========================================
# 💾 Step 9: Save and Load Model
# ==========================================
# Persist the K-Means model under the name 'clustering_wine_kmeans', then load
# it back under the same name to exercise the round trip.
save_model(kmeans, 'clustering_wine_kmeans')
loaded_model = load_model('clustering_wine_kmeans')

# Reached only if neither call above raised.
print("✅ Model saved and reloaded successfully.")


In [None]:
# ==========================================
# 📊 Step 10: Predict on Unseen Data
# ==========================================
# Create new sample data (subset of df): 10 rows drawn with a fixed
# random_state. These rows come from the same table that was passed to setup(),
# so they stand in for new data rather than being genuinely unseen.
new_data = df.sample(10, random_state=42)
# Score the sampled rows with the model reloaded in Step 9.
preds = predict_model(loaded_model, data=new_data)
# Last expression of the cell: the notebook renders the prediction table.
preds
