In [1]:
!pip install pycaret --quiet
!pip install pandas --quiet

In [2]:
import pandas as pd
from pycaret.clustering import setup, create_model, assign_model, evaluate_model
from sklearn.datasets import load_wine

In [3]:
# Load the Wine dataset from sklearn
wine = load_wine()
X = pd.DataFrame(wine.data, columns=wine.feature_names)

# Display the first few rows of the dataset
print("First few rows of the Wine dataset:")
print(X.head())

# Check dataset information
print("\nDataset Info:")
print(X.info())

# Check for missing values
print("\nMissing values in the dataset:")
print(X.isnull().sum())

First few rows of the Wine dataset:
   alcohol  malic_acid   ash  alcalinity_of_ash  magnesium  total_phenols  \
0    14.23        1.71  2.43               15.6      127.0           2.80   
1    13.20        1.78  2.14               11.2      100.0           2.65   
2    13.16        2.36  2.67               18.6      101.0           2.80   
3    14.37        1.95  2.50               16.8      113.0           3.85   
4    13.24        2.59  2.87               21.0      118.0           2.80   

   flavanoids  nonflavanoid_phenols  proanthocyanins  color_intensity   hue  \
0        3.06                  0.28             2.29             5.64  1.04   
1        2.76                  0.26             1.28             4.38  1.05   
2        3.24                  0.30             2.81             5.68  1.03   
3        3.49                  0.24             2.18             7.80  0.86   
4        2.69                  0.39             1.82             4.32  1.04   

   od280/od315_of_diluted_

In [4]:
# Initialize the PyCaret clustering setup
clustering_setup = setup(data=X, normalize=True, session_id=123)

Unnamed: 0,Description,Value
0,Session id,123
1,Original data shape,"(178, 13)"
2,Transformed data shape,"(178, 13)"
3,Numeric features,13
4,Preprocess,True
5,Imputation type,simple
6,Numeric imputation,mean
7,Categorical imputation,mode
8,Normalize,True
9,Normalize method,zscore


In [5]:
# Create a DBSCAN clustering model
dbscan_model = create_model('dbscan')

# Evaluate the model visually
evaluate_model(dbscan_model)

Unnamed: 0,Silhouette,Calinski-Harabasz,Davies-Bouldin,Homogeneity,Rand Index,Completeness
0,0,0,0,0,0,0


Processing:   0%|          | 0/3 [00:00<?, ?it/s]

interactive(children=(ToggleButtons(description='Plot Type:', icons=('',), options=(('Pipeline Plot', 'pipelin…

In [6]:
# Assign clusters to the dataset
clustered_data = assign_model(dbscan_model)

# Display the dataset with cluster labels
print("Clustered Dataset:")
print(clustered_data.head())

Clustered Dataset:
   alcohol  malic_acid   ash  alcalinity_of_ash  magnesium  total_phenols  \
0    14.23        1.71  2.43          15.600000      127.0           2.80   
1    13.20        1.78  2.14          11.200000      100.0           2.65   
2    13.16        2.36  2.67          18.600000      101.0           2.80   
3    14.37        1.95  2.50          16.799999      113.0           3.85   
4    13.24        2.59  2.87          21.000000      118.0           2.80   

   flavanoids  nonflavanoid_phenols  proanthocyanins  color_intensity   hue  \
0        3.06                  0.28             2.29             5.64  1.04   
1        2.76                  0.26             1.28             4.38  1.05   
2        3.24                  0.30             2.81             5.68  1.03   
3        3.49                  0.24             2.18             7.80  0.86   
4        2.69                  0.39             1.82             4.32  1.04   

   od280/od315_of_diluted_wines  proline   

In [7]:
# Analyze the cluster distribution
cluster_distribution = clustered_data['Cluster'].value_counts()

print("Cluster Distribution:")
print(cluster_distribution)

# Group data by clusters and compute mean values for each cluster
cluster_summary = clustered_data.groupby('Cluster').mean()

print("\nCluster Summary:")
print(cluster_summary)

Cluster Distribution:
Cluster
Cluster -1    178
Name: count, dtype: int64

Cluster Summary:
              alcohol  malic_acid       ash  alcalinity_of_ash  magnesium  \
Cluster                                                                     
Cluster -1  13.000619    2.336348  2.366517          19.494944   99.74157   

            total_phenols  flavanoids  nonflavanoid_phenols  proanthocyanins  \
Cluster                                                                        
Cluster -1       2.295112     2.02927              0.361854         1.590899   

            color_intensity       hue  od280/od315_of_diluted_wines    proline  
Cluster                                                                         
Cluster -1          5.05809  0.957449                      2.611686  746.89325  
