In [2]:
# Clustering Analysis using PyCaret in Google Colab

# Install PyCaret if not already installed

# Import necessary libraries
import pandas as pd
from pycaret.clustering import *
from sklearn.preprocessing import StandardScaler, MinMaxScaler

# Load dataset from UCI repository (Example: Iris dataset)
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data"
columns = ['sepal_length', 'sepal_width', 'petal_length', 'petal_width', 'class']
df = pd.read_csv(url, names=columns)
df = df.drop(columns=['class'])  # Removing the target variable as clustering is unsupervised

# Preprocessing techniques
def preprocess_data(df, method):
    if method == 'standard':
        scaler = StandardScaler()
    elif method == 'minmax':
        scaler = MinMaxScaler()
    else:
        return df  # No scaling
    return pd.DataFrame(scaler.fit_transform(df), columns=df.columns)

# Define clustering configurations
preprocessing_methods = ['none', 'standard', 'minmax']
cluster_algorithms = ['kmeans', 'ap', 'meanshift', 'hclust']  # Affinity Propagation, Mean Shift, Hierarchical Clustering
cluster_numbers = [3, 4, 5]  # Different cluster numbers to compare

# Perform clustering with different settings
results = []
for pre_method in preprocessing_methods:
    df_preprocessed = preprocess_data(df, pre_method)

    for algo in cluster_algorithms:
        for clusters in cluster_numbers:
            print(f"Running {algo} with {clusters} clusters and {pre_method} scaling")
            setup_data = setup(df_preprocessed)
            model = create_model(algo, num_clusters=clusters) if algo == 'kmeans' else create_model(algo)
            eval_metrics = pull()  # Retrieve evaluation metrics
            eval_metrics['algorithm'] = algo
            eval_metrics['clusters'] = clusters
            eval_metrics['preprocessing'] = pre_method
            results.append(eval_metrics)

# Convert results to DataFrame and display summary
results_df = pd.concat(results, ignore_index=True)
print(results_df)

# Save results for further analysis
results_df.to_csv('clustering_results.csv', index=False)


Running kmeans with 3 clusters and none scaling


Unnamed: 0,Description,Value
0,Session id,4040
1,Original data shape,"(150, 4)"
2,Transformed data shape,"(150, 4)"
3,Numeric features,4
4,Preprocess,True
5,Imputation type,simple
6,Numeric imputation,mean
7,Categorical imputation,mode
8,CPU Jobs,-1
9,Use GPU,False


Unnamed: 0,Silhouette,Calinski-Harabasz,Davies-Bouldin,Homogeneity,Rand Index,Completeness
0,0.551,560.366,0.6664,0,0,0


Processing:   0%|          | 0/3 [00:00<?, ?it/s]

Running kmeans with 4 clusters and none scaling


Unnamed: 0,Description,Value
0,Session id,8450
1,Original data shape,"(150, 4)"
2,Transformed data shape,"(150, 4)"
3,Numeric features,4
4,Preprocess,True
5,Imputation type,simple
6,Numeric imputation,mean
7,Categorical imputation,mode
8,CPU Jobs,-1
9,Use GPU,False


Unnamed: 0,Silhouette,Calinski-Harabasz,Davies-Bouldin,Homogeneity,Rand Index,Completeness
0,0.4972,529.1207,0.776,0,0,0


Processing:   0%|          | 0/3 [00:00<?, ?it/s]

Running kmeans with 5 clusters and none scaling


Unnamed: 0,Description,Value
0,Session id,6099
1,Original data shape,"(150, 4)"
2,Transformed data shape,"(150, 4)"
3,Numeric features,4
4,Preprocess,True
5,Imputation type,simple
6,Numeric imputation,mean
7,Categorical imputation,mode
8,CPU Jobs,-1
9,Use GPU,False


Unnamed: 0,Silhouette,Calinski-Harabasz,Davies-Bouldin,Homogeneity,Rand Index,Completeness
0,0.4341,435.4545,0.8243,0,0,0


Processing:   0%|          | 0/3 [00:00<?, ?it/s]

Running ap with 3 clusters and none scaling


Unnamed: 0,Description,Value
0,Session id,1149
1,Original data shape,"(150, 4)"
2,Transformed data shape,"(150, 4)"
3,Numeric features,4
4,Preprocess,True
5,Imputation type,simple
6,Numeric imputation,mean
7,Categorical imputation,mode
8,CPU Jobs,-1
9,Use GPU,False


Unnamed: 0,Silhouette,Calinski-Harabasz,Davies-Bouldin,Homogeneity,Rand Index,Completeness
0,0.3516,445.0811,0.9806,0,0,0


Processing:   0%|          | 0/3 [00:00<?, ?it/s]

Running ap with 4 clusters and none scaling


Unnamed: 0,Description,Value
0,Session id,6528
1,Original data shape,"(150, 4)"
2,Transformed data shape,"(150, 4)"
3,Numeric features,4
4,Preprocess,True
5,Imputation type,simple
6,Numeric imputation,mean
7,Categorical imputation,mode
8,CPU Jobs,-1
9,Use GPU,False


Unnamed: 0,Silhouette,Calinski-Harabasz,Davies-Bouldin,Homogeneity,Rand Index,Completeness
0,0.3516,445.0811,0.9806,0,0,0


Processing:   0%|          | 0/3 [00:00<?, ?it/s]

Running ap with 5 clusters and none scaling


Unnamed: 0,Description,Value
0,Session id,3073
1,Original data shape,"(150, 4)"
2,Transformed data shape,"(150, 4)"
3,Numeric features,4
4,Preprocess,True
5,Imputation type,simple
6,Numeric imputation,mean
7,Categorical imputation,mode
8,CPU Jobs,-1
9,Use GPU,False


Unnamed: 0,Silhouette,Calinski-Harabasz,Davies-Bouldin,Homogeneity,Rand Index,Completeness
0,0.3516,445.0811,0.9806,0,0,0


Processing:   0%|          | 0/3 [00:00<?, ?it/s]

Running meanshift with 3 clusters and none scaling


Unnamed: 0,Description,Value
0,Session id,7921
1,Original data shape,"(150, 4)"
2,Transformed data shape,"(150, 4)"
3,Numeric features,4
4,Preprocess,True
5,Imputation type,simple
6,Numeric imputation,mean
7,Categorical imputation,mode
8,CPU Jobs,-1
9,Use GPU,False


Unnamed: 0,Silhouette,Calinski-Harabasz,Davies-Bouldin,Homogeneity,Rand Index,Completeness
0,0.6855,508.8825,0.3893,0,0,0


Processing:   0%|          | 0/3 [00:00<?, ?it/s]

Running meanshift with 4 clusters and none scaling


Unnamed: 0,Description,Value
0,Session id,6537
1,Original data shape,"(150, 4)"
2,Transformed data shape,"(150, 4)"
3,Numeric features,4
4,Preprocess,True
5,Imputation type,simple
6,Numeric imputation,mean
7,Categorical imputation,mode
8,CPU Jobs,-1
9,Use GPU,False


Unnamed: 0,Silhouette,Calinski-Harabasz,Davies-Bouldin,Homogeneity,Rand Index,Completeness
0,0.6855,508.8825,0.3893,0,0,0


Processing:   0%|          | 0/3 [00:00<?, ?it/s]

Running meanshift with 5 clusters and none scaling


Unnamed: 0,Description,Value
0,Session id,710
1,Original data shape,"(150, 4)"
2,Transformed data shape,"(150, 4)"
3,Numeric features,4
4,Preprocess,True
5,Imputation type,simple
6,Numeric imputation,mean
7,Categorical imputation,mode
8,CPU Jobs,-1
9,Use GPU,False


Unnamed: 0,Silhouette,Calinski-Harabasz,Davies-Bouldin,Homogeneity,Rand Index,Completeness
0,0.6855,508.8825,0.3893,0,0,0


Processing:   0%|          | 0/3 [00:00<?, ?it/s]

Running hclust with 3 clusters and none scaling


Unnamed: 0,Description,Value
0,Session id,2285
1,Original data shape,"(150, 4)"
2,Transformed data shape,"(150, 4)"
3,Numeric features,4
4,Preprocess,True
5,Imputation type,simple
6,Numeric imputation,mean
7,Categorical imputation,mode
8,CPU Jobs,-1
9,Use GPU,False


Unnamed: 0,Silhouette,Calinski-Harabasz,Davies-Bouldin,Homogeneity,Rand Index,Completeness
0,0.4887,513.7721,0.7956,0,0,0


Processing:   0%|          | 0/3 [00:00<?, ?it/s]

Running hclust with 4 clusters and none scaling


Unnamed: 0,Description,Value
0,Session id,7026
1,Original data shape,"(150, 4)"
2,Transformed data shape,"(150, 4)"
3,Numeric features,4
4,Preprocess,True
5,Imputation type,simple
6,Numeric imputation,mean
7,Categorical imputation,mode
8,CPU Jobs,-1
9,Use GPU,False


Unnamed: 0,Silhouette,Calinski-Harabasz,Davies-Bouldin,Homogeneity,Rand Index,Completeness
0,0.4887,513.7721,0.7956,0,0,0


Processing:   0%|          | 0/3 [00:00<?, ?it/s]

Running hclust with 5 clusters and none scaling


Unnamed: 0,Description,Value
0,Session id,3972
1,Original data shape,"(150, 4)"
2,Transformed data shape,"(150, 4)"
3,Numeric features,4
4,Preprocess,True
5,Imputation type,simple
6,Numeric imputation,mean
7,Categorical imputation,mode
8,CPU Jobs,-1
9,Use GPU,False


Unnamed: 0,Silhouette,Calinski-Harabasz,Davies-Bouldin,Homogeneity,Rand Index,Completeness
0,0.4887,513.7721,0.7956,0,0,0


Processing:   0%|          | 0/3 [00:00<?, ?it/s]

Running kmeans with 3 clusters and standard scaling


Unnamed: 0,Description,Value
0,Session id,4258
1,Original data shape,"(150, 4)"
2,Transformed data shape,"(150, 4)"
3,Numeric features,4
4,Preprocess,True
5,Imputation type,simple
6,Numeric imputation,mean
7,Categorical imputation,mode
8,CPU Jobs,-1
9,Use GPU,False


Unnamed: 0,Silhouette,Calinski-Harabasz,Davies-Bouldin,Homogeneity,Rand Index,Completeness
0,0.4554,236.8216,0.8296,0,0,0


Processing:   0%|          | 0/3 [00:00<?, ?it/s]

Running kmeans with 4 clusters and standard scaling


Unnamed: 0,Description,Value
0,Session id,1907
1,Original data shape,"(150, 4)"
2,Transformed data shape,"(150, 4)"
3,Numeric features,4
4,Preprocess,True
5,Imputation type,simple
6,Numeric imputation,mean
7,Categorical imputation,mode
8,CPU Jobs,-1
9,Use GPU,False


Unnamed: 0,Silhouette,Calinski-Harabasz,Davies-Bouldin,Homogeneity,Rand Index,Completeness
0,0.404,205.3895,0.8318,0,0,0


Processing:   0%|          | 0/3 [00:00<?, ?it/s]

Running kmeans with 5 clusters and standard scaling


Unnamed: 0,Description,Value
0,Session id,7256
1,Original data shape,"(150, 4)"
2,Transformed data shape,"(150, 4)"
3,Numeric features,4
4,Preprocess,True
5,Imputation type,simple
6,Numeric imputation,mean
7,Categorical imputation,mode
8,CPU Jobs,-1
9,Use GPU,False


Unnamed: 0,Silhouette,Calinski-Harabasz,Davies-Bouldin,Homogeneity,Rand Index,Completeness
0,0.4098,167.3985,0.9872,0,0,0


Processing:   0%|          | 0/3 [00:00<?, ?it/s]

Running ap with 3 clusters and standard scaling


Unnamed: 0,Description,Value
0,Session id,7364
1,Original data shape,"(150, 4)"
2,Transformed data shape,"(150, 4)"
3,Numeric features,4
4,Preprocess,True
5,Imputation type,simple
6,Numeric imputation,mean
7,Categorical imputation,mode
8,CPU Jobs,-1
9,Use GPU,False


Unnamed: 0,Silhouette,Calinski-Harabasz,Davies-Bouldin,Homogeneity,Rand Index,Completeness
0,0.3434,176.9949,0.9041,0,0,0


Processing:   0%|          | 0/3 [00:00<?, ?it/s]

Running ap with 4 clusters and standard scaling


Unnamed: 0,Description,Value
0,Session id,5183
1,Original data shape,"(150, 4)"
2,Transformed data shape,"(150, 4)"
3,Numeric features,4
4,Preprocess,True
5,Imputation type,simple
6,Numeric imputation,mean
7,Categorical imputation,mode
8,CPU Jobs,-1
9,Use GPU,False


Unnamed: 0,Silhouette,Calinski-Harabasz,Davies-Bouldin,Homogeneity,Rand Index,Completeness
0,0.3434,176.9949,0.9041,0,0,0


Processing:   0%|          | 0/3 [00:00<?, ?it/s]

Running ap with 5 clusters and standard scaling


Unnamed: 0,Description,Value
0,Session id,623
1,Original data shape,"(150, 4)"
2,Transformed data shape,"(150, 4)"
3,Numeric features,4
4,Preprocess,True
5,Imputation type,simple
6,Numeric imputation,mean
7,Categorical imputation,mode
8,CPU Jobs,-1
9,Use GPU,False


Unnamed: 0,Silhouette,Calinski-Harabasz,Davies-Bouldin,Homogeneity,Rand Index,Completeness
0,0.3434,176.9949,0.9041,0,0,0


Processing:   0%|          | 0/3 [00:00<?, ?it/s]

Running meanshift with 3 clusters and standard scaling


Unnamed: 0,Description,Value
0,Session id,3826
1,Original data shape,"(150, 4)"
2,Transformed data shape,"(150, 4)"
3,Numeric features,4
4,Preprocess,True
5,Imputation type,simple
6,Numeric imputation,mean
7,Categorical imputation,mode
8,CPU Jobs,-1
9,Use GPU,False


Unnamed: 0,Silhouette,Calinski-Harabasz,Davies-Bouldin,Homogeneity,Rand Index,Completeness
0,0.5802,248.9035,0.5976,0,0,0


Processing:   0%|          | 0/3 [00:00<?, ?it/s]

Running meanshift with 4 clusters and standard scaling


Unnamed: 0,Description,Value
0,Session id,7511
1,Original data shape,"(150, 4)"
2,Transformed data shape,"(150, 4)"
3,Numeric features,4
4,Preprocess,True
5,Imputation type,simple
6,Numeric imputation,mean
7,Categorical imputation,mode
8,CPU Jobs,-1
9,Use GPU,False


Unnamed: 0,Silhouette,Calinski-Harabasz,Davies-Bouldin,Homogeneity,Rand Index,Completeness
0,0.5802,248.9035,0.5976,0,0,0


Processing:   0%|          | 0/3 [00:00<?, ?it/s]

Running meanshift with 5 clusters and standard scaling


Unnamed: 0,Description,Value
0,Session id,5737
1,Original data shape,"(150, 4)"
2,Transformed data shape,"(150, 4)"
3,Numeric features,4
4,Preprocess,True
5,Imputation type,simple
6,Numeric imputation,mean
7,Categorical imputation,mode
8,CPU Jobs,-1
9,Use GPU,False


Unnamed: 0,Silhouette,Calinski-Harabasz,Davies-Bouldin,Homogeneity,Rand Index,Completeness
0,0.5802,248.9035,0.5976,0,0,0


Processing:   0%|          | 0/3 [00:00<?, ?it/s]

Running hclust with 3 clusters and standard scaling


Unnamed: 0,Description,Value
0,Session id,7077
1,Original data shape,"(150, 4)"
2,Transformed data shape,"(150, 4)"
3,Numeric features,4
4,Preprocess,True
5,Imputation type,simple
6,Numeric imputation,mean
7,Categorical imputation,mode
8,CPU Jobs,-1
9,Use GPU,False


Unnamed: 0,Silhouette,Calinski-Harabasz,Davies-Bouldin,Homogeneity,Rand Index,Completeness
0,0.3993,198.7303,0.9811,0,0,0


Processing:   0%|          | 0/3 [00:00<?, ?it/s]

Running hclust with 4 clusters and standard scaling


Unnamed: 0,Description,Value
0,Session id,4612
1,Original data shape,"(150, 4)"
2,Transformed data shape,"(150, 4)"
3,Numeric features,4
4,Preprocess,True
5,Imputation type,simple
6,Numeric imputation,mean
7,Categorical imputation,mode
8,CPU Jobs,-1
9,Use GPU,False


Unnamed: 0,Silhouette,Calinski-Harabasz,Davies-Bouldin,Homogeneity,Rand Index,Completeness
0,0.3993,198.7303,0.9811,0,0,0


Processing:   0%|          | 0/3 [00:00<?, ?it/s]

Running hclust with 5 clusters and standard scaling


Unnamed: 0,Description,Value
0,Session id,2584
1,Original data shape,"(150, 4)"
2,Transformed data shape,"(150, 4)"
3,Numeric features,4
4,Preprocess,True
5,Imputation type,simple
6,Numeric imputation,mean
7,Categorical imputation,mode
8,CPU Jobs,-1
9,Use GPU,False


Unnamed: 0,Silhouette,Calinski-Harabasz,Davies-Bouldin,Homogeneity,Rand Index,Completeness
0,0.3993,198.7303,0.9811,0,0,0


Processing:   0%|          | 0/3 [00:00<?, ?it/s]

Running kmeans with 3 clusters and minmax scaling


Unnamed: 0,Description,Value
0,Session id,2923
1,Original data shape,"(150, 4)"
2,Transformed data shape,"(150, 4)"
3,Numeric features,4
4,Preprocess,True
5,Imputation type,simple
6,Numeric imputation,mean
7,Categorical imputation,mode
8,CPU Jobs,-1
9,Use GPU,False


Unnamed: 0,Silhouette,Calinski-Harabasz,Davies-Bouldin,Homogeneity,Rand Index,Completeness
0,0.5043,358.5672,0.761,0,0,0


Processing:   0%|          | 0/3 [00:00<?, ?it/s]

Running kmeans with 4 clusters and minmax scaling


Unnamed: 0,Description,Value
0,Session id,5794
1,Original data shape,"(150, 4)"
2,Transformed data shape,"(150, 4)"
3,Numeric features,4
4,Preprocess,True
5,Imputation type,simple
6,Numeric imputation,mean
7,Categorical imputation,mode
8,CPU Jobs,-1
9,Use GPU,False


Unnamed: 0,Silhouette,Calinski-Harabasz,Davies-Bouldin,Homogeneity,Rand Index,Completeness
0,0.4446,313.1838,0.9011,0,0,0


Processing:   0%|          | 0/3 [00:00<?, ?it/s]

Running kmeans with 5 clusters and minmax scaling


Unnamed: 0,Description,Value
0,Session id,1534
1,Original data shape,"(150, 4)"
2,Transformed data shape,"(150, 4)"
3,Numeric features,4
4,Preprocess,True
5,Imputation type,simple
6,Numeric imputation,mean
7,Categorical imputation,mode
8,CPU Jobs,-1
9,Use GPU,False


Unnamed: 0,Silhouette,Calinski-Harabasz,Davies-Bouldin,Homogeneity,Rand Index,Completeness
0,0.4152,245.9652,0.9082,0,0,0


Processing:   0%|          | 0/3 [00:00<?, ?it/s]

Running ap with 3 clusters and minmax scaling


Unnamed: 0,Description,Value
0,Session id,3415
1,Original data shape,"(150, 4)"
2,Transformed data shape,"(150, 4)"
3,Numeric features,4
4,Preprocess,True
5,Imputation type,simple
6,Numeric imputation,mean
7,Categorical imputation,mode
8,CPU Jobs,-1
9,Use GPU,False


Unnamed: 0,Silhouette,Calinski-Harabasz,Davies-Bouldin,Homogeneity,Rand Index,Completeness
0,0.3197,234.7115,1.0427,0,0,0


Processing:   0%|          | 0/3 [00:00<?, ?it/s]

Running ap with 4 clusters and minmax scaling


Unnamed: 0,Description,Value
0,Session id,1221
1,Original data shape,"(150, 4)"
2,Transformed data shape,"(150, 4)"
3,Numeric features,4
4,Preprocess,True
5,Imputation type,simple
6,Numeric imputation,mean
7,Categorical imputation,mode
8,CPU Jobs,-1
9,Use GPU,False


Unnamed: 0,Silhouette,Calinski-Harabasz,Davies-Bouldin,Homogeneity,Rand Index,Completeness
0,0.3197,234.7115,1.0427,0,0,0


Processing:   0%|          | 0/3 [00:00<?, ?it/s]

Running ap with 5 clusters and minmax scaling


Unnamed: 0,Description,Value
0,Session id,7478
1,Original data shape,"(150, 4)"
2,Transformed data shape,"(150, 4)"
3,Numeric features,4
4,Preprocess,True
5,Imputation type,simple
6,Numeric imputation,mean
7,Categorical imputation,mode
8,CPU Jobs,-1
9,Use GPU,False


Unnamed: 0,Silhouette,Calinski-Harabasz,Davies-Bouldin,Homogeneity,Rand Index,Completeness
0,0.3197,234.7115,1.0427,0,0,0


Processing:   0%|          | 0/3 [00:00<?, ?it/s]

Running meanshift with 3 clusters and minmax scaling


Unnamed: 0,Description,Value
0,Session id,7459
1,Original data shape,"(150, 4)"
2,Transformed data shape,"(150, 4)"
3,Numeric features,4
4,Preprocess,True
5,Imputation type,simple
6,Numeric imputation,mean
7,Categorical imputation,mode
8,CPU Jobs,-1
9,Use GPU,False


Unnamed: 0,Silhouette,Calinski-Harabasz,Davies-Bouldin,Homogeneity,Rand Index,Completeness
0,0.6295,353.3674,0.4877,0,0,0


Processing:   0%|          | 0/3 [00:00<?, ?it/s]

Running meanshift with 4 clusters and minmax scaling


Unnamed: 0,Description,Value
0,Session id,3809
1,Original data shape,"(150, 4)"
2,Transformed data shape,"(150, 4)"
3,Numeric features,4
4,Preprocess,True
5,Imputation type,simple
6,Numeric imputation,mean
7,Categorical imputation,mode
8,CPU Jobs,-1
9,Use GPU,False


Unnamed: 0,Silhouette,Calinski-Harabasz,Davies-Bouldin,Homogeneity,Rand Index,Completeness
0,0.6295,353.3674,0.4877,0,0,0


Processing:   0%|          | 0/3 [00:00<?, ?it/s]

Running meanshift with 5 clusters and minmax scaling


Unnamed: 0,Description,Value
0,Session id,3119
1,Original data shape,"(150, 4)"
2,Transformed data shape,"(150, 4)"
3,Numeric features,4
4,Preprocess,True
5,Imputation type,simple
6,Numeric imputation,mean
7,Categorical imputation,mode
8,CPU Jobs,-1
9,Use GPU,False


Unnamed: 0,Silhouette,Calinski-Harabasz,Davies-Bouldin,Homogeneity,Rand Index,Completeness
0,0.6295,353.3674,0.4877,0,0,0


Processing:   0%|          | 0/3 [00:00<?, ?it/s]

Running hclust with 3 clusters and minmax scaling


Unnamed: 0,Description,Value
0,Session id,2765
1,Original data shape,"(150, 4)"
2,Transformed data shape,"(150, 4)"
3,Numeric features,4
4,Preprocess,True
5,Imputation type,simple
6,Numeric imputation,mean
7,Categorical imputation,mode
8,CPU Jobs,-1
9,Use GPU,False


Unnamed: 0,Silhouette,Calinski-Harabasz,Davies-Bouldin,Homogeneity,Rand Index,Completeness
0,0.4326,299.8992,0.8493,0,0,0


Processing:   0%|          | 0/3 [00:00<?, ?it/s]

Running hclust with 4 clusters and minmax scaling


Unnamed: 0,Description,Value
0,Session id,3230
1,Original data shape,"(150, 4)"
2,Transformed data shape,"(150, 4)"
3,Numeric features,4
4,Preprocess,True
5,Imputation type,simple
6,Numeric imputation,mean
7,Categorical imputation,mode
8,CPU Jobs,-1
9,Use GPU,False


Unnamed: 0,Silhouette,Calinski-Harabasz,Davies-Bouldin,Homogeneity,Rand Index,Completeness
0,0.4326,299.8992,0.8493,0,0,0


Processing:   0%|          | 0/3 [00:00<?, ?it/s]

Running hclust with 5 clusters and minmax scaling


Unnamed: 0,Description,Value
0,Session id,5679
1,Original data shape,"(150, 4)"
2,Transformed data shape,"(150, 4)"
3,Numeric features,4
4,Preprocess,True
5,Imputation type,simple
6,Numeric imputation,mean
7,Categorical imputation,mode
8,CPU Jobs,-1
9,Use GPU,False


Unnamed: 0,Silhouette,Calinski-Harabasz,Davies-Bouldin,Homogeneity,Rand Index,Completeness
0,0.4326,299.8992,0.8493,0,0,0


Processing:   0%|          | 0/3 [00:00<?, ?it/s]

    Silhouette  Calinski-Harabasz  Davies-Bouldin  Homogeneity  Rand Index  \
0       0.5510           560.3660          0.6664            0           0   
1       0.4972           529.1207          0.7760            0           0   
2       0.4341           435.4545          0.8243            0           0   
3       0.3516           445.0811          0.9806            0           0   
4       0.3516           445.0811          0.9806            0           0   
5       0.3516           445.0811          0.9806            0           0   
6       0.6855           508.8825          0.3893            0           0   
7       0.6855           508.8825          0.3893            0           0   
8       0.6855           508.8825          0.3893            0           0   
9       0.4887           513.7721          0.7956            0           0   
10      0.4887           513.7721          0.7956            0           0   
11      0.4887           513.7721          0.7956            0  

In [1]:
!pip install pycaret

Collecting pycaret
  Downloading pycaret-3.3.2-py3-none-any.whl.metadata (17 kB)
Collecting pandas<2.2.0 (from pycaret)
  Downloading pandas-2.1.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (18 kB)
Collecting scipy<=1.11.4,>=1.6.1 (from pycaret)
  Downloading scipy-1.11.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (60 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m60.4/60.4 kB[0m [31m1.6 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting joblib<1.4,>=1.2.0 (from pycaret)
  Downloading joblib-1.3.2-py3-none-any.whl.metadata (5.4 kB)
Collecting pyod>=1.1.3 (from pycaret)
  Downloading pyod-2.0.3.tar.gz (169 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m169.6/169.6 kB[0m [31m5.9 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting category-encoders>=2.4.0 (from pycaret)
  Downloading category_encoders-2.8.0-py3-none-any.whl.metadata (7.9 kB)
Collectin