In [1]:
!pip install ucimlrepo &> null
print("ucimlrepo installed successfully")

!pip install pycaret &> /dev/null
print ("Pycaret installed sucessfully!!")

ucimlrepo installed successfully
Pycaret installed sucessfully!!


In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px

In [3]:
from ucimlrepo import fetch_ucirepo

# Download dataset id=45 from the UCI ML repository (bound to `heart_disease`).
heart_disease = fetch_ucirepo(id=45)

# Split the payload into the feature table (X) and the target table (y).
X, y = heart_disease.data.features, heart_disease.data.targets

# Show the per-variable metadata table (name, role, type, ...).
print(heart_disease.variables)

        name     role         type demographic  \
0        age  Feature      Integer         Age   
1        sex  Feature  Categorical         Sex   
2         cp  Feature  Categorical        None   
3   trestbps  Feature      Integer        None   
4       chol  Feature      Integer        None   
5        fbs  Feature  Categorical        None   
6    restecg  Feature  Categorical        None   
7    thalach  Feature      Integer        None   
8      exang  Feature  Categorical        None   
9    oldpeak  Feature      Integer        None   
10     slope  Feature  Categorical        None   
11        ca  Feature      Integer        None   
12      thal  Feature  Categorical        None   
13       num   Target      Integer        None   

                                          description  units missing_values  
0                                                None  years             no  
1                                                None   None             no  
2              

In [4]:
from pycaret.clustering import *
# Initialise a clustering experiment on the raw features; this first
# experiment is only used below to list the available model IDs.
model = setup(data=X, verbose=False)

In [5]:
# The model table is indexed by model ID; any of its columns shares that
# index, so take the index straight from the table.
available_models = model.models()
models_list = available_models.index
print(models_list)

Index(['kmeans', 'ap', 'meanshift', 'sc', 'hclust', 'dbscan', 'optics',
       'birch'],
      dtype='object', name='ID')


In [6]:
# Choose the algorithms to benchmark by model ID instead of by position.
# Positional indexing ([[0, 2, 4]]) made this cell non-idempotent: on a re-run
# `models_list` already has three entries, so position 4 raises IndexError,
# and a change in the order of the library's model table would silently pick
# different algorithms. Filtering with isin() keeps the original order and the
# index name, and gives the same result no matter how often the cell is run.
SELECTED_MODEL_IDS = ['kmeans', 'meanshift', 'hclust']

# Fail loudly if an expected algorithm is not offered by the installed library.
missing_model_ids = [m for m in SELECTED_MODEL_IDS if m not in models_list]
if missing_model_ids:
    raise ValueError(f'Clustering models not available: {missing_model_ids}')

models_list = models_list[models_list.isin(SELECTED_MODEL_IDS)]
print('Clustering Models Taken: ', models_list)

Clustering Models Taken:  Index(['kmeans', 'meanshift', 'hclust'], dtype='object', name='ID')


In [7]:
# Preprocessing configurations to compare. Each label maps to the keyword
# arguments (transformation / normalize / pca switches) that are forwarded to
# setup() for that experiment.
_PREPROCESS_FLAGS = ('transformation', 'normalize', 'pca')

parameters = {
    label: dict(zip(_PREPROCESS_FLAGS, switches))
    for label, switches in (
        ('No Data Processing', (False, False, False)),
        ('Using Normalisation', (False, True, False)),
        ('Using Transform', (True, False, False)),
        ('Using PCA', (False, False, True)),
        ('T+N', (True, True, False)),
        ('T+N+PCA', (True, True, True)),
    )
}

In [8]:
# Benchmark every selected clustering algorithm under each preprocessing
# configuration and each cluster count. For every algorithm, one metrics table
# is written to "<model id>.csv" and displayed.

# Not populated in this cell; kept so the name stays defined as before.
results = []

# Fixed seed handed to setup() so repeated runs give the same clusters and
# metrics (without it every setup() call draws a new random seed).
SESSION_ID = 42
CLUSTER_SIZES = range(3, 6)  # cluster counts 3, 4 and 5
# Number of leading rows kept from the transposed score grid; in the recorded
# output these are Silhouette, Calinski-Harabasz and Davies-Bouldin.
N_METRICS = 3

# The loop variable is `model_id`, not `model`: reusing `model` replaced the
# experiment object created in an earlier cell with a plain string, so
# re-running the model-listing cell afterwards failed.
for model_id in models_list:
    score_frames = []

    for name, args in parameters.items():
        # setup() depends only on the preprocessing switches, so the
        # experiment is built once per configuration and reused for every
        # cluster count instead of being rebuilt inside the size loop.
        exp = setup(X, verbose=False, session_id=SESSION_ID, **args)

        for size in CLUSTER_SIZES:
            # NOTE(review): the recorded meanshift scores are identical for
            # all three sizes, which suggests num_clusters has no effect on
            # that estimator -- confirm before comparing across sizes.
            exp.create_model(model_id, num_clusters=size, verbose=False)

            # pull() fetches the score grid of the model that was just
            # created; tag it with the configuration it belongs to.
            score_frames.append(exp.pull().assign(name=name, cluster_size=size))

    # Concatenate once (instead of growing a DataFrame inside the loop) and
    # index by (configuration, cluster count) without in-place mutation.
    model_results = (
        pd.concat(score_frames, ignore_index=True)
        .set_index(['name', 'cluster_size'])
    )

    # One column per (configuration, cluster count), one row per metric.
    model_results_transposed = model_results.sort_index().T
    metrics_table = model_results_transposed.iloc[:N_METRICS, :]

    metrics_table.to_csv(f'{model_id}.csv')

    print(model_id)
    display(metrics_table)


kmeans


name,No Data Processing,No Data Processing,No Data Processing,T+N,T+N,T+N,T+N+PCA,T+N+PCA,T+N+PCA,Using Normalisation,Using Normalisation,Using Normalisation,Using PCA,Using PCA,Using PCA,Using Transform,Using Transform,Using Transform
cluster_size,3,4,5,3,4,5,3,4,5,3,4,5,3,4,5,3,4,5
Silhouette,0.2812,0.2799,0.2754,0.1349,0.1358,0.1205,0.1353,0.1358,0.122,0.1287,0.1326,0.1273,0.2821,0.2799,0.2778,0.5602,0.5401,0.5399
Calinski-Harabasz,195.4521,177.0044,169.1367,48.7465,41.7209,36.8135,48.8753,41.5692,37.278,47.2545,40.8434,35.5697,195.5082,177.0044,169.3746,801.8385,923.3111,1016.9573
Davies-Bouldin,1.1562,1.0647,1.0552,2.3125,2.1014,2.1411,2.2439,2.1543,2.203,2.2305,2.1309,2.1853,1.1533,1.0647,1.0674,0.5415,0.5253,0.5313


meanshift


name,No Data Processing,No Data Processing,No Data Processing,T+N,T+N,T+N,T+N+PCA,T+N+PCA,T+N+PCA,Using Normalisation,Using Normalisation,Using Normalisation,Using PCA,Using PCA,Using PCA,Using Transform,Using Transform,Using Transform
cluster_size,3,4,5,3,4,5,3,4,5,3,4,5,3,4,5,3,4,5
Silhouette,0.6378,0.6378,0.6378,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.6378,0.6378,0.6378,0.0,0.0,0.0
Calinski-Harabasz,54.463,54.463,54.463,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,54.463,54.463,54.463,0.0,0.0,0.0
Davies-Bouldin,0.5276,0.5276,0.5276,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5276,0.5276,0.5276,0.0,0.0,0.0


hclust


name,No Data Processing,No Data Processing,No Data Processing,T+N,T+N,T+N,T+N+PCA,T+N+PCA,T+N+PCA,Using Normalisation,Using Normalisation,Using Normalisation,Using PCA,Using PCA,Using PCA,Using Transform,Using Transform,Using Transform
cluster_size,3,4,5,3,4,5,3,4,5,3,4,5,3,4,5,3,4,5
Silhouette,0.2443,0.2537,0.1995,0.1426,0.1123,0.0921,0.1426,0.1123,0.0921,0.1624,0.1193,0.1062,0.2443,0.2537,0.1995,0.5585,0.5403,0.5543
Calinski-Harabasz,174.0719,154.4552,145.7324,39.0832,35.3045,31.181,39.0832,35.3045,31.181,40.7073,37.7609,32.7531,174.0719,154.4552,145.7324,781.6421,877.0004,951.6959
Davies-Bouldin,1.2725,1.1425,1.2998,2.1858,2.231,2.4554,2.1858,2.231,2.4554,2.0277,2.1313,2.285,1.2725,1.1425,1.2998,0.5343,0.5204,0.5235
