In [1]:
from sklearn.cluster import KMeans
import numpy as np
import pandas as pd
from ucimlrepo import fetch_ucirepo 
from pk import PK
from sklearn.metrics import normalized_mutual_info_score


In [None]:
# Load Datasets -- kidney 

chronic_kidney_disease = fetch_ucirepo(id=336) 
  
# data (as pandas dataframes) 
X = chronic_kidney_disease.data.features 
y = chronic_kidney_disease.data.targets
  
X = X.apply(lambda col: pd.Categorical(col).codes.astype(float) if col.dtype == 'object' else col)
X[X == -1] = np.nan  
X = X.astype(float)
X = X.values
y = y.values
y = y.ravel()

# kernel mapping
pk = PK(n_bins_per_dim=8)     
New_feature = pk.fit_transform(X)

# clustering performance
kmeans = KMeans(n_clusters=len(np.unique(y)), random_state=7)
y_pred = kmeans.fit_predict(New_feature)
nmi_score = normalized_mutual_info_score(y.squeeze(), y_pred)
print(f'NMI score of Mammo is: {nmi_score}.')

NMI score of Mammo is: 0.6818986701987366.


In [None]:
# Load Datasets -- mammo 

mammographic_mass = fetch_ucirepo(id=161) 
  
# data (as pandas dataframes) 
X = mammographic_mass.data.features 
y = mammographic_mass.data.targets 
  
# variable information 


# kernel mapping
pk = PK(n_bins_per_dim=6)     
New_feature = pk.fit_transform(X)

# clustering performance
kmeans = KMeans(n_clusters=len(np.unique(y)), random_state=42)
y_pred = kmeans.fit_predict(New_feature)
nmi_score = normalized_mutual_info_score(y.squeeze(), y_pred)
print(f'NMI score of Mammo is: {nmi_score}.')

NMI score of Mammo is: 0.331456704849369.


In [None]:
# Load Datasets -- heart_disease 

heart_disease = fetch_ucirepo(id=45) 
  
X = heart_disease.data.features 
y = heart_disease.data.targets 

X = X.apply(lambda col: pd.Categorical(col).codes.astype(float) if col.dtype == 'object' else col)
X[X == -1] = np.nan  
X = X.astype(float)
X = X.values
y = y.values
y = y.ravel()
# variable information 


# kernel mapping
pk = PK(n_bins_per_dim=8)     
New_feature = pk.fit_transform(X)

# clustering performance
kmeans = KMeans(n_clusters=len(np.unique(y)), random_state=42)
y_pred = kmeans.fit_predict(New_feature)
nmi_score = normalized_mutual_info_score(y.squeeze(), y_pred)
print(f'NMI score of Heart is: {nmi_score}.')

NMI score of Heart is: 0.2323117415517299.


In [None]:
# Load Datasets -- hepatitis 

hepatitis = fetch_ucirepo(id=46) 
  
X = hepatitis.data.features 
y = hepatitis.data.targets 

X = X.apply(lambda col: pd.Categorical(col).codes.astype(float) if col.dtype == 'object' else col)
X[X == -1] = np.nan  
X = X.astype(float)
X = X.values
y = y.values
y = y.ravel()
# variable information 


# kernel mapping
pk = PK(n_bins_per_dim=2)     
New_feature = pk.fit_transform(X)

# clustering performance
kmeans = KMeans(n_clusters=len(np.unique(y)), random_state=0)
y_pred = kmeans.fit_predict(New_feature)
nmi_score = normalized_mutual_info_score(y.squeeze(), y_pred)
print(f'NMI score of Hepatitis is: {nmi_score}.')

NMI score of Hepatitis is: 0.22890914464324705.


In [None]:
# Load Datasets -- Wiscom 

breast_cancer_wisconsin_original = fetch_ucirepo(id=15) 
  
X = breast_cancer_wisconsin_original.data.features 
y = breast_cancer_wisconsin_original.data.targets 

X = X.apply(lambda col: pd.Categorical(col).codes.astype(float) if col.dtype == 'object' else col)
X[X == -1] = np.nan  
X = X.astype(float)
X = X.values
y = y.values
y = y.ravel()
# variable information 


# kernel mapping
pk = PK(n_bins_per_dim=8)     
New_feature = pk.fit_transform(X)

# clustering performance
kmeans = KMeans(n_clusters=len(np.unique(y)), random_state=42)
y_pred = kmeans.fit_predict(New_feature)
nmi_score = normalized_mutual_info_score(y.squeeze(), y_pred)
print(f'NMI score of Wiscom is: {nmi_score}.')

NMI score of Wiscom is: 0.7879327095462877.


In [None]:
# Load Datasets -- Mushroom 

mushroom = fetch_ucirepo(id=73) 
  
# data (as pandas dataframes) 
X = mushroom.data.features 
y = mushroom.data.targets  

X = X.apply(lambda col: pd.Categorical(col).codes.astype(float) if col.dtype == 'object' else col)
X[X == -1] = np.nan  
X = X.astype(float)
X = X.values
y = y.values
y = y.ravel()
# variable information 


# kernel mapping
pk = PK(n_bins_per_dim=6)     
New_feature = pk.fit_transform(X)

# clustering performance
kmeans = KMeans(n_clusters=len(np.unique(y)), random_state=42)
y_pred = kmeans.fit_predict(New_feature)
nmi_score = normalized_mutual_info_score(y.squeeze(), y_pred)
print(f'NMI score of Mushroom is: {nmi_score}.')

NMI score of Mushroom is: 0.5729371774663359.


In [None]:
# Load Datasets -- Credit 

credit_approval = fetch_ucirepo(id=27) 
  
X = credit_approval.data.features 
y = credit_approval.data.targets 

X = X.apply(lambda col: pd.Categorical(col).codes.astype(float) if col.dtype == 'object' else col)
X[X == -1] = np.nan  
X = X.astype(float)
X = X.values
y = y.values
y = y.ravel()
# variable information 


# kernel mapping
pk = PK(n_bins_per_dim=3)     
New_feature = pk.fit_transform(X)

# clustering performance
kmeans = KMeans(n_clusters=len(np.unique(y)), random_state=4)
y_pred = kmeans.fit_predict(New_feature)
nmi_score = normalized_mutual_info_score(y.squeeze(), y_pred)
print(f'NMI score of Credit is: {nmi_score}.')

NMI score of Credit is: 0.31432825258228947.


In [None]:
# Load Datasets -- adult 

adult = fetch_ucirepo(id=2) 
  
X = adult.data.features 
y = adult.data.targets 

X = X.apply(lambda col: pd.Categorical(col).codes.astype(float) if col.dtype == 'object' else col)
X[X == -1] = np.nan  
X = X.astype(float)
X = X.values
y = y.values
y = y.ravel()
# variable information 


# kernel mapping
pk = PK(n_bins_per_dim=5)     
New_feature = pk.fit_transform(X)

# clustering performance
kmeans = KMeans(n_clusters=len(np.unique(y)), random_state=42)
y_pred = kmeans.fit_predict(New_feature)
nmi_score = normalized_mutual_info_score(y.squeeze(), y_pred)
print(f'NMI score of adult is: {nmi_score}.')

NMI score of adult is: 0.08073083857711648.


In [7]:
# lung

lung_cancer = fetch_ucirepo(id=62) 
  
# data (as pandas dataframes) 
X = lung_cancer.data.features 
y = lung_cancer.data.targets

X = X.apply(lambda col: pd.Categorical(col).codes.astype(float) if col.dtype == 'object' else col)
X[X == -1] = np.nan  
X = X.astype(float)
X = X.values
y = y.values
y = y.ravel()
# variable information 


# kernel mapping
pk = PK(n_bins_per_dim=2)     
New_feature = pk.fit_transform(X)

# clustering performance
kmeans = KMeans(n_clusters=len(np.unique(y)), random_state=42)
y_pred = kmeans.fit_predict(New_feature)
nmi_score = normalized_mutual_info_score(y.squeeze(), y_pred)
print(f'NMI score of lung is: {nmi_score}.')

NMI score of lung is: 0.3975699075473587.


In [None]:
post_operative_patient = fetch_ucirepo(id=82) 
  
# data (as pandas dataframes) 
X = post_operative_patient.data.features 
y = post_operative_patient.data.targets 


X = X.apply(lambda col: pd.Categorical(col).codes.astype(float) if col.dtype == 'object' else col)
X[X == -1] = np.nan  
X = X.astype(float)
X = X.values
y = y.values
y = y.ravel()
# variable information 


# kernel mapping
pk = PK(n_bins_per_dim=6)     
New_feature = pk.fit_transform(X)

# clustering performance
kmeans = KMeans(n_clusters=len(np.unique(y)), random_state=42)
y_pred = kmeans.fit_predict(New_feature)
nmi_score = normalized_mutual_info_score(y.squeeze(), y_pred)
print(f'NMI score of post is: {nmi_score}.')

NMI score of post is: 0.05736941333511819.
