In [None]:
import copy

import numpy
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
from kmodes.kprototypes import KPrototypes
from scipy.optimize import linear_sum_assignment
from sklearn.cluster import KMeans, AgglomerativeClustering
from sklearn.metrics import normalized_mutual_info_score
from sklearn.preprocessing import LabelEncoder, MinMaxScaler, StandardScaler
from torch.utils.data import Dataset, DataLoader

from duped_modules.dcn_duped import DCNDuped
from duped_modules.dec_duped import DECDuped, IDECDuped
from duped_modules.embeddings_autoencoder import EmbeddingsAutoencoder
from duped_modules.gower_duped import gower_matrix as gower_matrix_duped

In [None]:
# Bank Marketing: load the raw table, extract the target, then encode the features.
# Column groups used by every banking experiment further down.
# NOTE(review): "age" is label-encoded as a categorical feature - confirm this is intended.
banking_cat_cols = ["age", "job", "marital", "education", "default", "housing", "loan", "contact", "poutcome"]
banking_cont_cols = ["balance", "duration", "campaign", "pdays", "previous"]

banking_df = pd.read_csv("datasets/banking_marketing.csv", sep=";")
banking_df.drop_duplicates(inplace=True)

# Plot the class balance on the raw labels, then turn the labels into integer codes.
banking_df["y"].hist()
banking_y = LabelEncoder().fit_transform(banking_df["y"])

# The target and the two calendar columns are not used as features.
banking_df.drop(columns=["y", "day", "month"], inplace=True)

# Integer-code every categorical column and standardise every continuous column.
banking_df[banking_cat_cols] = banking_df[banking_cat_cols].apply(LabelEncoder().fit_transform)
banking_df[banking_cont_cols] = StandardScaler().fit_transform(banking_df[banking_cont_cols])
banking_df

In [None]:
# Adult/Census Income: load the raw table, build 0/1 labels, then encode the features.
census_cat_cols = ["workclass", "education", "marital-status", "occupation", "relationship", "race", "sex", "native-country"]
census_cont_cols = ["age", "fnlwgt", "education-num", "capital-gain", "capital-loss", "hours-per-week"]

census_df = pd.read_csv("datasets/census_income.csv")
census_df.drop_duplicates(inplace=True)

# Each income class is matched both with and without a trailing period.
census_class_codes = {" <=50K.": 0, " <=50K": 0, " >50K.": 1, " >50K": 1}

# Fail loudly on any spelling that is not covered above, instead of letting a raw
# string (or NaN) slip into the label vector next to the integer codes.
census_unexpected = set(census_df["class"].unique()) - set(census_class_codes)
if census_unexpected:
    raise ValueError(f"Unexpected values in 'class': {sorted(census_unexpected, key=str)}")

# Map to an explicit integer column; assigning 0/1 into the string column in place
# would leave it object-typed, and so would the label array taken from it.
census_df["class"] = census_df["class"].map(census_class_codes).astype(int)
census_df["class"].hist()
census_y = census_df["class"].to_numpy()

# The target is not a feature.
census_df.drop(columns="class", inplace=True)

# Integer-code every categorical column and standardise every continuous column.
census_df[census_cat_cols] = census_df[census_cat_cols].apply(LabelEncoder().fit_transform)
census_df[census_cont_cols] = StandardScaler().fit_transform(census_df[census_cont_cols])
census_df

In [None]:
# Credit Approval: load the raw table, drop incomplete rows, then encode the features.
credit_cat_cols = ["A1", "A4", "A5", "A6", "A7", "A9", "A10", "A12", "A13"]
credit_cont_cols = ["A2", "A3", "A8", "A11", "A14", "A15"]

credit_df = pd.read_csv("datasets/credit_approval.csv")

# "?" entries are treated as missing: convert them to NA so dropna() removes those rows.
credit_df.replace("?", pd.NA, inplace=True)
credit_df.dropna(inplace=True)
credit_df.drop_duplicates(inplace=True)

# Plot the class balance on the raw labels, then turn the labels into integer codes.
credit_df["A16"].hist()
credit_y = LabelEncoder().fit_transform(credit_df["A16"])

# The target is not a feature.
credit_df.drop(columns=["A16"], inplace=True)

# Integer-code every categorical column and standardise every continuous column.
credit_df[credit_cat_cols] = credit_df[credit_cat_cols].apply(LabelEncoder().fit_transform)
credit_df[credit_cont_cols] = StandardScaler().fit_transform(credit_df[credit_cont_cols])
credit_df

In [None]:
# Heart Disease: load the raw table, extract the target, then encode the features.
heart_cat_cols = ["sex", "cp", "fbs", "restecg", "exang", "slope", "thal"]
heart_cont_cols = ["age", "trestbps", "chol", "thalch", "oldpeak", "ca"]

heart_df = pd.read_csv("datasets/heart_disease.csv")
heart_df.dropna(inplace=True)
# NOTE(review): duplicates are removed while the "id" column is still present; if ids
# are unique per row this removes nothing - confirm that is the intent.
heart_df.drop_duplicates(inplace=True)

# Plot the class balance, then keep the target as a NumPy array.
heart_df["num"].hist()
heart_y = heart_df["num"].to_numpy()

# Identifier, source-dataset tag and target are not features.
heart_df.drop(columns=["id", "dataset", "num"], inplace=True)

# Rows with missing values were already removed before the labels were extracted, so a
# second dropna() here could only ever desynchronise the features from `heart_y`.
# Assert the invariants instead of silently re-filtering.
assert not heart_df.isna().any().any(), "unexpected missing values after initial dropna()"
assert len(heart_df) == len(heart_y), "features and labels are out of sync"

# Integer-code every categorical column and standardise every continuous column.
heart_df[heart_cat_cols] = heart_df[heart_cat_cols].apply(LabelEncoder().fit_transform)
heart_df[heart_cont_cols] = StandardScaler().fit_transform(heart_df[heart_cont_cols])
heart_df

In [None]:
class BankingDataset(Dataset):
    """Torch dataset yielding ``(categorical, continuous)`` feature pairs for the banking data.

    The column groups are taken from the module-level ``banking_cat_cols`` and
    ``banking_cont_cols`` lists.
    """

    def __init__(self, df):
        """Store the categorical and continuous columns of ``df`` as float tensors."""
        categorical = df[banking_cat_cols].to_numpy()
        continuous = df[banking_cont_cols].to_numpy()
        self.cat = torch.tensor(categorical, dtype=torch.float32)
        self.cont = torch.tensor(continuous, dtype=torch.float32)

    def __len__(self):
        """Return the number of rows held by the dataset."""
        return self.cat.size(0)

    def __getitem__(self, idx):
        """Return the ``(cat, cont)`` tensors at position ``idx``."""
        return self.cat[idx], self.cont[idx]

# Shuffled mini-batches of 512 rows over the banking features.
# NOTE(review): the loader shuffles rows; confirm the clustering models that receive it
# report labels_ in dataset order, since they are scored against banking_y.
banking_dataloader = DataLoader(dataset=BankingDataset(banking_df), batch_size=512, shuffle=True)

# One (cardinality, embedding width) pair per categorical column, in column-list order;
# the width is min(50, cardinality + 1) // 2.
banking_emb_sizes = [
    (n_levels, min(50, n_levels + 1) // 2)
    for n_levels in (banking_df[col].nunique() for col in banking_cat_cols)
]
# Total embedded width, and that width plus one slot per continuous column.
banking_cat_dim = sum(width for _, width in banking_emb_sizes)
banking_input_dim = len(banking_cont_cols) + banking_cat_dim

print(f"Cat dim: {banking_cat_dim}, Input dim: {banking_input_dim}")
print(f"Embeddings: {banking_emb_sizes}")

In [None]:
class CensusDataset(Dataset):
    """Torch dataset yielding ``(categorical, continuous)`` feature pairs for the census data.

    The column groups are taken from the module-level ``census_cat_cols`` and
    ``census_cont_cols`` lists.
    """

    def __init__(self, df):
        """Store the categorical and continuous columns of ``df`` as float tensors."""
        categorical = df[census_cat_cols].to_numpy()
        continuous = df[census_cont_cols].to_numpy()
        self.cat = torch.tensor(categorical, dtype=torch.float32)
        self.cont = torch.tensor(continuous, dtype=torch.float32)

    def __len__(self):
        """Return the number of rows held by the dataset."""
        return self.cat.size(0)

    def __getitem__(self, idx):
        """Return the ``(cat, cont)`` tensors at position ``idx``."""
        return self.cat[idx], self.cont[idx]

# Shuffled mini-batches of 512 rows over the census features.
# NOTE(review): the loader shuffles rows; confirm the clustering models that receive it
# report labels_ in dataset order, since they are scored against census_y.
census_dataloader = DataLoader(dataset=CensusDataset(census_df), batch_size=512, shuffle=True)

# One (cardinality, embedding width) pair per categorical column, in column-list order;
# the width is min(50, cardinality + 1) // 2.
census_emb_sizes = [
    (n_levels, min(50, n_levels + 1) // 2)
    for n_levels in (census_df[col].nunique() for col in census_cat_cols)
]
# Total embedded width, and that width plus one slot per continuous column.
census_cat_dim = sum(width for _, width in census_emb_sizes)
census_input_dim = len(census_cont_cols) + census_cat_dim

print(f"Cat dim: {census_cat_dim}, Input dim: {census_input_dim}")
print(f"Embeddings: {census_emb_sizes}")

In [None]:
class CreditDataset(Dataset):
    """Torch dataset yielding ``(categorical, continuous)`` feature pairs for the credit data.

    The column groups are taken from the module-level ``credit_cat_cols`` and
    ``credit_cont_cols`` lists.
    """

    def __init__(self, df):
        """Store the categorical and continuous columns of ``df`` as float tensors."""
        categorical = df[credit_cat_cols].to_numpy()
        continuous = df[credit_cont_cols].to_numpy()
        self.cat = torch.tensor(categorical, dtype=torch.float32)
        self.cont = torch.tensor(continuous, dtype=torch.float32)

    def __len__(self):
        """Return the number of rows held by the dataset."""
        return self.cat.size(0)

    def __getitem__(self, idx):
        """Return the ``(cat, cont)`` tensors at position ``idx``."""
        return self.cat[idx], self.cont[idx]

# Shuffled mini-batches of 32 rows over the credit features.
# NOTE(review): the loader shuffles rows; confirm the clustering models that receive it
# report labels_ in dataset order, since they are scored against credit_y.
credit_dataloader = DataLoader(dataset=CreditDataset(credit_df), batch_size=32, shuffle=True)

# One (cardinality, embedding width) pair per categorical column, in column-list order;
# the width is min(50, cardinality + 1) // 2.
credit_emb_sizes = [
    (n_levels, min(50, n_levels + 1) // 2)
    for n_levels in (credit_df[col].nunique() for col in credit_cat_cols)
]
# Total embedded width, and that width plus one slot per continuous column.
credit_cat_dim = sum(width for _, width in credit_emb_sizes)
credit_input_dim = len(credit_cont_cols) + credit_cat_dim

print(f"Cat dim: {credit_cat_dim}, Input dim: {credit_input_dim}")
print(f"Embeddings: {credit_emb_sizes}")

In [None]:
class HeartDataset(Dataset):
    """Torch dataset yielding ``(categorical, continuous)`` feature pairs for the heart data.

    The column groups are taken from the module-level ``heart_cat_cols`` and
    ``heart_cont_cols`` lists.
    """

    def __init__(self, df):
        """Store the categorical and continuous columns of ``df`` as float tensors."""
        categorical = df[heart_cat_cols].to_numpy()
        continuous = df[heart_cont_cols].to_numpy()
        self.cat = torch.tensor(categorical, dtype=torch.float32)
        self.cont = torch.tensor(continuous, dtype=torch.float32)

    def __len__(self):
        """Return the number of rows held by the dataset."""
        return self.cat.size(0)

    def __getitem__(self, idx):
        """Return the ``(cat, cont)`` tensors at position ``idx``."""
        return self.cat[idx], self.cont[idx]

# Shuffled mini-batches of 64 rows over the heart features.
# NOTE(review): the loader shuffles rows; confirm the clustering models that receive it
# report labels_ in dataset order, since they are scored against heart_y.
heart_dataloader = DataLoader(dataset=HeartDataset(heart_df), batch_size=64, shuffle=True)

# One (cardinality, embedding width) pair per categorical column, in column-list order;
# the width is min(50, cardinality + 1) // 2.
heart_emb_sizes = [
    (n_levels, min(50, n_levels + 1) // 2)
    for n_levels in (heart_df[col].nunique() for col in heart_cat_cols)
]
# Total embedded width, and that width plus one slot per continuous column.
heart_cat_dim = sum(width for _, width in heart_emb_sizes)
heart_input_dim = len(heart_cont_cols) + heart_cat_dim

print(f"Cat dim: {heart_cat_dim}, Input dim: {heart_input_dim}")
print(f"Embeddings: {heart_emb_sizes}")

In [None]:
# banking_k_prototypes = KPrototypes(n_clusters=2, random_state=0, verbose=1, n_jobs=-1)
# banking_k_prototypes.fit(banking_df.values, categorical=[banking_df.columns.get_loc(col) for col in banking_cat_cols])
# banking_k_prototypes_nmi = normalized_mutual_info_score(banking_y, banking_k_prototypes.labels_)
# banking_k_prototypes_nmi

In [None]:
# census_k_prototypes = KPrototypes(n_clusters=2, random_state=0, verbose=1, n_jobs=-1)
# census_k_prototypes.fit(census_df.values, categorical=[census_df.columns.get_loc(col) for col in census_cat_cols])
# census_k_prototypes_nmi = normalized_mutual_info_score(census_y, census_k_prototypes.labels_)
# census_k_prototypes_nmi

In [None]:
# credit_k_prototypes = KPrototypes(n_clusters=2, random_state=0, verbose=1, n_jobs=-1)
# credit_k_prototypes.fit(credit_df.values, categorical=[credit_df.columns.get_loc(col) for col in credit_cat_cols])
# credit_k_prototypes_nmi = normalized_mutual_info_score(credit_y, credit_k_prototypes.labels_)
# credit_k_prototypes_nmi

In [None]:
# heart_k_prototypes = KPrototypes(n_clusters=5, random_state=0, verbose=1, n_jobs=-1)
# heart_k_prototypes.fit(heart_df.values, categorical=[heart_df.columns.get_loc(col) for col in heart_cat_cols])
# heart_k_prototypes_nmi = normalized_mutual_info_score(heart_y, heart_k_prototypes.labels_)
# heart_k_prototypes_nmi

In [None]:
# Banking: train a plain and an attention autoencoder, then k-means on their encodings.

# Seed torch so weight initialisation and DataLoader shuffling repeat on a fresh run.
torch.manual_seed(0)

banking_encoder = torch.nn.Sequential(
    torch.nn.Linear(banking_input_dim, 32),
    torch.nn.BatchNorm1d(32),
    torch.nn.ReLU(),
    torch.nn.Linear(32, 16),
    torch.nn.BatchNorm1d(16),
    torch.nn.ReLU(),
    torch.nn.Linear(16, 12),
    torch.nn.BatchNorm1d(12),
    torch.nn.Sigmoid(),
)
banking_decoder = torch.nn.Sequential(
    torch.nn.Linear(12, 16),
    torch.nn.ReLU(),
    torch.nn.Linear(16, 32),
    torch.nn.ReLU(),
    torch.nn.Linear(32, banking_cat_dim),
    torch.nn.Sigmoid()
)

# The two autoencoders must not share layers: handing the same module objects to both
# means the second fit() keeps training the layers the first model encodes with.
# Snapshot the untrained networks *before* any training so both start from identical,
# independent weights.
# (Assumes EmbeddingsAutoencoder keeps references to the modules it is given; if it
# already copies them, these deep copies are merely redundant.)
banking_attention_encoder = copy.deepcopy(banking_encoder)
banking_attention_decoder = copy.deepcopy(banking_decoder)

banking_ae = EmbeddingsAutoencoder(banking_encoder, banking_decoder, banking_input_dim, banking_cat_dim, banking_emb_sizes, attention=False)
banking_ae.fit(n_epochs=100, lr=0.001, dataloader=banking_dataloader)

banking_attention_ae = EmbeddingsAutoencoder(banking_attention_encoder, banking_attention_decoder, banking_input_dim, banking_cat_dim, banking_emb_sizes, attention=True)
banking_attention_ae.fit(n_epochs=100, lr=0.001, dataloader=banking_dataloader)

# Whole dataset as tensors, in row order, shared by both encodings below.
cat = torch.tensor(banking_df[banking_cat_cols].values, dtype=torch.float)
cont = torch.tensor(banking_df[banking_cont_cols].values, dtype=torch.float)

# AE + k-means: cluster on [encoder output | scaled continuous columns].
cat_features = banking_ae.encode(cat, cont).detach().numpy()
features = np.concatenate((cat_features, banking_df[banking_cont_cols].values), 1)
banking_ae_kmeans = KMeans(n_clusters=2, n_init="auto", random_state=0).fit(features)
banking_ae_nmi = normalized_mutual_info_score(banking_y, banking_ae_kmeans.labels_)
print(banking_ae_nmi)

# Attention AE + k-means: same recipe with the attention model's encoding.
cat_features = banking_attention_ae.encode(cat, cont).detach().numpy()
features = np.concatenate((cat_features, banking_df[banking_cont_cols].values), 1)
banking_attention_ae_kmeans = KMeans(n_clusters=2, n_init="auto", random_state=0).fit(features)
banking_attention_ae_nmi = normalized_mutual_info_score(banking_y, banking_attention_ae_kmeans.labels_)
print(banking_attention_ae_nmi)

In [None]:
# Census: train a plain and an attention autoencoder, then k-means on their encodings.

# Seed torch so weight initialisation and DataLoader shuffling repeat on a fresh run.
torch.manual_seed(0)

census_encoder = torch.nn.Sequential(
    torch.nn.Linear(census_input_dim, 32),
    torch.nn.BatchNorm1d(32),
    torch.nn.ReLU(),
    torch.nn.Linear(32, 16),
    torch.nn.BatchNorm1d(16),
    torch.nn.ReLU(),
    torch.nn.Linear(16, 12),
    torch.nn.BatchNorm1d(12),
    torch.nn.Sigmoid(),
)
census_decoder = torch.nn.Sequential(
    torch.nn.Linear(12, 16),
    torch.nn.ReLU(),
    torch.nn.Linear(16, 32),
    torch.nn.ReLU(),
    torch.nn.Linear(32, census_cat_dim),
    torch.nn.Sigmoid()
)

# The two autoencoders must not share layers: handing the same module objects to both
# means the second fit() keeps training the layers the first model encodes with.
# Snapshot the untrained networks *before* any training so both start from identical,
# independent weights.
# (Assumes EmbeddingsAutoencoder keeps references to the modules it is given; if it
# already copies them, these deep copies are merely redundant.)
census_attention_encoder = copy.deepcopy(census_encoder)
census_attention_decoder = copy.deepcopy(census_decoder)

census_ae = EmbeddingsAutoencoder(census_encoder, census_decoder, census_input_dim, census_cat_dim, census_emb_sizes, attention=False)
census_ae.fit(n_epochs=100, lr=0.001, dataloader=census_dataloader)

census_attention_ae = EmbeddingsAutoencoder(census_attention_encoder, census_attention_decoder, census_input_dim, census_cat_dim, census_emb_sizes, attention=True)
census_attention_ae.fit(n_epochs=100, lr=0.001, dataloader=census_dataloader)

# Whole dataset as tensors, in row order, shared by both encodings below.
cat = torch.tensor(census_df[census_cat_cols].values, dtype=torch.float)
cont = torch.tensor(census_df[census_cont_cols].values, dtype=torch.float)

# AE + k-means: cluster on [encoder output | scaled continuous columns].
cat_features = census_ae.encode(cat, cont).detach().numpy()
features = np.concatenate((cat_features, census_df[census_cont_cols].values), 1)
census_ae_kmeans = KMeans(n_clusters=2, n_init="auto", random_state=0).fit(features)
census_ae_nmi = normalized_mutual_info_score(census_y, census_ae_kmeans.labels_)
print(census_ae_nmi)

# Attention AE + k-means: same recipe with the attention model's encoding.
cat_features = census_attention_ae.encode(cat, cont).detach().numpy()
features = np.concatenate((cat_features, census_df[census_cont_cols].values), 1)
census_attention_ae_kmeans = KMeans(n_clusters=2, n_init="auto", random_state=0).fit(features)
census_attention_ae_nmi = normalized_mutual_info_score(census_y, census_attention_ae_kmeans.labels_)
print(census_attention_ae_nmi)

In [None]:
# Credit: train a plain and an attention autoencoder, then k-means on their encodings.

# Seed torch so weight initialisation and DataLoader shuffling repeat on a fresh run.
torch.manual_seed(0)

credit_encoder = torch.nn.Sequential(
    torch.nn.Linear(credit_input_dim, 16),
    torch.nn.BatchNorm1d(16),
    torch.nn.ReLU(),
    torch.nn.Linear(16, 8),
    torch.nn.BatchNorm1d(8),
    torch.nn.Sigmoid(),
    torch.nn.Linear(8, 6),
    torch.nn.BatchNorm1d(6),
    torch.nn.Sigmoid(),
)
credit_decoder = torch.nn.Sequential(
    torch.nn.Linear(6, 8),
    torch.nn.ReLU(),
    torch.nn.Linear(8, 16),
    torch.nn.ReLU(),
    torch.nn.Linear(16, credit_cat_dim),
    torch.nn.Sigmoid()
)

# The two autoencoders must not share layers: handing the same module objects to both
# means the second fit() keeps training the layers the first model encodes with.
# Snapshot the untrained networks *before* any training so both start from identical,
# independent weights.
# (Assumes EmbeddingsAutoencoder keeps references to the modules it is given; if it
# already copies them, these deep copies are merely redundant.)
credit_attention_encoder = copy.deepcopy(credit_encoder)
credit_attention_decoder = copy.deepcopy(credit_decoder)

credit_ae = EmbeddingsAutoencoder(credit_encoder, credit_decoder, credit_input_dim, credit_cat_dim, credit_emb_sizes, attention=False)
credit_ae.fit(n_epochs=100, lr=0.001, dataloader=credit_dataloader)

credit_attention_ae = EmbeddingsAutoencoder(credit_attention_encoder, credit_attention_decoder, credit_input_dim, credit_cat_dim, credit_emb_sizes, attention=True)
credit_attention_ae.fit(n_epochs=100, lr=0.001, dataloader=credit_dataloader)

# Whole dataset as tensors, in row order, shared by both encodings below.
cat = torch.tensor(credit_df[credit_cat_cols].values, dtype=torch.float)
cont = torch.tensor(credit_df[credit_cont_cols].values, dtype=torch.float)

# AE + k-means: cluster on [encoder output | scaled continuous columns].
cat_features = credit_ae.encode(cat, cont).detach().numpy()
features = np.concatenate((cat_features, credit_df[credit_cont_cols].values), 1)
credit_ae_kmeans = KMeans(n_clusters=2, n_init="auto", random_state=0).fit(features)
credit_ae_nmi = normalized_mutual_info_score(credit_y, credit_ae_kmeans.labels_)
print(credit_ae_nmi)

# Attention AE + k-means: same recipe with the attention model's encoding.
cat_features = credit_attention_ae.encode(cat, cont).detach().numpy()
features = np.concatenate((cat_features, credit_df[credit_cont_cols].values), 1)
credit_attention_ae_kmeans = KMeans(n_clusters=2, n_init="auto", random_state=0).fit(features)
credit_attention_ae_nmi = normalized_mutual_info_score(credit_y, credit_attention_ae_kmeans.labels_)
print(credit_attention_ae_nmi)

In [None]:
# Heart: train a plain and an attention autoencoder, then k-means on their encodings.

# Seed torch so weight initialisation and DataLoader shuffling repeat on a fresh run.
torch.manual_seed(0)

heart_encoder = torch.nn.Sequential(
    torch.nn.Linear(heart_input_dim, 8),
    torch.nn.BatchNorm1d(8),
    torch.nn.ReLU(),
    torch.nn.Linear(8, 5),
    torch.nn.BatchNorm1d(5),
    torch.nn.Sigmoid(),
    torch.nn.Linear(5, 4),
    torch.nn.BatchNorm1d(4),
    torch.nn.Sigmoid(),
)
heart_decoder = torch.nn.Sequential(
    torch.nn.Linear(4, 5),
    torch.nn.ReLU(),
    torch.nn.Linear(5, 8),
    torch.nn.ReLU(),
    torch.nn.Linear(8, heart_cat_dim),
    torch.nn.Sigmoid()
)

# The two autoencoders must not share layers: handing the same module objects to both
# means the second fit() keeps training the layers the first model encodes with.
# Snapshot the untrained networks *before* any training so both start from identical,
# independent weights.
# (Assumes EmbeddingsAutoencoder keeps references to the modules it is given; if it
# already copies them, these deep copies are merely redundant.)
heart_attention_encoder = copy.deepcopy(heart_encoder)
heart_attention_decoder = copy.deepcopy(heart_decoder)

heart_ae = EmbeddingsAutoencoder(heart_encoder, heart_decoder, heart_input_dim, heart_cat_dim, heart_emb_sizes, attention=False)
heart_ae.fit(n_epochs=100, lr=0.001, dataloader=heart_dataloader)

heart_attention_ae = EmbeddingsAutoencoder(heart_attention_encoder, heart_attention_decoder, heart_input_dim, heart_cat_dim, heart_emb_sizes, attention=True)
heart_attention_ae.fit(n_epochs=100, lr=0.001, dataloader=heart_dataloader)

# Whole dataset as tensors, in row order, shared by both encodings below.
cat = torch.tensor(heart_df[heart_cat_cols].values, dtype=torch.float)
cont = torch.tensor(heart_df[heart_cont_cols].values, dtype=torch.float)

# AE + k-means (5 clusters): cluster on [encoder output | scaled continuous columns].
cat_features = heart_ae.encode(cat, cont).detach().numpy()
features = np.concatenate((cat_features, heart_df[heart_cont_cols].values), 1)
heart_ae_kmeans = KMeans(n_clusters=5, n_init="auto", random_state=0).fit(features)
heart_ae_nmi = normalized_mutual_info_score(heart_y, heart_ae_kmeans.labels_)
print(heart_ae_nmi)

# Attention AE + k-means: same recipe with the attention model's encoding.
cat_features = heart_attention_ae.encode(cat, cont).detach().numpy()
features = np.concatenate((cat_features, heart_df[heart_cont_cols].values), 1)
heart_attention_ae_kmeans = KMeans(n_clusters=5, n_init="auto", random_state=0).fit(features)
heart_attention_ae_nmi = normalized_mutual_info_score(heart_y, heart_attention_ae_kmeans.labels_)
print(heart_attention_ae_nmi)

In [None]:
# DCN on the banking data, once per autoencoder variant; each run is scored by NMI
# against banking_y.
# NOTE(review): the same autoencoder objects are also handed to the DEC and IDEC runs;
# if DCNDuped updates them in place, those later runs start from altered weights - confirm.
banking_dcn = DCNDuped(
    n_clusters=2,
    pretrain_epochs=100,
    clustering_epochs=100,
    autoencoder=banking_ae,
    random_state=np.random.RandomState(0),
)
banking_dcn.fit(banking_dataloader)
banking_dcn_nmi = normalized_mutual_info_score(labels_true=banking_y, labels_pred=banking_dcn.labels_)
print(banking_dcn_nmi)

banking_attention_dcn = DCNDuped(
    n_clusters=2,
    pretrain_epochs=100,
    clustering_epochs=100,
    autoencoder=banking_attention_ae,
    random_state=np.random.RandomState(0),
)
banking_attention_dcn.fit(banking_dataloader)
banking_attention_dcn_nmi = normalized_mutual_info_score(labels_true=banking_y, labels_pred=banking_attention_dcn.labels_)
print(banking_attention_dcn_nmi)

In [None]:
# DCN on the census data, once per autoencoder variant; each run is scored by NMI
# against census_y.
# NOTE(review): the same autoencoder objects are also handed to the DEC and IDEC runs;
# if DCNDuped updates them in place, those later runs start from altered weights - confirm.
census_dcn = DCNDuped(
    n_clusters=2,
    pretrain_epochs=100,
    clustering_epochs=100,
    autoencoder=census_ae,
    random_state=np.random.RandomState(0),
)
census_dcn.fit(census_dataloader)
census_dcn_nmi = normalized_mutual_info_score(labels_true=census_y, labels_pred=census_dcn.labels_)
print(census_dcn_nmi)

census_attention_dcn = DCNDuped(
    n_clusters=2,
    pretrain_epochs=100,
    clustering_epochs=100,
    autoencoder=census_attention_ae,
    random_state=np.random.RandomState(0),
)
census_attention_dcn.fit(census_dataloader)
census_attention_dcn_nmi = normalized_mutual_info_score(labels_true=census_y, labels_pred=census_attention_dcn.labels_)
print(census_attention_dcn_nmi)

In [None]:
# DCN on the credit data, once per autoencoder variant; each run is scored by NMI
# against credit_y.
# NOTE(review): the same autoencoder objects are also handed to the DEC and IDEC runs;
# if DCNDuped updates them in place, those later runs start from altered weights - confirm.
credit_dcn = DCNDuped(
    n_clusters=2,
    pretrain_epochs=100,
    clustering_epochs=100,
    autoencoder=credit_ae,
    random_state=np.random.RandomState(0),
)
credit_dcn.fit(credit_dataloader)
credit_dcn_nmi = normalized_mutual_info_score(labels_true=credit_y, labels_pred=credit_dcn.labels_)
print(credit_dcn_nmi)

credit_attention_dcn = DCNDuped(
    n_clusters=2,
    pretrain_epochs=100,
    clustering_epochs=100,
    autoencoder=credit_attention_ae,
    random_state=np.random.RandomState(0),
)
credit_attention_dcn.fit(credit_dataloader)
credit_attention_dcn_nmi = normalized_mutual_info_score(labels_true=credit_y, labels_pred=credit_attention_dcn.labels_)
print(credit_attention_dcn_nmi)

In [None]:
# DCN on the heart data (5 clusters), once per autoencoder variant; each run is scored
# by NMI against heart_y.
# NOTE(review): the same autoencoder objects are also handed to the DEC and IDEC runs;
# if DCNDuped updates them in place, those later runs start from altered weights - confirm.
heart_dcn = DCNDuped(
    n_clusters=5,
    pretrain_epochs=100,
    clustering_epochs=100,
    autoencoder=heart_ae,
    random_state=np.random.RandomState(0),
)
heart_dcn.fit(heart_dataloader)
heart_dcn_nmi = normalized_mutual_info_score(labels_true=heart_y, labels_pred=heart_dcn.labels_)
print(heart_dcn_nmi)

heart_attention_dcn = DCNDuped(
    n_clusters=5,
    pretrain_epochs=100,
    clustering_epochs=100,
    autoencoder=heart_attention_ae,
    random_state=np.random.RandomState(0),
)
heart_attention_dcn.fit(heart_dataloader)
heart_attention_dcn_nmi = normalized_mutual_info_score(labels_true=heart_y, labels_pred=heart_attention_dcn.labels_)
print(heart_attention_dcn_nmi)

In [None]:
# DEC on the banking data, once per autoencoder variant; each run is scored by NMI
# against banking_y.
# NOTE(review): these autoencoder objects were already passed to the DCN runs and are
# passed to IDEC next; confirm none of the models modifies them in place.
banking_dec = DECDuped(
    n_clusters=2,
    pretrain_epochs=100,
    clustering_epochs=100,
    autoencoder=banking_ae,
    random_state=np.random.RandomState(0),
)
banking_dec.fit(banking_dataloader)
banking_dec_nmi = normalized_mutual_info_score(labels_true=banking_y, labels_pred=banking_dec.labels_)
print(banking_dec_nmi)

banking_attention_dec = DECDuped(
    n_clusters=2,
    pretrain_epochs=100,
    clustering_epochs=100,
    autoencoder=banking_attention_ae,
    random_state=np.random.RandomState(0),
)
banking_attention_dec.fit(banking_dataloader)
banking_attention_dec_nmi = normalized_mutual_info_score(labels_true=banking_y, labels_pred=banking_attention_dec.labels_)
print(banking_attention_dec_nmi)

In [None]:
# DEC on the census data, once per autoencoder variant; each run is scored by NMI
# against census_y.
# NOTE(review): these autoencoder objects were already passed to the DCN runs and are
# passed to IDEC next; confirm none of the models modifies them in place.
census_dec = DECDuped(
    n_clusters=2,
    pretrain_epochs=100,
    clustering_epochs=100,
    autoencoder=census_ae,
    random_state=np.random.RandomState(0),
)
census_dec.fit(census_dataloader)
census_dec_nmi = normalized_mutual_info_score(labels_true=census_y, labels_pred=census_dec.labels_)
print(census_dec_nmi)

census_attention_dec = DECDuped(
    n_clusters=2,
    pretrain_epochs=100,
    clustering_epochs=100,
    autoencoder=census_attention_ae,
    random_state=np.random.RandomState(0),
)
census_attention_dec.fit(census_dataloader)
census_attention_dec_nmi = normalized_mutual_info_score(labels_true=census_y, labels_pred=census_attention_dec.labels_)
print(census_attention_dec_nmi)

In [None]:
# DEC on the credit data, once per autoencoder variant; each run is scored by NMI
# against credit_y.
# NOTE(review): these autoencoder objects were already passed to the DCN runs and are
# passed to IDEC next; confirm none of the models modifies them in place.
credit_dec = DECDuped(
    n_clusters=2,
    pretrain_epochs=100,
    clustering_epochs=100,
    autoencoder=credit_ae,
    random_state=np.random.RandomState(0),
)
credit_dec.fit(credit_dataloader)
credit_dec_nmi = normalized_mutual_info_score(labels_true=credit_y, labels_pred=credit_dec.labels_)
print(credit_dec_nmi)

credit_attention_dec = DECDuped(
    n_clusters=2,
    pretrain_epochs=100,
    clustering_epochs=100,
    autoencoder=credit_attention_ae,
    random_state=np.random.RandomState(0),
)
credit_attention_dec.fit(credit_dataloader)
credit_attention_dec_nmi = normalized_mutual_info_score(labels_true=credit_y, labels_pred=credit_attention_dec.labels_)
print(credit_attention_dec_nmi)

In [None]:
# DEC on the heart data (5 clusters), once per autoencoder variant; each run is scored
# by NMI against heart_y.
# NOTE(review): these autoencoder objects were already passed to the DCN runs and are
# passed to IDEC next; confirm none of the models modifies them in place.
heart_dec = DECDuped(
    n_clusters=5,
    pretrain_epochs=100,
    clustering_epochs=100,
    autoencoder=heart_ae,
    random_state=np.random.RandomState(0),
)
heart_dec.fit(heart_dataloader)
heart_dec_nmi = normalized_mutual_info_score(labels_true=heart_y, labels_pred=heart_dec.labels_)
print(heart_dec_nmi)

heart_attention_dec = DECDuped(
    n_clusters=5,
    pretrain_epochs=100,
    clustering_epochs=100,
    autoencoder=heart_attention_ae,
    random_state=np.random.RandomState(0),
)
heart_attention_dec.fit(heart_dataloader)
heart_attention_dec_nmi = normalized_mutual_info_score(labels_true=heart_y, labels_pred=heart_attention_dec.labels_)
print(heart_attention_dec_nmi)

In [None]:
# IDEC on the banking data, once per autoencoder variant; each run is scored by NMI
# against banking_y.
# NOTE(review): these autoencoder objects were already passed to the DCN and DEC runs;
# confirm none of those models modified them in place.
banking_idec = IDECDuped(
    n_clusters=2,
    pretrain_epochs=100,
    clustering_epochs=100,
    autoencoder=banking_ae,
    random_state=np.random.RandomState(0),
)
banking_idec.fit(banking_dataloader)
banking_idec_nmi = normalized_mutual_info_score(labels_true=banking_y, labels_pred=banking_idec.labels_)
print(banking_idec_nmi)

banking_attention_idec = IDECDuped(
    n_clusters=2,
    pretrain_epochs=100,
    clustering_epochs=100,
    autoencoder=banking_attention_ae,
    random_state=np.random.RandomState(0),
)
banking_attention_idec.fit(banking_dataloader)
banking_attention_idec_nmi = normalized_mutual_info_score(labels_true=banking_y, labels_pred=banking_attention_idec.labels_)
print(banking_attention_idec_nmi)

In [None]:
# IDEC on the census data, once per autoencoder variant; each run is scored by NMI
# against census_y.
# NOTE(review): these autoencoder objects were already passed to the DCN and DEC runs;
# confirm none of those models modified them in place.
census_idec = IDECDuped(
    n_clusters=2,
    pretrain_epochs=100,
    clustering_epochs=100,
    autoencoder=census_ae,
    random_state=np.random.RandomState(0),
)
census_idec.fit(census_dataloader)
census_idec_nmi = normalized_mutual_info_score(labels_true=census_y, labels_pred=census_idec.labels_)
print(census_idec_nmi)

census_attention_idec = IDECDuped(
    n_clusters=2,
    pretrain_epochs=100,
    clustering_epochs=100,
    autoencoder=census_attention_ae,
    random_state=np.random.RandomState(0),
)
census_attention_idec.fit(census_dataloader)
census_attention_idec_nmi = normalized_mutual_info_score(labels_true=census_y, labels_pred=census_attention_idec.labels_)
print(census_attention_idec_nmi)

In [None]:
# IDEC on the credit data, once per autoencoder variant; each run is scored by NMI
# against credit_y.
# NOTE(review): these autoencoder objects were already passed to the DCN and DEC runs;
# confirm none of those models modified them in place.
credit_idec = IDECDuped(
    n_clusters=2,
    pretrain_epochs=100,
    clustering_epochs=100,
    autoencoder=credit_ae,
    random_state=np.random.RandomState(0),
)
credit_idec.fit(credit_dataloader)
credit_idec_nmi = normalized_mutual_info_score(labels_true=credit_y, labels_pred=credit_idec.labels_)
print(credit_idec_nmi)

credit_attention_idec = IDECDuped(
    n_clusters=2,
    pretrain_epochs=100,
    clustering_epochs=100,
    autoencoder=credit_attention_ae,
    random_state=np.random.RandomState(0),
)
credit_attention_idec.fit(credit_dataloader)
credit_attention_idec_nmi = normalized_mutual_info_score(labels_true=credit_y, labels_pred=credit_attention_idec.labels_)
print(credit_attention_idec_nmi)

In [None]:
# IDEC on the heart data, once per autoencoder variant; each run is scored by NMI
# against heart_y.
# n_clusters is 5 here, matching every other heart experiment in this notebook
# (k-means, DCN, DEC). The previous value of 2 was carried over from the binary
# datasets and made the IDEC row incomparable with the rest of the heart column.
# NOTE(review): these autoencoder objects were already passed to the DCN and DEC runs;
# confirm none of those models modified them in place.
heart_idec = IDECDuped(n_clusters=5, pretrain_epochs=100, clustering_epochs=100, autoencoder=heart_ae, random_state=np.random.RandomState(0))
heart_idec.fit(heart_dataloader)
heart_idec_nmi = normalized_mutual_info_score(heart_y, heart_idec.labels_)
print(heart_idec_nmi)

heart_attention_idec = IDECDuped(n_clusters=5, pretrain_epochs=100, clustering_epochs=100, autoencoder=heart_attention_ae, random_state=np.random.RandomState(0))
heart_attention_idec.fit(heart_dataloader)
heart_attention_idec_nmi = normalized_mutual_info_score(heart_y, heart_attention_idec.labels_)
print(heart_attention_idec_nmi)

In [None]:
# Summary table: one row per method, one column per dataset, values are NMI scores.
# The "K-Prototypes" and "Gower + Agglomerative" rows are literal values; the cells
# that would compute K-Prototypes are commented out in this notebook.
# Missing entries use np.nan rather than pd.NA so every column stays float64; mixing
# pd.NA with Python floats would turn the columns into object dtype.
x = pd.DataFrame(
    [
        [0.017825373955683156, 0.0224625641297901, 0.10895828404430437, 0.17059947018933327],
        [np.nan, 0.000023, np.nan, np.nan],
        [banking_ae_nmi, census_ae_nmi, credit_ae_nmi, heart_ae_nmi],
        [banking_attention_ae_nmi, census_attention_ae_nmi, credit_attention_ae_nmi, heart_attention_ae_nmi],
        [banking_dcn_nmi, census_dcn_nmi, credit_dcn_nmi, heart_dcn_nmi],
        [banking_attention_dcn_nmi, census_attention_dcn_nmi, credit_attention_dcn_nmi, heart_attention_dcn_nmi],
        [banking_dec_nmi, census_dec_nmi, credit_dec_nmi, heart_dec_nmi],
        [banking_attention_dec_nmi, census_attention_dec_nmi, credit_attention_dec_nmi, heart_attention_dec_nmi],
        [banking_idec_nmi, census_idec_nmi, credit_idec_nmi, heart_idec_nmi],
        [banking_attention_idec_nmi, census_attention_idec_nmi, credit_attention_idec_nmi, heart_attention_idec_nmi],
    ],
    index=[
        "K-Prototypes",
        "Gower + Agglomerative",
        "AE + k-means",
        "Attention AE + k-means",
        "DCN",
        "Attention DCN",
        "DEC",
        "Attention DEC",
        "IDEC",
        "Attention IDEC",
    ],
    columns=["Banking Marketing", "Adult/Census Income", "Credit Approval", "Heart Disease"],
)
print(x)