In [1]:
#Updated on 21.02.2022

import numpy as np
from numpy.random import RandomState
from numpy.random import MT19937
import pandas as pd
from sklearn.cluster import KMeans
from sklearn.mixture import GaussianMixture
from sklearn.mixture_mod5 import GaussianMixtureMod5
from sklearn import metrics
from sklearn.metrics import accuracy_score
from pytictoc import TicToc
from statistics import mean, stdev
from tqdm.auto import tqdm, trange
from sklearn.metrics import accuracy_score, adjusted_rand_score
# from statmodels import robust
import warnings
warnings.filterwarnings('ignore')
import time
import datetime
from pathlib import Path
import os

def purity_score(y_true, y_pred):
    """Return the purity of the clustering ``y_pred`` against ``y_true``.

    The contingency matrix of the two labelings is built, the largest
    count along axis 0 is taken for every column, and the sum of those
    maxima is divided by the total number of counted samples.
    """
    counts = metrics.cluster.contingency_matrix(y_true, y_pred)
    # Largest entry of each column of the contingency matrix.
    column_maxima = np.amax(counts, axis=0)
    total = np.sum(counts)
    return np.sum(column_maxima) / total

#Get the Clustering Algorithm Performance
def cluster_eval(y_true, y_pred, X_train):
    """Score one clustering run against the ground-truth labels.

    Parameters
    ----------
    y_true : array-like
        Ground-truth class labels.
    y_pred : array-like
        Cluster ids predicted for the same samples.
    X_train : object
        Not used by the computation; kept so existing call sites keep working.

    Returns
    -------
    list
        ``[purity, ari_score, elapsed_time, iter_num]``.

    Notes
    -----
    ``elapsed_time`` and ``iter_num`` are not parameters: they are read from
    module-level globals, so the caller must assign both before calling.
    """
    # Score against the labels that were passed in. The previous version used
    # the global ``y_train`` here, silently ignoring the ``y_true`` argument.
    purity = purity_score(y_true, y_pred)
    ari_score = adjusted_rand_score(y_true, y_pred)

    return [purity, ari_score, elapsed_time, iter_num]

#Main Program start here!---------------------------------------------------------------------------------------------------------------------------------------#
# Benchmark driver: for every dataset, fit each clustering method
# ``num_repeat`` times, average purity / ARI / fit time / iteration count per
# method, and export the combined table to an Excel workbook.
#
# Dataset groups used in earlier runs (kept for reference):
# dataset_labels = ["transfusion", "breastCancer", "heartDisease", "australian", "japanese", "vertebral", "haberman", "iris", "new_thyroid", "dermatology"] # Done 22.01.2022
# dataset_labels = ["bank", "seismicBumps", "german", "chess", "abalone", "wallRobot","wilt", "adultIncome"]
# dataset_labels = ["transfusion", "wilt", "breastCancer", "heartDisease", "adultIncome", "australian", "japanese", "bank", "seismicBumps", "german",
#                   "chess", "vertebral", "haberman", "iris", "abalone", "new_thyroid","wallRobot", "dermatology",]
dataset_labels = ["iris"]
# dataset_labels = ["iris", "transfusion"]

method_labels = ["KMeans", 'GMM-random', 'GMM-kMeans', 'GMM-init_hybrid', 'OBGMM']
result_labels = ["Dataset","Algorithm", "purity", "purity_std", "ari_score", "ari_score_std", "Elapsed Time", "Iter_num"]
# Order of the values in the list returned by cluster_eval().
score_labels = ["purity", "ari_score", "elapsed_time", "iter_num"]

cov_type = 'full'
t = TicToc() # create TicToc instance
num_repeat = 2

# One factory per entry of method_labels, in the same order. Each takes the
# number of clusters and returns a fresh, unfitted estimator. Plain callables
# replace the former eval() of constructor source strings.
# NOTE(review): 'jaha_init_hybrid' and GaussianMixtureMod5 are not part of
# stock scikit-learn; presumably they come from a locally patched build.
cluster_factories = [
    lambda k: KMeans(n_clusters=k),
    lambda k: GaussianMixture(n_components=k, init_params='random',
                              covariance_type=cov_type, max_iter=100),
    lambda k: GaussianMixture(n_components=k, init_params='kmeans',
                              covariance_type=cov_type, max_iter=100),
    lambda k: GaussianMixture(n_components=k, init_params='jaha_init_hybrid',
                              covariance_type=cov_type, max_iter=100),
    lambda k: GaussianMixtureMod5(n_components=k, init_params='jaha_init_hybrid',
                                  covariance_type=cov_type, max_iter=100,
                                  resp_type='remove'),
]

result_frames = []
for m in tqdm(dataset_labels):
    #Read dataset
    dataset = pd.read_csv("dataset/"+m+".csv")
    #Features: every column except 'Target'
    X_train = dataset.drop('Target',axis=1)
    #Ground-truth labels
    y_train = dataset['Target']
    n_classes = len(np.unique(y_train))

    print("Start Dataset : ",m)
    all_scores = []
    for make_model, met_label in zip(cluster_factories, method_labels):
        met_scores = []
        print("\nMethod :",met_label)
        for repeat in range(num_repeat):
            print("Loop :",repeat)
            t.tic() # Start timer
            cluster_method = make_model(n_classes)
            cluster_method.fit(X_train)
            # elapsed_time and iter_num stay module-level names on purpose:
            # cluster_eval() reads them as globals.
            elapsed_time = t.tocvalue() #End Timer & Save elapsed time
            iter_num = cluster_method.n_iter_

            y_train_pred = cluster_method.predict(X_train)
            met_scores.append(cluster_eval(y_train, y_train_pred, X_train))

        # Aggregate the repeats of this method by column name.
        met_scores = pd.DataFrame(met_scores, columns=score_labels)
        mean_scores = met_scores.mean()
        std_scores = met_scores[["purity", "ari_score"]].std()
        all_scores.append([
            m, met_label,
            mean_scores["purity"], std_scores["purity"],
            mean_scores["ari_score"], std_scores["ari_score"],
            mean_scores["elapsed_time"], mean_scores["iter_num"],
        ])

    result_frames.append(pd.DataFrame(all_scores, columns=result_labels))

# DataFrame.append was removed in pandas 2.0; concatenate the per-dataset
# frames once instead of growing a frame inside the loop.
all_result = pd.concat(result_frames)
print("\nAll_results: \n",all_result)

#Save results to Excel
st = datetime.datetime.now().strftime('%H-%M-%S---%d-%m-%Y')
output_dir = Path("result/new_loop")
output_dir.mkdir(parents=True, exist_ok=True)  # to_excel does not create folders
filePath = (output_dir / ("All_result_"+st+".xlsx")).resolve()
all_result.to_excel(filePath, index=False)
print("filePath :", filePath)
print("\nFinish at ",st)
print("Number of repeat :", num_repeat )

# 'start' is a Windows shell built-in, so only try to open Excel there.
if os.name == "nt":
    os.system(f'start excel.exe "{filePath}"')

HBox(children=(IntProgress(value=0, max=1), HTML(value='')))

Start Dataset :  iris

Method : KMeans
Loop : 0
Loop : 1

Method : GMM-random
Loop : 0
Loop : 1

Method : GMM-kMeans
Loop : 0
Loop : 1

Method : GMM-init_hybrid
Loop : 0
n_components :  3
cluster_range :
 [[2.09 3.1 ]
 [3.1  4.1 ]
 [4.1  5.11]]

 p_dist_mean  0 : 
 0.015760516300045928

 p_dist_mean  1 : 
 0.00733555406154107

 p_dist_mean  2 : 
 0.0037183952385362565
all_cluster_k_master2 not empty 0 : 
 0    5.025000
1    3.447917
2    1.475000
3    0.245833
dtype: float64
all_cluster_k_master2 not empty 1 : 
 0    6.028205
1    2.789744
2    4.412821
3    1.384615
dtype: float64
all_cluster_k_master2 not empty 2 : 
 0    6.764
1    3.072
2    5.728
3    2.092
dtype: float64

 p_dist_mean2  0 : 
 0.30567255651311237

 p_dist_mean2  1 : 
 0.029691048366644778

 p_dist_mean2  2 : 
 0.03749871948594017

 p_dist_out_mean : 
 [0.30567255651311237, 0.029691048366644778, 0.03749871948594017]

 out_threshold : 0.30567255651311237
p_start : 
 7
p_end : 
 10
Loop : 1
n_components :  3
cluster_

 9.94055598e-01 7.78635328e-01]
th_resp : [0.5        0.5        0.49999998]
self.weights_ : th_resp  in m_step  [0.33333333 0.37333333 0.29333333]  :  [0.5        0.5        0.49999998]
Check th_resp in GaussianMixtureMod5 :
 [0.5        0.5        0.49999998]

In _estimate_gaussian_parameters_em, gaussian_mixture_mod5.py
nk:  [49.99999999 54.1565969  45.84340311]
nk_new:  [49.99999999 52.29191631 43.13531943]
weights_ in  remove  :  [49.99999999 54.1565969  45.84340311]
End of iteration :  1
log_prob_norm : -1.2789025762626993
lower_bound : -1.2789025762626993
prev_lower_bound : -inf
change : inf
Start of iteration :  2
x_resp : [1.00000000e+000 1.00000000e+000 1.00000000e+000 1.00000000e+000
 1.00000000e+000 1.00000000e+000 1.00000000e+000 1.00000000e+000
 1.00000000e+000 1.00000000e+000 1.00000000e+000 1.00000000e+000
 1.00000000e+000 1.00000000e+000 1.00000000e+000 1.00000000e+000
 1.00000000e+000 1.00000000e+000 1.00000000e+000 1.00000000e+000
 1.00000000e+000 1.00000000e+000 1.0

change : 0.03004856757603691
Start of iteration :  3
x_resp : [1.00000000e+000 1.00000000e+000 1.00000000e+000 1.00000000e+000
 1.00000000e+000 1.00000000e+000 1.00000000e+000 1.00000000e+000
 1.00000000e+000 1.00000000e+000 1.00000000e+000 1.00000000e+000
 1.00000000e+000 1.00000000e+000 1.00000000e+000 1.00000000e+000
 1.00000000e+000 1.00000000e+000 1.00000000e+000 1.00000000e+000
 1.00000000e+000 1.00000000e+000 1.00000000e+000 1.00000000e+000
 1.00000000e+000 1.00000000e+000 1.00000000e+000 1.00000000e+000
 1.00000000e+000 1.00000000e+000 1.00000000e+000 1.00000000e+000
 1.00000000e+000 1.00000000e+000 1.00000000e+000 1.00000000e+000
 1.00000000e+000 1.00000000e+000 1.00000000e+000 1.00000000e+000
 1.00000000e+000 1.00000000e+000 1.00000000e+000 1.00000000e+000
 1.00000000e+000 1.00000000e+000 1.00000000e+000 1.00000000e+000
 1.00000000e+000 1.00000000e+000 4.51734972e-092 1.43876148e-083
 2.22424537e-104 3.92652706e-064 2.37958840e-092 3.61443885e-079
 8.16240061e-094 4.98207269e

 1.18411538e-158 2.91151105e-121]
x_resp : [1.16193543e-30 3.73062402e-22 3.31357540e-25 6.07847208e-22
 5.72886432e-32 3.34551394e-31 8.10942885e-25 7.97857167e-28
 2.70200038e-19 1.39266404e-24 3.00006072e-34 3.16194262e-26
 1.35084792e-23 1.50096086e-23 5.97674739e-44 3.41849929e-44
 2.77720624e-35 1.26430965e-28 1.41593277e-32 1.14019720e-32
 1.11015117e-26 1.08196679e-28 3.55513726e-32 2.12249665e-18
 4.45910285e-23 1.39837759e-20 1.03390197e-22 4.23849534e-30
 2.94265383e-29 7.33464030e-23 1.08466361e-21 5.29289016e-25
 2.09803643e-43 2.90943400e-45 7.81430413e-23 6.83600279e-27
 1.52921642e-32 1.13307118e-33 3.28225138e-21 4.02736300e-28
 5.20674174e-29 1.10269051e-11 7.83163238e-24 1.51429756e-19
 9.13148323e-26 3.58981147e-20 4.71829301e-34 4.13314369e-24
 5.94252142e-34 4.75697829e-27 9.99987154e-01 9.99870927e-01
 9.99335486e-01 9.98192026e-01 9.98576878e-01 9.93925213e-01
 9.97280415e-01 9.99998596e-01 9.99930869e-01 9.96415905e-01
 9.99975558e-01 9.99366342e-01 9.99995573e

 4.47495388e-09 7.34339886e-03]
x_resp : [1.40483494e-42 6.13450933e-36 2.31284952e-37 8.51616327e-33
 1.25616090e-42 1.06413903e-41 3.35617433e-35 3.97324545e-39
 8.87430794e-31 2.85935050e-36 7.92671979e-46 1.10573660e-35
 4.34934516e-36 3.12107630e-35 9.66884276e-58 6.98307741e-54
 6.84462280e-48 3.50102918e-41 1.16572383e-44 1.09569207e-42
 8.55097399e-39 6.34269376e-40 5.92272705e-43 5.79034255e-31
 3.09284379e-30 1.55710196e-33 1.39571461e-34 6.73080002e-42
 2.06064457e-42 5.41465199e-33 5.82258715e-33 1.43965178e-39
 2.28437512e-49 3.06361952e-54 2.22471415e-35 2.05901480e-41
 1.69554615e-47 1.03658159e-42 8.51151351e-33 5.34843890e-40
 1.43581822e-41 5.82443763e-27 2.74489838e-34 5.01505050e-31
 4.18360325e-34 8.64820139e-34 1.96270846e-42 4.69739796e-35
 6.02476756e-45 1.13424036e-39 2.03444110e-05 2.44937134e-04
 8.89230978e-04 2.52121179e-03 1.94604665e-03 9.70041994e-03
 5.61173964e-03 3.25882752e-06 1.04857290e-04 7.90698607e-03
 3.28848896e-05 1.35252499e-03 6.63126216e-0

 9.99999997e-01 9.93992758e-01]
th_resp : [0.5        0.49999997 0.5       ]
self.weights_ : th_resp  in m_step  [0.33333333 0.31758649 0.34908018]  :  [0.5        0.49999997 0.5       ]
Check th_resp in GaussianMixtureMod5 :
 [0.5        0.49999997 0.5       ]

In _estimate_gaussian_parameters_em, gaussian_mixture_mod5.py
nk:  [50.         47.42005549 52.57994451]
nk_new:  [50.         47.17722293 50.75716744]
weights_ in  remove  :  [50.         47.42005549 52.57994451]
End of iteration :  6
log_prob_norm : -1.215780153722687
lower_bound : -1.215780153722687
prev_lower_bound : -1.2161854683117501
change : 0.00040531458906301054
Num of Iteration :  6
Finish of th_resp :  1
Loop : 1

In __init__ , base.py

In __init__ , gaussian_mixture_mod5 class in gaussian_mixturem_mod5.py

In fit , base.py

In fit_predict , base.py
n_components :  3
cluster_range :
 [[2.09 3.1 ]
 [3.1  4.1 ]
 [4.1  5.11]]

 p_dist_mean  0 : 
 0.015760516300045928

 p_dist_mean  1 : 
 0.00733555406154107

 p_dist_me

 9.94055598e-01 7.78635328e-01]
th_resp : [0.5        0.5        0.49999998]
self.weights_ : th_resp  in m_step  [0.33333333 0.37333333 0.29333333]  :  [0.5        0.5        0.49999998]
Check th_resp in GaussianMixtureMod5 :
 [0.5        0.5        0.49999998]

In _estimate_gaussian_parameters_em, gaussian_mixture_mod5.py
nk:  [49.99999999 54.1565969  45.84340311]
nk_new:  [49.99999999 52.29191631 43.13531943]
weights_ in  remove  :  [49.99999999 54.1565969  45.84340311]
End of iteration :  1
log_prob_norm : -1.2789025762626993
lower_bound : -1.2789025762626993
prev_lower_bound : -inf
change : inf
Start of iteration :  2
x_resp : [1.00000000e+000 1.00000000e+000 1.00000000e+000 1.00000000e+000
 1.00000000e+000 1.00000000e+000 1.00000000e+000 1.00000000e+000
 1.00000000e+000 1.00000000e+000 1.00000000e+000 1.00000000e+000
 1.00000000e+000 1.00000000e+000 1.00000000e+000 1.00000000e+000
 1.00000000e+000 1.00000000e+000 1.00000000e+000 1.00000000e+000
 1.00000000e+000 1.00000000e+000 1.0

change : 0.03004856757603691
Start of iteration :  3
x_resp : [1.00000000e+000 1.00000000e+000 1.00000000e+000 1.00000000e+000
 1.00000000e+000 1.00000000e+000 1.00000000e+000 1.00000000e+000
 1.00000000e+000 1.00000000e+000 1.00000000e+000 1.00000000e+000
 1.00000000e+000 1.00000000e+000 1.00000000e+000 1.00000000e+000
 1.00000000e+000 1.00000000e+000 1.00000000e+000 1.00000000e+000
 1.00000000e+000 1.00000000e+000 1.00000000e+000 1.00000000e+000
 1.00000000e+000 1.00000000e+000 1.00000000e+000 1.00000000e+000
 1.00000000e+000 1.00000000e+000 1.00000000e+000 1.00000000e+000
 1.00000000e+000 1.00000000e+000 1.00000000e+000 1.00000000e+000
 1.00000000e+000 1.00000000e+000 1.00000000e+000 1.00000000e+000
 1.00000000e+000 1.00000000e+000 1.00000000e+000 1.00000000e+000
 1.00000000e+000 1.00000000e+000 1.00000000e+000 1.00000000e+000
 1.00000000e+000 1.00000000e+000 4.51734972e-092 1.43876148e-083
 2.22424537e-104 3.92652706e-064 2.37958840e-092 3.61443885e-079
 8.16240061e-094 4.98207269e

 1.18411538e-158 2.91151105e-121]
x_resp : [1.16193543e-30 3.73062402e-22 3.31357540e-25 6.07847208e-22
 5.72886432e-32 3.34551394e-31 8.10942885e-25 7.97857167e-28
 2.70200038e-19 1.39266404e-24 3.00006072e-34 3.16194262e-26
 1.35084792e-23 1.50096086e-23 5.97674739e-44 3.41849929e-44
 2.77720624e-35 1.26430965e-28 1.41593277e-32 1.14019720e-32
 1.11015117e-26 1.08196679e-28 3.55513726e-32 2.12249665e-18
 4.45910285e-23 1.39837759e-20 1.03390197e-22 4.23849534e-30
 2.94265383e-29 7.33464030e-23 1.08466361e-21 5.29289016e-25
 2.09803643e-43 2.90943400e-45 7.81430413e-23 6.83600279e-27
 1.52921642e-32 1.13307118e-33 3.28225138e-21 4.02736300e-28
 5.20674174e-29 1.10269051e-11 7.83163238e-24 1.51429756e-19
 9.13148323e-26 3.58981147e-20 4.71829301e-34 4.13314369e-24
 5.94252142e-34 4.75697829e-27 9.99987154e-01 9.99870927e-01
 9.99335486e-01 9.98192026e-01 9.98576878e-01 9.93925213e-01
 9.97280415e-01 9.99998596e-01 9.99930869e-01 9.96415905e-01
 9.99975558e-01 9.99366342e-01 9.99995573e

 4.47495388e-09 7.34339886e-03]
x_resp : [1.40483494e-42 6.13450933e-36 2.31284952e-37 8.51616327e-33
 1.25616090e-42 1.06413903e-41 3.35617433e-35 3.97324545e-39
 8.87430794e-31 2.85935050e-36 7.92671979e-46 1.10573660e-35
 4.34934516e-36 3.12107630e-35 9.66884276e-58 6.98307741e-54
 6.84462280e-48 3.50102918e-41 1.16572383e-44 1.09569207e-42
 8.55097399e-39 6.34269376e-40 5.92272705e-43 5.79034255e-31
 3.09284379e-30 1.55710196e-33 1.39571461e-34 6.73080002e-42
 2.06064457e-42 5.41465199e-33 5.82258715e-33 1.43965178e-39
 2.28437512e-49 3.06361952e-54 2.22471415e-35 2.05901480e-41
 1.69554615e-47 1.03658159e-42 8.51151351e-33 5.34843890e-40
 1.43581822e-41 5.82443763e-27 2.74489838e-34 5.01505050e-31
 4.18360325e-34 8.64820139e-34 1.96270846e-42 4.69739796e-35
 6.02476756e-45 1.13424036e-39 2.03444110e-05 2.44937134e-04
 8.89230978e-04 2.52121179e-03 1.94604665e-03 9.70041994e-03
 5.61173964e-03 3.25882752e-06 1.04857290e-04 7.90698607e-03
 3.28848896e-05 1.35252499e-03 6.63126216e-0

 9.99999997e-01 9.93992758e-01]
th_resp : [0.5        0.49999997 0.5       ]
self.weights_ : th_resp  in m_step  [0.33333333 0.31758649 0.34908018]  :  [0.5        0.49999997 0.5       ]
Check th_resp in GaussianMixtureMod5 :
 [0.5        0.49999997 0.5       ]

In _estimate_gaussian_parameters_em, gaussian_mixture_mod5.py
nk:  [50.         47.42005549 52.57994451]
nk_new:  [50.         47.17722293 50.75716744]
weights_ in  remove  :  [50.         47.42005549 52.57994451]
End of iteration :  6
log_prob_norm : -1.215780153722687
lower_bound : -1.215780153722687
prev_lower_bound : -1.2161854683117501
change : 0.00040531458906301054
Num of Iteration :  6
Finish of th_resp :  1


All_results: 
   Dataset        Algorithm    purity  purity_std  ari_score  ari_score_std  \
0    iris           KMeans  0.893333         0.0   0.730238            0.0   
1    iris       GMM-random  0.793333         0.0   0.568803            0.0   
2    iris       GMM-kMeans  0.966667         0.0   0.903874       

0

In [None]:
# Scratch check of the aggregation step: each row below is one run, and only
# the last two columns (positions 2 and 3) are averaged.
all_scores1 = [
    [0.021298, 19, 0.793333, 0.568803],
    [0.024569, 22, 0.740000, 0.523316],
    [0.024907, 23, 0.793333, 0.568803],
]
all_scores2 = pd.DataFrame(all_scores1)
print(all_scores1)
print("\nAll_scores: ", all_scores2)

# Slice once and derive both statistics from the same view; the resulting
# Series keep the original column labels 2 and 3.
score_columns = all_scores2.iloc[:, 2:4]
mean_scores = score_columns.mean()
std_scores = score_columns.std()

print("\nMean_scores: \n", mean_scores)
print("\nStd_scores: \n", std_scores)
# Interleave as [mean(2), std(2), mean(3), std(3)].
all_scores = [stat[col] for col in (2, 3) for stat in (mean_scores, std_scores)]

print("\nAll_scores: ", all_scores)
# print("\nMean_scores: \n",mean_scores)   
# print("\nStd_scores: \n",std_scores) 
# print(all_scores2.iloc[:,2:4] )

In [None]:
import numpy as np
from numpy.random import RandomState
from numpy.random import MT19937
import pandas as pd
from sklearn.cluster import KMeans
from sklearn.mixture import GaussianMixture
from sklearn import metrics
from sklearn.metrics import accuracy_score
from pytictoc import TicToc
from statistics import mean, stdev
from tqdm.auto import tqdm, trange
from sklearn.metrics import accuracy_score, adjusted_rand_score
# import time

def purity_score(y_true, y_pred):
    """Return the purity of the clustering ``y_pred`` against ``y_true``.

    The contingency matrix of the two labelings is built, the largest
    count along axis 0 is taken for every column, and the sum of those
    maxima is divided by the total number of counted samples.
    """
    counts = metrics.cluster.contingency_matrix(y_true, y_pred)
    # Largest entry of each column of the contingency matrix.
    column_maxima = np.amax(counts, axis=0)
    total = np.sum(counts)
    return np.sum(column_maxima) / total

#Get the Clustering Algorithm Performance
def cluster_eval(y_true, y_pred, X_train):
    """Score one clustering run against the ground-truth labels.

    Parameters
    ----------
    y_true : array-like
        Ground-truth class labels.
    y_pred : array-like
        Cluster ids predicted for the same samples.
    X_train : object
        Not used by the computation; kept so existing call sites keep working.

    Returns
    -------
    list
        ``[purity, ari_score, elapsed_time, iter_num]``.

    Notes
    -----
    ``elapsed_time`` and ``iter_num`` are not parameters: they are read from
    module-level globals, so the caller must assign both before calling.
    """
    # Score against the labels that were passed in. The previous version used
    # the global ``y_train`` here, silently ignoring the ``y_true`` argument.
    purity = purity_score(y_true, y_pred)
    ari_score = adjusted_rand_score(y_true, y_pred)

    return [purity, ari_score, elapsed_time, iter_num]

#Main Program start here!---------------------------------------------------------------------------------------------------------------------------------------#
# Baseline benchmark: for every dataset, fit each clustering method
# ``num_repeat`` times and tabulate the mean purity / ARI / fit time /
# iteration count (plus the std of purity and ARI) per method.
# dataset_labels = ["iris","transfusion", "wilt", "breastCancer"]
dataset_labels = ["iris", "transfusion"]
method_labels = ["KMeans", 'GMM-random', 'GMM-kMeans']
result_labels = ["Dataset","Algorithm", "purity", "purity_std", "ari_score", "ari_score_std", "Elapsed Time", "Iter_num"]
# Order of the values in the list returned by cluster_eval().
score_labels = ["purity", "ari_score", "elapsed_time", "iter_num"]

cov_type = 'full'
t = TicToc() # create TicToc instance
num_repeat = 100

# One factory per entry of method_labels, in the same order. Each takes the
# number of clusters and returns a fresh, unfitted estimator. Plain callables
# replace the former eval() of constructor source strings.
cluster_factories = [
    lambda k: KMeans(n_clusters=k),
    lambda k: GaussianMixture(n_components=k, init_params='random',
                              covariance_type=cov_type, max_iter=100),
    lambda k: GaussianMixture(n_components=k, init_params='kmeans',
                              covariance_type=cov_type, max_iter=100),
]

result_frames = []
for m in tqdm(dataset_labels):
    #Read dataset
    dataset = pd.read_csv("dataset/"+m+".csv")
    #Features: every column except 'Target'
    X_train = dataset.drop('Target',axis=1)
    #Ground-truth labels
    y_train = dataset['Target']
    n_classes = len(np.unique(y_train))

    print("\n\nStart Dataset : ",m)
    all_scores = []
    for make_model, met_label in zip(cluster_factories, method_labels):
        met_scores = []
        for repeat in range(num_repeat):
            t.tic() # Start timer
            cluster_method = make_model(n_classes)
            cluster_method.fit(X_train)
            # elapsed_time and iter_num stay module-level names on purpose:
            # cluster_eval() reads them as globals.
            elapsed_time = t.tocvalue() #Save elapsed time
            iter_num = cluster_method.n_iter_

            y_train_pred = cluster_method.predict(X_train)
            met_scores.append(cluster_eval(y_train, y_train_pred, X_train))

        # Aggregate the repeats of this method by column name.
        met_scores = pd.DataFrame(met_scores, columns=score_labels)
        mean_scores = met_scores.mean()
        std_scores = met_scores[["purity", "ari_score"]].std()
        all_scores.append([
            m, met_label,
            mean_scores["purity"], std_scores["purity"],
            mean_scores["ari_score"], std_scores["ari_score"],
            mean_scores["elapsed_time"], mean_scores["iter_num"],
        ])

    result_frames.append(pd.DataFrame(all_scores, columns=result_labels))

# DataFrame.append was removed in pandas 2.0; concatenate the per-dataset
# frames once instead of growing a frame inside the loop.
all_result = pd.concat(result_frames)
print("\nAll_results: ",all_result)

       
#     all_result = all_result.append(pd.Series(), ignore_index=True) 
#     all_result.columns=result_labels 

#             purity = purity_score(y_train, y_train_pred)
#             accuracy = accuracy_score(y_train, y_train_pred)
#         #     print("Cluster Center :", kMeans_cluster.cluster_centers_)
#         #     print("\nPurity ",repeat,":", purity)
#         #     print("Accuracy :", accuracy)
#             all_purity.append(purity)
#             all_accuracy.append(accuracy)
#         # print("\nAll_Purity :\n", all_purity)
#         mean_pu = mean(all_purity)
#         std_pu = stdev(all_purity)
#         print("Purity Means :", mean_pu ,",","Std :", std_pu)

#         # print("\nAll_accuracy :\n", all_accuracy)
#         mean_ac = mean(all_accuracy)
#         std_ac = stdev(all_accuracy)
#         print("Accuracy Means :", mean_ac ,",","Std :", std_ac)





In [None]:
import numpy as np
from numpy.random import RandomState
from numpy.random import MT19937
import pandas as pd
from sklearn.cluster import KMeans
from sklearn.mixture import GaussianMixture
from sklearn import metrics
from sklearn.metrics import accuracy_score
from pytictoc import TicToc
from statistics import mean, stdev
from tqdm.auto import tqdm, trange
from sklearn.metrics import accuracy_score, adjusted_rand_score
# Small experiment: clustering constructors are stored as source strings and
# turned into estimator objects with eval(); both the string and the resulting
# object are printed.
clf1 = 'KMeans(n_clusters = n_classes)'
clf2 = "GaussianMixture(n_components = n_classes, init_params='kmeans', means_init= None, covariance_type='full', max_iter=50)"
# clf3 = GaussianMixture

# The constructor strings refer to this name, so it must exist before eval().
n_classes = 3

m = 'iris'
dataset = pd.read_csv("dataset/"+m+".csv")

# Features are every column except 'Target'; 'Target' holds the labels.
X_train = dataset.drop(columns='Target')
y_train = dataset['Target']

for clf in (clf1, clf2):
    print("clf :", clf)
    cluster_method = eval(clf)
    print("cluster_method :", cluster_method)

  


In [None]:
import numpy as np
from numpy.random import RandomState
from numpy.random import MT19937
import pandas as pd
from sklearn.cluster import KMeans
from sklearn.mixture import GaussianMixture
from sklearn import metrics
from sklearn.metrics import accuracy_score
from pytictoc import TicToc
from statistics import mean, stdev
# import time

def purity_score(y_true, y_pred):
    """Return the purity of the clustering ``y_pred`` against ``y_true``.

    The contingency matrix of the two labelings is built, the largest
    count along axis 0 is taken for every column, and the sum of those
    maxima is divided by the total number of counted samples.
    """
    counts = metrics.cluster.contingency_matrix(y_true, y_pred)
    # Largest entry of each column of the contingency matrix.
    column_maxima = np.amax(counts, axis=0)
    total = np.sum(counts)
    return np.sum(column_maxima) / total


# Compare k-Means, GMM with random init and GMM with k-means init on a single
# dataset: each method is fitted ``num_repreat`` times and the mean / sample
# standard deviation of purity and accuracy are printed.
# dataset_labels = ["transfusion", "wilt", "breastCancer", "heartDisease", "adultIncome", "australian", "japanese", "bank", "seismicBumps", "german", 
#                   "chess", "iris", "abalone", "wallRobot", "dermatology"] 
m = 'iris'

print("\n\nStart Dataset : ",m)
#Read dataset
dataset = pd.read_csv("dataset/"+m+".csv")

#Features: every column except 'Target'
X_train = dataset.drop('Target',axis=1)

#Ground-truth labels
y_train =  dataset['Target']

cov_type = 'full'
n_classes = len(np.unique(y_train))
num_repreat = 10


def _run_trials(make_model, n_runs):
    """Fit a fresh model ``n_runs`` times on ``X_train`` and score every run.

    ``make_model`` is a zero-argument callable returning an unfitted
    estimator. Returns ``(purities, accuracies)``, two lists with one entry
    per run.

    NOTE(review): accuracy_score compares raw cluster ids with class labels.
    Cluster ids are presumably arbitrary, so accuracy depends on how they
    happen to line up with the labels; purity does not have this problem.
    Behaviour is kept as-is - confirm whether label alignment is wanted.
    """
    purities, accuracies = [], []
    for _ in range(n_runs):
        model = make_model()
        print("cluster_method :", model)
        model.fit(X_train)
        predicted = model.predict(X_train)
        purities.append(purity_score(y_train, predicted))
        accuracies.append(accuracy_score(y_train, predicted))
    return purities, accuracies


def _summarise(label, values):
    """Print and return the mean and sample standard deviation of ``values``."""
    avg = mean(values)
    spread = stdev(values)
    print(label, avg, ",", "Std :", spread)
    return avg, spread


# The fit time and iteration count were previously measured on every run but
# never reported or stored, so that bookkeeping is no longer done here.

print("\nk-Means Clustering :")
all_purity, all_accuracy = _run_trials(
    lambda: KMeans(n_clusters=n_classes),
    num_repreat,
)
mean_pu, std_pu = _summarise("Purity Means :", all_purity)
mean_ac, std_ac = _summarise("Accuracy Means :", all_accuracy)

print("\nGMM Clustering-random :")
all_purity2, all_accuracy2 = _run_trials(
    lambda: GaussianMixture(n_components=n_classes, init_params='random',
                            means_init=None, covariance_type=cov_type,
                            max_iter=60),
    num_repreat,
)
mean_pu2, std_pu2 = _summarise("Purity Means :", all_purity2)
mean_ac2, std_ac2 = _summarise("Accuracy Means :", all_accuracy2)

print("\nGMM Clustering-kmeans :")
all_purity3, all_accuracy3 = _run_trials(
    lambda: GaussianMixture(n_components=n_classes, init_params='kmeans',
                            means_init=None, covariance_type=cov_type,
                            max_iter=50),
    num_repreat,
)
mean_pu3, std_pu3 = _summarise("Purity Means :", all_purity3)
mean_ac3, std_ac3 = _summarise("Accuracy Means :", all_accuracy3)

