In [1]:
import csv
import gzip
import os
import matplotlib as mpl
import scipy.io

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
import umap
from sklearn.cluster import Birch, AffinityPropagation, DBSCAN, MeanShift, SpectralClustering, AgglomerativeClustering, KMeans, estimate_bandwidth
from sklearn.mixture import GaussianMixture
from sklearn.neighbors import kneighbors_graph
from itertools import cycle, islice
import warnings
warnings.filterwarnings('ignore')
%matplotlib inline
# Running totals of the per-trial accuracy percentages, one per clustering
# method. DataViz2 adds to them on every trial; they are averaged at the end.
gmmAverage = kmAverage = birchAverage = wardAverage = 0
affPropAverage = aggAverage = spectralAverage = 0

# Report file that receives every per-trial score and the final averages.
document = open(
    file='PARTIALLY MODIFIED MOLECULAR SUBTYPE 100 SIMULATIONS.txt',
    mode='w',
)
for b in range(1, 101):
    # One simulation trial: reload the inputs, filter genes and cells, reduce
    # with PCA then UMAP, and fit every clustering algorithm on the embedding.
    matrix_dir = "genesrna"
    mat = pd.read_csv("genesrna/dataset4.csv")
    mat = np.array(mat)
    datafile = 'dataset4.tpm.txt'
    df = pd.read_csv(datafile, sep='\t')
    df = df.set_index('gene_name')
    cells = list(df.columns.values)
    features_path = os.path.join(matrix_dir, "features4.txt")
    annotation = pd.read_csv(features_path, sep='\t', header=None)
    annotation.columns = ['gene_names']
    barcodes_path = os.path.join(matrix_dir, "barcodes4.txt")
    # Context manager so the barcode file handle is closed on every trial.
    with open(barcodes_path, 'r') as barcode_file:
        barcodes = [line.strip() for line in barcode_file]
    # After the transpose axis 0 is summed "per cell" and axis 1 "per gene"
    # below, and row i is later paired with cells[i].
    mat = mat.T

    # Keep genes whose total expression lies inside [low, high].
    low_expr_thr = 1000
    high_expr_thr = 1000000
    per_cell_sum = mat.sum(axis=1)
    per_gene_sum = mat.sum(axis=0)
    mat = mat[:, (per_gene_sum >= low_expr_thr) & (per_gene_sum <= high_expr_thr)]
    mean_exp = mat.mean(axis=0)
    # NOTE(review): this is sqrt(std) / mean rather than std / mean. Left
    # unchanged because it determines which genes pass the 0.6 cut -- confirm
    # whether the square root is intended.
    std_exp = np.sqrt(mat.std(axis=0))
    CV = std_exp / mean_exp

    mat = mat[:, CV >= 0.6]

    # Drop low-expression cells. The same mask must be applied to `cells`,
    # otherwise cells[i] no longer names row i once any row is removed and
    # every later cell -> cluster lookup is shifted.
    cells_expression = mat.sum(axis=1)
    keep_cells = cells_expression >= 100
    mat = mat[keep_cells, :]
    if len(cells) == len(keep_cells):
        cells = [cell for cell, keep in zip(cells, keep_cells) if keep]
    mat = np.log(mat + 1)

    # Spectrum of the filtered matrix. None of these three results is read
    # again in the visible part of this loop.
    U, S, V = np.linalg.svd(mat)
    eigvals = S**2 / np.sum(S**2)
    sing_vals = np.arange(len(mat)) + 1

    pca = PCA(n_components=100)
    pca.fit(mat)
    mat_reduce = pca.transform(mat)
    true_indices = {}
    true_coords = {}
    with open('mat_reduce.txt', 'w') as f:
        for i in mat_reduce:
            f.write("%s\n" % i)
    embedding = umap.UMAP(n_neighbors=30, min_dist=0.1, metric='euclidean').fit_transform(mat_reduce)

    # Two-way lookup between an embedding row (as its printed string) and the
    # cell name; FindTrueLabels and the scoring code rely on both tables.
    for i, point in enumerate(embedding):
        point_key = str(point)
        true_indices[point_key] = cells[i]
        true_coords[cells[i]] = point_key.strip()
    with open('embedding.txt', 'w') as f:
        for e in embedding:
            f.write("%s\n" % e)

    params = {'quantile': .3, 'eps': .5, 'damping': .9, 'preference': -200, 'n_neighbors': 30, 'n_clusters': 4}
    bandwidth = estimate_bandwidth(embedding, quantile=params['quantile'])
    connectivity = kneighbors_graph(embedding, n_neighbors=params['n_neighbors'], include_self=False)
    ms = MeanShift(bandwidth=bandwidth, bin_seeding=True)
    ward = AgglomerativeClustering(n_clusters=params['n_clusters'], linkage='ward', connectivity=connectivity)
    spectral = SpectralClustering(n_clusters=params['n_clusters'], eigen_solver='arpack', affinity="nearest_neighbors")
    dbscan = DBSCAN(eps=params['eps'])
    affinity_propagation = AffinityPropagation(damping=params['damping'], preference=params['preference'])
    # NOTE(review): newer scikit-learn releases rename AgglomerativeClustering's
    # `affinity` keyword to `metric`; switch the keyword when upgrading.
    average_linkage = AgglomerativeClustering(linkage="average", affinity="cityblock", n_clusters=params['n_clusters'], connectivity=connectivity)
    birch = Birch(n_clusters=params['n_clusters'])
    gmm = GaussianMixture(n_components=params['n_clusters'], covariance_type='full')
    kmeans = KMeans(n_clusters=params['n_clusters'])
    clustering_algorithms = (
        ('AffinityPropagation', affinity_propagation),
        ('MeanShift', ms),
        ('SpectralClustering', spectral),
        ('Ward', ward),
        ('AgglomerativeClustering', average_linkage),
        ('DBSCAN', dbscan),
        ('Birch', birch),
        ('GaussianMixture', gmm),
        ('KMeans', kmeans))

    # Fit every estimator on the embedding. The palette and label counts are
    # kept from the plotting version of this loop.
    for idx, (name, algorithm) in enumerate(clustering_algorithms):
        algorithm.fit(embedding)
        if hasattr(algorithm, 'labels_'):
            labels = algorithm.labels_
            # The `np.int` alias no longer exists in current NumPy; the
            # builtin `int` requests the same dtype.
            y_pred = labels.astype(int)
        else:
            labels = algorithm.predict(embedding)
            y_pred = labels
        colors = np.array(list(islice(cycle(['#377eb8', '#ff7f00', '#4daf4a',
                                             '#f781bf', '#a65628', '#984ea3',
                                             '#999999', '#e41a1c', '#dede00']),
                                      int(max(y_pred) + 1))))
        # add black color for outliers (if any)
        colors = np.append(colors, ["#000000"])

        # Frequency count of the non-negative labels.
        counts = np.bincount(labels[labels >= 0])

       
    def FindTrueLabels(algorithm):
        """Return a dict mapping each cell name to its cluster label.

        `algorithm` must already be fitted on the module-level `embedding`.
        Labels are read from `algorithm.labels_` when that attribute exists,
        otherwise from `algorithm.predict(embedding)`. Each embedding row is
        translated to a cell name through the module-level `true_indices`
        table, which is keyed by the string form of the row.

        Raises KeyError if a row's string form is missing from `true_indices`.
        """
        if hasattr(algorithm, 'labels_'):
            # The `np.int` alias no longer exists in current NumPy; the
            # builtin `int` requests the same dtype.
            y_pred = algorithm.labels_.astype(int)
        else:
            y_pred = algorithm.predict(embedding)

        # Row string -> label. Rows that print identically share one entry
        # and the later label wins.
        algorithm_coords = {}
        for point, label in zip(embedding, y_pred):
            algorithm_coords[str(point)] = label

        true_labels = {}
        for point_key, label in algorithm_coords.items():
            true_labels[true_indices[point_key]] = label
        return true_labels


    def DataCollectionMolecularSubtypeTvP(algorithm, cluster_1, cluster_2, cluster_3, cluster_4, results):
        """Score one fitted clustering against the four reference subtype lists.

        Steps:
          1. Get {cell: label} from FindTrueLabels and split the cells into
             four clusters: labels 0, 1 and 2 become clusters 1-3 and every
             other label is collected in cluster 4.
          2. Write each cluster's cell names, one per line, to
             cluster_1 .. cluster_4.
          3. For every (cluster, subtype) pair, count the cluster's cells that
             are listed in the subtype's reference file and write the
             fraction and the raw count to `results`.
          4. Pair subtypes with clusters greedily, largest fraction first,
             clearing the chosen row(s) and column(s) after each pick.
          5. Mark every reference cell 'black' if it lies in a cluster paired
             with its subtype, otherwise 'red'.

        Parameters:
            algorithm: fitted estimator, passed straight to FindTrueLabels.
            cluster_1 .. cluster_4: output paths for the four cluster lists.
            results: output path for the per-cluster count report.

        Returns:
            (truthnum1, correct_pct, incorrect_pct). truthnum1 maps
            true_coords[cell] to 'black' or 'red'; the two percentages are
            the shares of 'black' and 'red' entries, both 0.0 when no
            reference cell was found.

        Reads the module-level `true_coords` table and the four
        PARTIALLY_MODIFIED_*.txt reference files in the working directory.
        """
        subtype_files = (
            ('DP', 'PARTIALLY_MODIFIED_DPCELLS.txt'),
            ('ERP', 'PARTIALLY_MODIFIED_ERP.txt'),
            ('HER2P', 'PARTIALLY_MODIFIED_HER2P.txt'),
            ('TNBC', 'PARTIALLY_MODIFIED_TNBC.txt'),
        )
        cluster_paths = (cluster_1, cluster_2, cluster_3, cluster_4)
        # The report layout has a trailing space after some headers and
        # values. These (cluster number, subtype) pairs reproduce it exactly
        # so existing consumers of the results files see the same bytes.
        padded_headers = {(1, 'DP'), (1, 'ERP'), (1, 'HER2P'), (1, 'TNBC'),
                          (2, 'HER2P'), (2, 'TNBC')}
        padded_values = {(1, 'DP')}

        labels = FindTrueLabels(algorithm)

        # Step 1: split the cells into the four clusters.
        cluster_cells = [[], [], [], []]
        for cell, label in labels.items():
            slot = int(label) if label in (0, 1, 2) else 3
            cluster_cells[slot].append('%s' % cell)

        # Step 2: one cell name per line in each cluster file.
        for path, names in zip(cluster_paths, cluster_cells):
            with open(path, 'w') as handle:
                handle.writelines(name + '\n' for name in names)

        # Reference membership as sets: constant-time lookups, and a final
        # line that lacks a newline still matches its cell name.
        truth_sets = []
        for _, path in subtype_files:
            with open(path) as handle:
                truth_sets.append({line.rstrip('\n') for line in handle})

        # Step 3: counts[row][col] = cells of cluster `col` listed in subtype
        # `row`; fractions are relative to the cluster size. An empty cluster
        # gets 0.0 instead of raising ZeroDivisionError.
        counts = [[0] * 4 for _ in range(4)]
        fractions = [[0.0] * 4 for _ in range(4)]
        for col, names in enumerate(cluster_cells):
            for row, truth in enumerate(truth_sets):
                hits = sum(1 for name in names if name in truth)
                counts[row][col] = hits
                if names:
                    fractions[row][col] = hits / len(names)

        with open(results, 'w') as res:
            for col in range(4):
                for row, (tag, _) in enumerate(subtype_files):
                    layout_key = (col + 1, tag)
                    header_pad = ' ' if layout_key in padded_headers else ''
                    value_pad = ' ' if layout_key in padded_values else ''
                    res.write('%s Count Cluster %d:%s\n' % (tag, col + 1, header_pad))
                    res.write(str(fractions[row][col]) + value_pad + '\n')
                    res.write(str(counts[row][col]) + value_pad + '\n')
                res.write('\n')

        # Step 4: greedy pairing. Take the largest remaining fraction, record
        # it in new_stats, then clear its row(s) and column(s) so they cannot
        # be chosen again. Stops early once only zeros remain.
        all_stats = np.array(fractions)
        new_stats = np.zeros((4, 4))
        for _ in range(4):
            max_num = all_stats.max()
            if max_num == 0:
                break
            indices = np.where(all_stats == max_num)
            all_stats[indices[0]] = [0, 0, 0, 0]
            all_stats[:, indices[1]] = 0
            new_stats[indices] = max_num

        # Step 5: colour the reference cells, cluster by cluster and subtype
        # by subtype; a cell listed under several subtypes keeps the last
        # colour written.
        truthnum1 = {}
        for col, names in enumerate(cluster_cells):
            for row, truth in enumerate(truth_sets):
                row_best = new_stats[row].max()
                # A subtype that was never paired has an all-zero row; it
                # must not count as matching every cluster.
                paired = row_best > 0 and new_stats[row][col] == row_best
                color = 'black' if paired else 'red'
                for name in names:
                    if name in truth:
                        truthnum1[true_coords[name]] = color

        total = len(truthnum1)
        if total == 0:
            # No reference cell was found in any cluster: nothing to score.
            return truthnum1, 0.0, 0.0
        correctCount = sum(1 for color in truthnum1.values() if color == 'black')
        incorrectCount = (total - correctCount) / total
        correctCount = correctCount / total
        return truthnum1, correctCount * 100, incorrectCount * 100
    def DataViz2(figname, algorithms, cluster1, cluster2, cluster3, cluster4, results):
        """Score every estimator in `algorithms` and record its accuracy.

        For position i, DataCollectionMolecularSubtypeTvP is called with
        algorithms[i] and the i-th entry of each file-name list. The returned
        "correct" percentage is added to the estimator's module-level running
        total, written to the module-level `document`, and printed together
        with the current trial number `b`. Estimators that match none of the
        tracked ones are scored but not recorded. `figname` is accepted but
        not used.
        """
        # (estimator, name of its module-level running total, report label),
        # checked in this order; the first equal estimator wins.
        tracked = (
            (gmm, 'gmmAverage', 'GAUSSIAN'),
            (ward, 'wardAverage', 'WARD'),
            (birch, 'birchAverage', 'BIRCH'),
            (affinity_propagation, 'affPropAverage', 'AFFINITY PROPAGATION'),
            (average_linkage, 'aggAverage', 'AGGLOMERATIVE'),
            (spectral, 'spectralAverage', 'SPECTRAL'),
            (kmeans, 'kmAverage', 'KMEANS'),
        )
        module_vars = globals()
        for position, estimator in enumerate(algorithms):
            truthnum1, correct, incorrect = DataCollectionMolecularSubtypeTvP(
                estimator,
                cluster1[position],
                cluster2[position],
                cluster3[position],
                cluster4[position],
                results[position],
            )
            for candidate, total_name, label in tracked:
                if estimator == candidate:
                    module_vars[total_name] += correct
                    document.write(label + ': ' + str(correct) + '\n')
                    print(label + ' TRIAL ' + str(b) + ': ' + str(correct) + '\n')
                    break
           
    # Estimators scored on this trial. Every list below is index-aligned with
    # `algorithms`, so entry i always belongs to algorithms[i].
    algorithms = [gmm, spectral, ward, birch, average_linkage, affinity_propagation, kmeans]
    figname = [
        "Gaussian Mixture Model",
        "Spectral Clustering",
        "Ward Clustering",
        "Birch Clustering",
        "Agglomerative Clustering",
        "Affinity Propagation",
        "KMeans Clustering",
    ]
    # Output file names share one short prefix per estimator.
    file_prefixes = ["gmm", "spectral", "ward", "birch", "agg", "aff", "km"]
    cluster1 = [prefix + "_cluster1.txt" for prefix in file_prefixes]
    cluster2 = [prefix + "_cluster2.txt" for prefix in file_prefixes]
    cluster3 = [prefix + "_cluster3.txt" for prefix in file_prefixes]
    cluster4 = [prefix + "_cluster4.txt" for prefix in file_prefixes]
    results = [prefix + "_results.txt" for prefix in file_prefixes]
    DataViz2(figname, algorithms, cluster1, cluster2, cluster3, cluster4, results)
    
# Final averages: each running total holds the sum of the per-trial accuracy
# percentages from the 100 trials above, hence the division by 100.
final_scores = (
    ('GAUSSIAN', gmmAverage),
    ('BIRCH', birchAverage),
    ('WARD', wardAverage),
    ('SPECTRAL', spectralAverage),
    # The console summary used to print this total as "MEANSHIFT FINAL";
    # it is the affinity-propagation average, as the report file says.
    ('AFFINITY PROPAGATION', affPropAverage),
    ('AGGLOMERATIVE', aggAverage),
    ('KMEANS', kmAverage),
)
summary_lines = [label + ' FINAL: ' + str(total / 100) + '\n' for label, total in final_scores]

for line in summary_lines:
    document.write(line)
document.close()

# Each line already ends with a newline, so print() leaves a blank line
# after every entry, matching the per-trial output format.
for line in summary_lines:
    print(line)

GAUSSIAN TRIAL 1: 48.837209302325576

SPECTRAL TRIAL 1: 44.96124031007752

WARD TRIAL 1: 50.58139534883721

BIRCH TRIAL 1: 46.70542635658915

AGGLOMERATIVE TRIAL 1: 38.17829457364341

AFFINITY PROPAGATION TRIAL 1: 48.643410852713174

KMEANS TRIAL 1: 58.139534883720934

GAUSSIAN TRIAL 2: 60.07751937984496

SPECTRAL TRIAL 2: 32.945736434108525

WARD TRIAL 2: 62.98449612403101

BIRCH TRIAL 2: 37.98449612403101

AGGLOMERATIVE TRIAL 2: 54.263565891472865

AFFINITY PROPAGATION TRIAL 2: 55.23255813953488

KMEANS TRIAL 2: 55.81395348837209

GAUSSIAN TRIAL 3: 57.55813953488372

SPECTRAL TRIAL 3: 57.751937984496124

WARD TRIAL 3: 50.1937984496124

BIRCH TRIAL 3: 45.15503875968992

AGGLOMERATIVE TRIAL 3: 39.72868217054263

AFFINITY PROPAGATION TRIAL 3: 50.58139534883721

KMEANS TRIAL 3: 50.1937984496124

GAUSSIAN TRIAL 4: 43.992248062015506

SPECTRAL TRIAL 4: 38.372093023255815

WARD TRIAL 4: 59.10852713178295

BIRCH TRIAL 4: 52.13178294573644

AGGLOMERATIVE TRIAL 4: 54.263565891472865

AFFINITY 

WARD TRIAL 31: 64.53488372093024

BIRCH TRIAL 31: 42.44186046511628

AGGLOMERATIVE TRIAL 31: 39.14728682170542

AFFINITY PROPAGATION TRIAL 31: 49.41860465116279

KMEANS TRIAL 31: 48.837209302325576

GAUSSIAN TRIAL 32: 49.224806201550386

SPECTRAL TRIAL 32: 41.08527131782946

WARD TRIAL 32: 51.162790697674424

BIRCH TRIAL 32: 41.86046511627907

AGGLOMERATIVE TRIAL 32: 38.17829457364341

AFFINITY PROPAGATION TRIAL 32: 50.58139534883721

KMEANS TRIAL 32: 51.162790697674424

GAUSSIAN TRIAL 33: 43.41085271317829

SPECTRAL TRIAL 33: 53.29457364341085

WARD TRIAL 33: 57.17054263565892

BIRCH TRIAL 33: 54.263565891472865

AGGLOMERATIVE TRIAL 33: 26.744186046511626

AFFINITY PROPAGATION TRIAL 33: 54.06976744186046

KMEANS TRIAL 33: 55.23255813953488

GAUSSIAN TRIAL 34: 55.03875968992248

SPECTRAL TRIAL 34: 37.2093023255814

WARD TRIAL 34: 54.263565891472865

BIRCH TRIAL 34: 43.992248062015506

AGGLOMERATIVE TRIAL 34: 25.0

AFFINITY PROPAGATION TRIAL 34: 52.71317829457365

KMEANS TRIAL 34: 46.31

WARD TRIAL 61: 58.139534883720934

BIRCH TRIAL 61: 51.356589147286826

AGGLOMERATIVE TRIAL 61: 39.34108527131783

AFFINITY PROPAGATION TRIAL 61: 50.0

KMEANS TRIAL 61: 49.6124031007752

GAUSSIAN TRIAL 62: 39.34108527131783

SPECTRAL TRIAL 62: 39.34108527131783

WARD TRIAL 62: 51.55038759689923

BIRCH TRIAL 62: 42.82945736434108

AGGLOMERATIVE TRIAL 62: 55.62015503875969

AFFINITY PROPAGATION TRIAL 62: 50.3875968992248

KMEANS TRIAL 62: 50.58139534883721

GAUSSIAN TRIAL 63: 54.45736434108527

SPECTRAL TRIAL 63: 48.06201550387597

WARD TRIAL 63: 55.81395348837209

BIRCH TRIAL 63: 57.55813953488372

AGGLOMERATIVE TRIAL 63: 39.53488372093023

AFFINITY PROPAGATION TRIAL 63: 51.93798449612403

KMEANS TRIAL 63: 57.55813953488372

GAUSSIAN TRIAL 64: 60.65891472868217

SPECTRAL TRIAL 64: 38.17829457364341

WARD TRIAL 64: 56.007751937984494

BIRCH TRIAL 64: 40.89147286821706

AGGLOMERATIVE TRIAL 64: 39.53488372093023

AFFINITY PROPAGATION TRIAL 64: 47.286821705426355

KMEANS TRIAL 64: 49.0310077

WARD TRIAL 91: 55.62015503875969

BIRCH TRIAL 91: 55.23255813953488

AGGLOMERATIVE TRIAL 91: 38.759689922480625

AFFINITY PROPAGATION TRIAL 91: 52.90697674418605

KMEANS TRIAL 91: 54.45736434108527

GAUSSIAN TRIAL 92: 43.21705426356589

SPECTRAL TRIAL 92: 37.98449612403101

WARD TRIAL 92: 51.74418604651163

BIRCH TRIAL 92: 40.50387596899225

AGGLOMERATIVE TRIAL 92: 40.69767441860465

AFFINITY PROPAGATION TRIAL 92: 41.86046511627907

KMEANS TRIAL 92: 41.86046511627907

GAUSSIAN TRIAL 93: 49.224806201550386

SPECTRAL TRIAL 93: 35.65891472868217

WARD TRIAL 93: 50.775193798449614

BIRCH TRIAL 93: 58.52713178294574

AGGLOMERATIVE TRIAL 93: 39.72868217054263

AFFINITY PROPAGATION TRIAL 93: 49.41860465116279

KMEANS TRIAL 93: 49.41860465116279

GAUSSIAN TRIAL 94: 60.07751937984496

SPECTRAL TRIAL 94: 41.66666666666667

WARD TRIAL 94: 58.52713178294574

BIRCH TRIAL 94: 63.372093023255815

AGGLOMERATIVE TRIAL 94: 38.17829457364341

AFFINITY PROPAGATION TRIAL 94: 37.98449612403101

KMEANS TRIAL