In [1]:
# init
import json

import numpy as np
import pandas as pd
from tqdm import tqdm
# import nltk
# nltk.download()

In [None]:
import glob
import os

In [None]:
import re
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords

In [2]:
import math
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.preprocessing import LabelBinarizer
from sklearn.preprocessing import LabelEncoder
from scipy.sparse import spdiags

In [None]:
from sklearn.cluster import KMeans
# from spherecluster import SphericalKMeans
# from soyclustering import SphericalKMeans
# from sphericalcluster import SKMeans
from matplotlib import pyplot as plt

In [None]:
from sklearn.metrics import silhouette_score
from sklearn.metrics.cluster import contingency_matrix
from sklearn.metrics import adjusted_mutual_info_score

Import Data

In [None]:
# import pubmed: read every PubMed text file into one [class, document, feature] row
pubmed = []
# sorted() makes the row order reproducible; glob returns paths in arbitrary order
docList = sorted(glob.glob(os.path.join(os.getcwd(), "Datasets/pubmed/", "*.txt")))

for docPath in tqdm(docList):
    # get doc file name (the part of the base name before the first '.')
    docName = os.path.basename(docPath).split('.')[0]
    # the class label is the first three characters of the file name
    className = docName[:3]

    with open(docPath, encoding="utf8") as doc:
        # insert [class, docs, feature]; newlines are flattened to spaces
        pubmed.append([className, docName, doc.read().replace('\n', ' ')])

# make dataframe
dataframe = pd.DataFrame(data=pubmed, columns=['class', 'document', 'feature'])

# export pubmed raw (create the output folder first so a fresh checkout does not fail)
os.makedirs('pubmed', exist_ok=True)
dataframe.to_csv('pubmed/raw.csv', index=False)

In [None]:
# import scopus: read every Scopus file into one [class, document, feature] row
scopus = []
# sorted() makes the row order reproducible; glob returns paths in arbitrary order
docList = sorted(glob.glob(os.path.join(os.getcwd(), "Datasets/scopus/", "*")))

for docPath in tqdm(docList):
    # get doc file name (full base name, extension included)
    docName = os.path.basename(docPath)
    # the class label is the part of the file name before the first '-'
    className = docName.split('-')[0]

    with open(docPath, encoding="utf8") as doc:
        # insert [class, docs, feature]; newlines are flattened to spaces
        scopus.append([className, docName, doc.read().replace('\n', ' ')])

# make dataframe
dataframe = pd.DataFrame(data=scopus, columns=['class', 'document', 'feature'])

# export scopus raw (create the output folder first so a fresh checkout does not fail)
os.makedirs('scopus', exist_ok=True)
dataframe.to_csv('scopus/raw.csv', index=False)

Dataframe Raw

In [None]:
# load the raw exports of both corpora
pubmedRaw, scopusRaw = (pd.read_csv(path) for path in ('pubmed/raw.csv', 'scopus/raw.csv'))

# keep the text column of each corpus for preprocessing
pubmedFeatures = pubmedRaw['feature']
scopusFeatures = scopusRaw['feature']
scopusFeatures

Preprocessing

In [None]:
# cleaning
def cleaning(features):
    """Remove every character that is not an ASCII letter or whitespace.

    Args:
        features: iterable of document strings.

    Returns:
        list with one cleaned string per input document, in the same order.
    """
    nonLetter = re.compile(r'[^a-zA-Z\s]')
    return [nonLetter.sub('', text) for text in features]

# case folding
def caseFolding(features):
    """Lower-case every document.

    Args:
        features: iterable of document strings.

    Returns:
        list with the lower-cased copy of each document, in the same order.
    """
    return [text.lower() for text in features]

# tokenization
def tokenization(features):
    """Split every document into tokens with ``word_tokenize``.

    Args:
        features: iterable of document strings.

    Returns:
        list holding one token list per document, in the same order.
    """
    return [word_tokenize(text) for text in features]

# stopwords removal
def stopWords(features):
    """Drop English stop words from already tokenized documents.

    Args:
        features: iterable of token lists (one list per document).

    Returns:
        list of token lists with every token found in
        ``stopwords.words('english')`` removed; document order is kept.
    """
    # build the lookup set once, not once per document
    englishStops = set(stopwords.words('english'))
    return [[word for word in tokens if word not in englishStops] for tokens in features]

In [None]:
# preprocessing
def preprocessing(features):
    """Run the full text pipeline: cleaning, case folding, tokenization, stop-word removal.

    Args:
        features: iterable of raw document strings.

    Returns:
        list of token lists, one per document.
    """
    return stopWords(tokenization(caseFolding(cleaning(features))))
    
pubmedPreprocessed = preprocessing(pubmedFeatures)
scopusPreprocessed = preprocessing(scopusFeatures)
# print(scopusPreprocessed[0])

# export pubmed clean: every document is written back as one space-joined string.
# A single column assignment replaces the per-row .loc writes (same values, one pass).
pubmedRaw['feature'] = [' '.join(tokens) for tokens in pubmedPreprocessed]
pubmedRaw.to_csv('pubmed/clean.csv', index=False)

# export scopus clean
scopusRaw['feature'] = [' '.join(tokens) for tokens in scopusPreprocessed]
scopusRaw.to_csv('scopus/clean.csv', index=False)

Dataframe Clean

In [None]:
# load the cleaned exports of both corpora
pubmedClean, scopusClean = (pd.read_csv(path) for path in ('pubmed/clean.csv', 'scopus/clean.csv'))

# keep the cleaned text column of each corpus for feature forming
pubmedFeatures = pubmedClean['feature']
scopusFeatures = scopusClean['feature']
scopusClean

Feature Forming

In [None]:
from textblob import TextBlob

def featureForming(features, dataframe):
    """Add the BON and BONA text columns to ``dataframe`` in place.

    The existing 'feature' column is renamed to 'BOAW'. Every document is parsed
    once with TextBlob; 'BON' receives its space-joined ``noun_phrases`` and 'BONA'
    the space-joined words whose tag starts with 'NN' or 'JJ'.

    Args:
        features: iterable of cleaned document strings, in the row order of
            ``dataframe``. Non-string entries (for example the NaN that
            ``pd.read_csv`` yields for an empty text) are treated as empty documents.
        dataframe: frame modified in place; rows are addressed by the labels
            0..len(features)-1, as the per-row writes did before.

    Returns:
        None. Callers read the result from ``dataframe``.
    """
    # BOAW
    dataframe.rename(columns={'feature': 'BOAW'}, inplace=True)

    nounPhrases = []
    nounsAdjectives = []
    for text in tqdm(features):
        # parse once and reuse the blob for both representations
        blob = TextBlob(text if isinstance(text, str) else '')
        # BON
        nounPhrases.append(' '.join(blob.noun_phrases))
        # BONA
        nounsAdjectives.append(' '.join(word for (word, tag) in blob.tags if tag[:2] in ('NN', 'JJ')))

    # write each column in one aligned assignment instead of one .loc write per row
    rowLabels = range(len(nounPhrases))
    dataframe['BON'] = pd.Series(nounPhrases, index=rowLabels, dtype=object)
    dataframe['BONA'] = pd.Series(nounsAdjectives, index=rowLabels, dtype=object)

# featureForming modifies each frame in place: 'feature' is renamed to 'BOAW'
# and the 'BON' / 'BONA' columns are added
featureForming(pubmedFeatures, pubmedClean)
featureForming(scopusFeatures, scopusClean)
# print(scopusClean)

# persist the frames with their three text representations
pubmedClean.to_csv('pubmed/formed.csv', index=False)
scopusClean.to_csv('scopus/formed.csv', index=False)

Dataframe Formed

In [None]:
# load the formed exports of both corpora
pubmedFormed, scopusFormed = (pd.read_csv(path) for path in ('pubmed/formed.csv', 'scopus/formed.csv'))

# get feature
# pubmed: class labels plus the three text representations
pubmedClass, pubmed_boaw, pubmed_bon, pubmed_bona = (
    pubmedFormed[column] for column in ('class', 'BOAW', 'BON', 'BONA')
)

# scopus: class labels plus the three text representations
scopusClass, scopus_boaw, scopus_bon, scopus_bona = (
    scopusFormed[column] for column in ('class', 'BOAW', 'BON', 'BONA')
)
scopusClass

Feature Selection

In [None]:
# SFS
def symbolic(features, classes):
    """Select the features whose inter-class interval similarity is above average (SFS).

    Per feature, every class is summarised by the interval [mean - std, mean + std]
    of its term counts. For each ordered pair of distinct classes (A, B) the
    similarity is the signed overlap of the two intervals divided by the width of
    B's interval (0 when that width is 0). A feature's total similarity is the sum
    over all pairs; features whose total is greater than the average total are kept.

    Args:
        features: iterable of document strings. Missing entries (NaN / None) are
            treated as blank documents, as tf() and tf_idf() do.
        classes: class label of every document, in the same order as ``features``.

    Returns:
        list of the selected feature names, in vocabulary order.
    """
    documents = pd.Series(list(features)).fillna(' ')

    # get tf weights
    tfVec = CountVectorizer()
    tf = tfVec.fit_transform(documents)

    # define weights into dataframe. The class labels are deliberately kept OUT of
    # the frame: storing them in a 'class' column overwrote a vocabulary term named
    # 'class' and shifted the position -> name lookup of the selected features.
    featureName = tfVec.get_feature_names_out()
    df = pd.DataFrame(tf.toarray(), columns=featureName)
    labels = np.asarray(classes)

    # count mean & standard deviation per class (rows: classes, columns: features)
    grouped = df.groupby(labels)
    mean = grouped.mean()
    # std is NaN for a class with a single document; treat its spread as 0 so one
    # small class cannot turn every total (and the average) into NaN
    std = grouped.std().fillna(0)

    # count interval per feature and class
    lower = (mean - std).to_numpy()
    upper = (mean + std).to_numpy()
    width = upper - lower

    # count total similarity per feature, vectorised over the feature axis
    totalSm = np.zeros(len(featureName))
    classCount = lower.shape[0]
    for a in range(classCount):
        for b in range(classCount):
            # skip a class paired with itself; comparing by position (not by interval
            # value) keeps two different classes that share the same interval
            if a == b:
                continue
            overlap = np.minimum(upper[a], upper[b]) - np.maximum(lower[a], lower[b])
            # similarity stays 0 where interval B has zero width
            totalSm += np.divide(overlap, width[b], out=np.zeros_like(overlap), where=width[b] != 0)

    # select feature that totalSm > avgTotalSm
    avgTotalSm = totalSm.mean()
    return [name for name, sm in zip(featureName, totalSm) if sm > avgTotalSm]

# sfx = symbolic(scopus_boaw, scopusClass)
# print(sfx)

In [None]:
# TF-RF
def relevance(features, classes):
    """Select the features whose class-normalised TF-RF weight is above average.

    Steps: term counts are scaled per feature by rf, the mean over classes of
    log2(2 + in-class document count / max(1, out-of-class document count)). The
    TF-RF weights are averaged per class, each feature column is divided by its
    largest class mean, and the normalised class means are summed per feature.
    Features whose sum is greater than the mean of all sums are returned.

    Args:
        features: iterable of document strings. Missing entries (NaN / None) are
            treated as blank documents, as tf() and tf_idf() do.
        classes: class label of every document, in the same order as ``features``.

    Returns:
        list of the selected feature names, in vocabulary order.
    """
    documents = pd.Series(list(features)).fillna(' ')
    labels = np.asarray(classes)

    # get tf weights
    tfVec = CountVectorizer()
    tf = tfVec.fit_transform(documents)

    # get classes weights (one indicator column per class)
    cls = LabelBinarizer().fit_transform(labels)

    # a single indicator column comes back when there are not more than two
    # classes; add the complementary column so every class has its own indicator
    if cls.shape[1] == 1:
        cls = np.append(1 - cls, cls, axis=1)

    # count tf as boolean (1 if document contain the feature)
    tfBool = (tf > 0).astype(np.float64)
    # count document contain feature, for each class (rows: classes, columns: features)
    featureDoc = np.asarray(tfBool.T.dot(cls)).T

    # documents of all OTHER classes containing the feature. The totals are taken
    # from the untouched counts; dividing the rows in place one class at a time
    # fed already-rescaled rows into the totals of the classes that followed.
    otherDoc = featureDoc.sum(axis=0) - featureDoc
    ratio = featureDoc / np.maximum(1., otherDoc)

    # count rf
    rf = np.log2(2 + ratio).mean(axis=0)

    # count tf-rf & transform to sparse matrix
    totalFeature = rf.shape[0]
    tfRf = tf.dot(spdiags(rf, 0, totalFeature, totalFeature))

    # define weights into dataframe. The class labels are kept OUT of the frame so a
    # vocabulary term named 'class' is neither overwritten nor misaligned.
    featureName = tfVec.get_feature_names_out()
    df = pd.DataFrame(tfRf.toarray(), columns=featureName)

    # count weight by class
    mean = df.groupby(labels).mean()
    # NOTE(review): rf multiplies a whole feature column by one positive factor, and
    # dividing by the column maximum cancels that factor again, so this score does
    # not depend on rf. Confirm the normalisation is what the method intends.
    result = (mean / mean.max()).sum().to_numpy()

    # select the features whose score is above the average score
    threshold = result.mean()
    return [name for name, weight in zip(featureName, result) if weight > threshold]

# rfx = relevance(pubmed_bon, pubmedClass)
# print(rfx)

In [None]:
# implementation
# Symbolic Relevance
def _keepSelected(features, selected):
    """Tokenize every document and keep only the tokens contained in ``selected``."""
    # a set gives constant-time membership; the selector output would otherwise be
    # rescanned for every single token
    keep = set(selected)
    # non-string entries (e.g. NaN read back from CSV for an empty text) become empty documents
    token = tokenization([text if isinstance(text, str) else '' for text in features])
    return [[word for word in row if word in keep] for row in tqdm(token)]


def symbolic_relevance(features, classes):
    """Keep only the tokens selected by BOTH symbolic() and relevance().

    Args:
        features: iterable of document strings.
        classes: class label of every document, in the same order.

    Returns:
        list of token lists, one per document.
    """
    # get sfs
    sfs = symbolic(features, classes)
    # get tf-rf
    tfrf = relevance(features, classes)

    # intersect features
    selected = set(sfs) & set(tfrf)
    # print(len(selected))

    # update features
    return _keepSelected(features, selected)

# srx = symbolic_relevance(scopus_bona, scopusClass)
# print(srx)

# SFS
def symbolic_feature_selection(features, classes):
    """Keep only the tokens selected by symbolic(); prints how many features were selected.

    Args:
        features: iterable of document strings.
        classes: class label of every document, in the same order.

    Returns:
        list of token lists, one per document.
    """
    # get sfs
    selected = symbolic(features, classes)
    print(len(selected))

    # update features
    return _keepSelected(features, selected)

# sfsx = symbolic_feature_selection(scopus_bona, scopusClass)
# print(sfsx)

# TF-RF
def term_frequency_relevance_frequency(features, classes):
    """Keep only the tokens selected by relevance(); prints how many features were selected.

    Args:
        features: iterable of document strings.
        classes: class label of every document, in the same order.

    Returns:
        list of token lists, one per document.
    """
    # get tfrf
    selected = relevance(features, classes)
    print(len(selected))

    # update features
    return _keepSelected(features, selected)

# tfrfx = term_frequency_relevance_frequency(scopus_bona, scopusClass)
# print(tfrfx)

In [None]:
# feature selection
# Every call returns one token list per document. Each corpus is run through the
# three selectors for each of its three text representations (BOAW, BON, BONA).
# Note: symbolic_relevance() calls symbolic() and relevance() itself, so both
# selectors are computed again by the SFS and TF-RF calls that follow.
# pubmed
# Symbolic Relevance
pubmed_boaw_sr = symbolic_relevance(pubmed_boaw, pubmedClass)
pubmed_bon_sr = symbolic_relevance(pubmed_bon, pubmedClass)
pubmed_bona_sr = symbolic_relevance(pubmed_bona, pubmedClass)

# SFS
pubmed_boaw_sfs = symbolic_feature_selection(pubmed_boaw, pubmedClass)
pubmed_bon_sfs = symbolic_feature_selection(pubmed_bon, pubmedClass)
pubmed_bona_sfs = symbolic_feature_selection(pubmed_bona, pubmedClass)

# TF-RF
pubmed_boaw_tfrf = term_frequency_relevance_frequency(pubmed_boaw, pubmedClass)
pubmed_bon_tfrf = term_frequency_relevance_frequency(pubmed_bon, pubmedClass)
pubmed_bona_tfrf = term_frequency_relevance_frequency(pubmed_bona, pubmedClass)


# scopus
# Symbolic Relevance
scopus_boaw_sr = symbolic_relevance(scopus_boaw, scopusClass)
scopus_bon_sr = symbolic_relevance(scopus_bon, scopusClass)
scopus_bona_sr = symbolic_relevance(scopus_bona, scopusClass)

# SFS
scopus_boaw_sfs = symbolic_feature_selection(scopus_boaw, scopusClass)
scopus_bon_sfs = symbolic_feature_selection(scopus_bon, scopusClass)
scopus_bona_sfs = symbolic_feature_selection(scopus_bona, scopusClass)

# TF-RF
scopus_boaw_tfrf = term_frequency_relevance_frequency(scopus_boaw, scopusClass)
scopus_bon_tfrf = term_frequency_relevance_frequency(scopus_bon, scopusClass)
scopus_bona_tfrf = term_frequency_relevance_frequency(scopus_bona, scopusClass)

In [None]:
# save csv
# Each selected representation is stored as one space-joined string per document.
# Whole-column assignments replace the per-cell .loc writes (same values and column order).
# pubmed
# errors='ignore' lets the cell be re-run after the source columns were already dropped
pubmedFormed.drop(columns=['BOAW', 'BON', 'BONA'], inplace=True, errors='ignore')
for column, tokenRows in (
    # SR
    ('BOAW_SR', pubmed_boaw_sr),
    ('BON_SR', pubmed_bon_sr),
    ('BONA_SR', pubmed_bona_sr),
    # SFS
    ('BOAW_SFS', pubmed_boaw_sfs),
    ('BON_SFS', pubmed_bon_sfs),
    ('BONA_SFS', pubmed_bona_sfs),
    # TF-RF
    ('BOAW_TFRF', pubmed_boaw_tfrf),
    ('BON_TFRF', pubmed_bon_tfrf),
    ('BONA_TFRF', pubmed_bona_tfrf),
):
    pubmedFormed[column] = [' '.join(tokens) for tokens in tokenRows]

pubmedFormed.to_csv('pubmed/selected.csv', index=False)


# scopus
scopusFormed.drop(columns=['BOAW', 'BON', 'BONA'], inplace=True, errors='ignore')
for column, tokenRows in (
    # SR
    ('BOAW_SR', scopus_boaw_sr),
    ('BON_SR', scopus_bon_sr),
    ('BONA_SR', scopus_bona_sr),
    # SFS
    ('BOAW_SFS', scopus_boaw_sfs),
    ('BON_SFS', scopus_bon_sfs),
    ('BONA_SFS', scopus_bona_sfs),
    # TF-RF
    ('BOAW_TFRF', scopus_boaw_tfrf),
    ('BON_TFRF', scopus_bon_tfrf),
    ('BONA_TFRF', scopus_bona_tfrf),
):
    scopusFormed[column] = [' '.join(tokens) for tokens in tokenRows]

scopusFormed.to_csv('scopus/selected.csv', index=False)

In [3]:
# load the selected-feature exports of both corpora
pubmedSelected, scopusSelected = (pd.read_csv(path) for path in ('pubmed/selected.csv', 'scopus/selected.csv'))

# get feature
# pubmed: class labels, then one text column per (representation, selector) pair
(
    pubmedClass,
    pubmed_boaw_sr, pubmed_bon_sr, pubmed_bona_sr,
    pubmed_boaw_sfs, pubmed_bon_sfs, pubmed_bona_sfs,
    pubmed_boaw_tfrf, pubmed_bon_tfrf, pubmed_bona_tfrf,
) = (
    pubmedSelected[column] for column in (
        'class',
        'BOAW_SR', 'BON_SR', 'BONA_SR',
        'BOAW_SFS', 'BON_SFS', 'BONA_SFS',
        'BOAW_TFRF', 'BON_TFRF', 'BONA_TFRF',
    )
)

# scopus: same layout
(
    scopusClass,
    scopus_boaw_sr, scopus_bon_sr, scopus_bona_sr,
    scopus_boaw_sfs, scopus_bon_sfs, scopus_bona_sfs,
    scopus_boaw_tfrf, scopus_bon_tfrf, scopus_bona_tfrf,
) = (
    scopusSelected[column] for column in (
        'class',
        'BOAW_SR', 'BON_SR', 'BONA_SR',
        'BOAW_SFS', 'BON_SFS', 'BONA_SFS',
        'BOAW_TFRF', 'BON_TFRF', 'BONA_TFRF',
    )
)
# pubmedSelected

Term Weighting

In [4]:
# TF
def tf(features):
    """Return the raw term-count matrix of the documents.

    Args:
        features: pandas Series of document strings; missing values are replaced
            by a single space before vectorizing.

    Returns:
        whatever ``CountVectorizer().fit_transform`` returns for the documents.
    """
    documents = features.fillna(' ')
    return CountVectorizer().fit_transform(documents)

# tfx = tf(bona)
# print(tfx)

# TF-IDF
def tf_idf(features):
    """Return the TF-IDF weight matrix of the documents.

    Args:
        features: pandas Series of document strings; missing values are replaced
            by a single space before vectorizing.

    Returns:
        whatever ``TfidfVectorizer().fit_transform`` returns for the documents.
    """
    documents = features.fillna(' ')
    return TfidfVectorizer().fit_transform(documents)

# idfx = tf_idf(scopusBonSr)
# print(idfx)

# TF-IDF-ICF
def tf_idf_icf(features, classes):
    """Return TF-IDF weights scaled per feature by the inverse class frequency.

    icf(feature) = 1 + ln(number of classes / number of classes that contain the
    feature), where a class contains a feature when any of its documents has a
    positive TF-IDF weight for it.

    Args:
        features: pandas Series of document strings (passed to tf_idf()).
        classes: class label of every document, in the same order.

    Returns:
        sparse matrix of shape (documents, features) holding TF-IDF * icf.
    """
    # count tf-idf
    tfIdf = tf_idf(features)

    # get classes weights
    cls = LabelBinarizer().fit_transform(classes)
    # a single indicator column comes back when there are not more than two
    # classes; add the complementary column (as relevance() does), otherwise only
    # one class is counted and features unique to the other class divide by zero
    if cls.shape[1] == 1:
        cls = np.append(1 - cls, cls, axis=1)

    # count total class
    totalClass = cls.shape[1]

    # count class contain feature (1 if contain the feature, then sum per feature)
    classWeight = np.asarray(tfIdf.T.dot(cls))  # rows: features, columns: classes
    classFeature = (classWeight > 0).sum(axis=1).astype(np.float64)

    # count icf for all features at once
    icf = 1 + np.log(totalClass / classFeature)

    # get tf-idf-icf weights & transform to sparse matrix
    totalFeature = icf.shape[0]
    tfIdfIcf = tfIdf.dot(spdiags(icf, 0, totalFeature, totalFeature))
    return tfIdfIcf

# icfx = tf_idf_icf(pubmedBonSr, pubmedClass)
# print(icfx)

In [5]:
# term weighting
# For every corpus, text representation (boaw / bon / bona) and selector
# (sr / sfs / tfrf) three document-term matrices are built: raw counts (tf),
# TF-IDF (tfIdf) and TF-IDF scaled by inverse class frequency (tfIdfIcf).
# pubmed
# SR
pubmed_boaw_sr_tf = tf(pubmed_boaw_sr)
pubmed_boaw_sr_tfIdf = tf_idf(pubmed_boaw_sr)
pubmed_boaw_sr_tfIdfIcf = tf_idf_icf(pubmed_boaw_sr, pubmedClass)
pubmed_bon_sr_tf = tf(pubmed_bon_sr)
pubmed_bon_sr_tfIdf = tf_idf(pubmed_bon_sr)
pubmed_bon_sr_tfIdfIcf = tf_idf_icf(pubmed_bon_sr, pubmedClass)
pubmed_bona_sr_tf = tf(pubmed_bona_sr)
pubmed_bona_sr_tfIdf = tf_idf(pubmed_bona_sr)
pubmed_bona_sr_tfIdfIcf = tf_idf_icf(pubmed_bona_sr, pubmedClass)
# SFS
pubmed_boaw_sfs_tf = tf(pubmed_boaw_sfs)
pubmed_boaw_sfs_tfIdf = tf_idf(pubmed_boaw_sfs)
pubmed_boaw_sfs_tfIdfIcf = tf_idf_icf(pubmed_boaw_sfs, pubmedClass)
pubmed_bon_sfs_tf = tf(pubmed_bon_sfs)
pubmed_bon_sfs_tfIdf = tf_idf(pubmed_bon_sfs)
pubmed_bon_sfs_tfIdfIcf = tf_idf_icf(pubmed_bon_sfs, pubmedClass)
pubmed_bona_sfs_tf = tf(pubmed_bona_sfs)
pubmed_bona_sfs_tfIdf = tf_idf(pubmed_bona_sfs)
pubmed_bona_sfs_tfIdfIcf = tf_idf_icf(pubmed_bona_sfs, pubmedClass)
# TF-RF
pubmed_boaw_tfrf_tf = tf(pubmed_boaw_tfrf)
pubmed_boaw_tfrf_tfIdf = tf_idf(pubmed_boaw_tfrf)
pubmed_boaw_tfrf_tfIdfIcf = tf_idf_icf(pubmed_boaw_tfrf, pubmedClass)
pubmed_bon_tfrf_tf = tf(pubmed_bon_tfrf)
pubmed_bon_tfrf_tfIdf = tf_idf(pubmed_bon_tfrf)
pubmed_bon_tfrf_tfIdfIcf = tf_idf_icf(pubmed_bon_tfrf, pubmedClass)
pubmed_bona_tfrf_tf = tf(pubmed_bona_tfrf)
pubmed_bona_tfrf_tfIdf = tf_idf(pubmed_bona_tfrf)
pubmed_bona_tfrf_tfIdfIcf = tf_idf_icf(pubmed_bona_tfrf, pubmedClass)


# scopus
# SR
scopus_boaw_sr_tf = tf(scopus_boaw_sr)
scopus_boaw_sr_tfIdf = tf_idf(scopus_boaw_sr)
scopus_boaw_sr_tfIdfIcf = tf_idf_icf(scopus_boaw_sr, scopusClass)
scopus_bon_sr_tf = tf(scopus_bon_sr)
scopus_bon_sr_tfIdf = tf_idf(scopus_bon_sr)
scopus_bon_sr_tfIdfIcf = tf_idf_icf(scopus_bon_sr, scopusClass)
scopus_bona_sr_tf = tf(scopus_bona_sr)
scopus_bona_sr_tfIdf = tf_idf(scopus_bona_sr)
scopus_bona_sr_tfIdfIcf = tf_idf_icf(scopus_bona_sr, scopusClass)
# SFS
scopus_boaw_sfs_tf = tf(scopus_boaw_sfs)
scopus_boaw_sfs_tfIdf = tf_idf(scopus_boaw_sfs)
scopus_boaw_sfs_tfIdfIcf = tf_idf_icf(scopus_boaw_sfs, scopusClass)
scopus_bon_sfs_tf = tf(scopus_bon_sfs)
scopus_bon_sfs_tfIdf = tf_idf(scopus_bon_sfs)
scopus_bon_sfs_tfIdfIcf = tf_idf_icf(scopus_bon_sfs, scopusClass)
scopus_bona_sfs_tf = tf(scopus_bona_sfs)
scopus_bona_sfs_tfIdf = tf_idf(scopus_bona_sfs)
scopus_bona_sfs_tfIdfIcf = tf_idf_icf(scopus_bona_sfs, scopusClass)
# TF-RF
scopus_boaw_tfrf_tf = tf(scopus_boaw_tfrf)
scopus_boaw_tfrf_tfIdf = tf_idf(scopus_boaw_tfrf)
scopus_boaw_tfrf_tfIdfIcf = tf_idf_icf(scopus_boaw_tfrf, scopusClass)
scopus_bon_tfrf_tf = tf(scopus_bon_tfrf)
scopus_bon_tfrf_tfIdf = tf_idf(scopus_bon_tfrf)
scopus_bon_tfrf_tfIdfIcf = tf_idf_icf(scopus_bon_tfrf, scopusClass)
scopus_bona_tfrf_tf = tf(scopus_bona_tfrf)
scopus_bona_tfrf_tfIdf = tf_idf(scopus_bona_tfrf)
scopus_bona_tfrf_tfIdfIcf = tf_idf_icf(scopus_bona_tfrf, scopusClass)
# pubmed_bona_sr_tfIdfIcf

Clustering

In [None]:
# K-Means++
def kmeans_plus(data, k_values=range(2, 11), random_state=None):
    """Cluster ``data`` with k-means++ once for every requested number of clusters.

    Args:
        data: document-weight matrix passed unchanged to ``KMeans.fit``.
        k_values: iterable of cluster counts to try; defaults to 2..10, the
            range that used to be hard-coded.
        random_state: seed forwarded to ``KMeans``. The default ``None`` keeps
            the previous unseeded behaviour; pass an int for reproducible labels.

    Returns:
        list with one entry per value in ``k_values``; each entry is the list of
        cluster labels assigned to the rows of ``data``.
    """
    label = []
    for k in tqdm(k_values):
        kmp = KMeans(n_clusters=k, init='k-means++', random_state=random_state)
        kmp.fit(data)
        # plain Python lists so the result can be written to JSON
        label.append(kmp.labels_.tolist())
    return label

# kmpx = kmeans_plus(pubmed_bona_sr_tfIdfIcf)
# print(kmpx)

# # Spherical K-Means
# def spher_kmeans(data):
#     label = []
#     for k in tqdm(range(2, 11)):
#         skm = SKMeans(n_clusters=k)
#         skm.fit(data)
#         label.append(skm.get_labels())
#     return label

# skmx = spher_kmeans(pubmed_bona_sr_tfIdfIcf)
# print(skmx)

In [None]:
# import numpy as np
# from sklearn.cluster import KMeans
from sklearn.metrics.pairwise import pairwise_distances_argmin_min
 
def spherical_kmeans(X, n_clusters, max_iter=300, random_state=None):
    """Cluster the rows of ``X`` by direction: k-means on L2-normalised rows.

    This runs ordinary (Euclidean) ``KMeans`` with random initialisation on the
    unit-normalised rows; the cluster centres themselves are not re-normalised.

    Args:
        X: dense array or scipy sparse matrix of shape (samples, features).
        n_clusters: number of clusters.
        max_iter: maximum number of k-means iterations.
        random_state: seed forwarded to ``KMeans``. The default ``None`` keeps
            the previous unseeded behaviour; pass an int for reproducible labels.

    Returns:
        array with, for every row of ``X``, the index of its closest cluster centre.
    """
    # local import keeps this cell self-contained
    from sklearn.preprocessing import normalize

    # Normalize the input vectors to have unit norm. normalize() accepts sparse
    # input and leaves all-zero rows at zero, where X / np.linalg.norm(X, ...)
    # failed on sparse matrices and produced NaN for empty documents.
    X_normalized = normalize(X, norm='l2', axis=1)

    # Initialize the cluster centers randomly
    kmeans = KMeans(n_clusters=n_clusters, init='random', max_iter=max_iter, random_state=random_state)
    kmeans.fit(X_normalized)

    # For every sample, find the index of the closest cluster center
    labels, _ = pairwise_distances_argmin_min(X_normalized, kmeans.cluster_centers_)

    return labels
 
# Example usage
# X = np.array([[1, 0, 0], [0, 1, 0], [0, 0, 1], [-1, 0, 0], [0, -1, 0], [0, 0, -1]])
# n_clusters = 2
 
# Cluster the PubMed BONA / SR / TF-IDF-ICF weights into 4 groups and print the result.
# NOTE(review): the argument is the sparse matrix returned by tf_idf_icf();
# confirm that spherical_kmeans accepts sparse input.
cluster_indices = spherical_kmeans(pubmed_bona_sr_tfIdfIcf, 4)
print(cluster_indices)

In [7]:
# from sphereclustering import SphericalKMeans
# NOTE(review): every import of SphericalKMeans in the visible part of this notebook
# is commented out, so this cell raises NameError on a fresh kernel. The saved output
# below shows that it failed with a ValueError when it was last run.
smx = SphericalKMeans(n_clusters=4)
smx.fit(pubmed_bona_sr_tfIdfIcf)
smx.labels_



ValueError: Buffer has wrong number of dimensions (expected 2, got 1)

In [None]:
# clustering
# Every weight matrix is clustered with kmeans_plus(), which returns one list of
# cluster labels per tried cluster count.
# pubmed
## K-Means++
# SR
pubmed_boaw_sr_tf_kmeansPlus = kmeans_plus(pubmed_boaw_sr_tf)
pubmed_boaw_sr_tfIdf_kmeansPlus = kmeans_plus(pubmed_boaw_sr_tfIdf)
pubmed_boaw_sr_tfIdfIcf_kmeansPlus = kmeans_plus(pubmed_boaw_sr_tfIdfIcf)
pubmed_bon_sr_tf_kmeansPlus = kmeans_plus(pubmed_bon_sr_tf)
pubmed_bon_sr_tfIdf_kmeansPlus = kmeans_plus(pubmed_bon_sr_tfIdf)
pubmed_bon_sr_tfIdfIcf_kmeansPlus = kmeans_plus(pubmed_bon_sr_tfIdfIcf)
pubmed_bona_sr_tf_kmeansPlus = kmeans_plus(pubmed_bona_sr_tf)
pubmed_bona_sr_tfIdf_kmeansPlus = kmeans_plus(pubmed_bona_sr_tfIdf)
pubmed_bona_sr_tfIdfIcf_kmeansPlus = kmeans_plus(pubmed_bona_sr_tfIdfIcf)
# SFS
pubmed_boaw_sfs_tf_kmeansPlus = kmeans_plus(pubmed_boaw_sfs_tf)
pubmed_boaw_sfs_tfIdf_kmeansPlus = kmeans_plus(pubmed_boaw_sfs_tfIdf)
pubmed_boaw_sfs_tfIdfIcf_kmeansPlus = kmeans_plus(pubmed_boaw_sfs_tfIdfIcf)
pubmed_bon_sfs_tf_kmeansPlus = kmeans_plus(pubmed_bon_sfs_tf)
pubmed_bon_sfs_tfIdf_kmeansPlus = kmeans_plus(pubmed_bon_sfs_tfIdf)
pubmed_bon_sfs_tfIdfIcf_kmeansPlus = kmeans_plus(pubmed_bon_sfs_tfIdfIcf)
pubmed_bona_sfs_tf_kmeansPlus = kmeans_plus(pubmed_bona_sfs_tf)
pubmed_bona_sfs_tfIdf_kmeansPlus = kmeans_plus(pubmed_bona_sfs_tfIdf)
pubmed_bona_sfs_tfIdfIcf_kmeansPlus = kmeans_plus(pubmed_bona_sfs_tfIdfIcf)
# TF-RF
pubmed_boaw_tfrf_tf_kmeansPlus = kmeans_plus(pubmed_boaw_tfrf_tf)
pubmed_boaw_tfrf_tfIdf_kmeansPlus = kmeans_plus(pubmed_boaw_tfrf_tfIdf)
pubmed_boaw_tfrf_tfIdfIcf_kmeansPlus = kmeans_plus(pubmed_boaw_tfrf_tfIdfIcf)
pubmed_bon_tfrf_tf_kmeansPlus = kmeans_plus(pubmed_bon_tfrf_tf)
pubmed_bon_tfrf_tfIdf_kmeansPlus = kmeans_plus(pubmed_bon_tfrf_tfIdf)
pubmed_bon_tfrf_tfIdfIcf_kmeansPlus = kmeans_plus(pubmed_bon_tfrf_tfIdfIcf)
pubmed_bona_tfrf_tf_kmeansPlus = kmeans_plus(pubmed_bona_tfrf_tf)
pubmed_bona_tfrf_tfIdf_kmeansPlus = kmeans_plus(pubmed_bona_tfrf_tfIdf)
pubmed_bona_tfrf_tfIdfIcf_kmeansPlus = kmeans_plus(pubmed_bona_tfrf_tfIdfIcf)

## Spherical K-Means
# TODO: no spherical k-means results are computed yet; the headings below are placeholders
# SR
# SFS
# TF-RF


# scopus
# SR
scopus_boaw_sr_tf_kmeansPlus = kmeans_plus(scopus_boaw_sr_tf)
scopus_boaw_sr_tfIdf_kmeansPlus = kmeans_plus(scopus_boaw_sr_tfIdf)
scopus_boaw_sr_tfIdfIcf_kmeansPlus = kmeans_plus(scopus_boaw_sr_tfIdfIcf)
scopus_bon_sr_tf_kmeansPlus = kmeans_plus(scopus_bon_sr_tf)
scopus_bon_sr_tfIdf_kmeansPlus = kmeans_plus(scopus_bon_sr_tfIdf)
scopus_bon_sr_tfIdfIcf_kmeansPlus = kmeans_plus(scopus_bon_sr_tfIdfIcf)
scopus_bona_sr_tf_kmeansPlus = kmeans_plus(scopus_bona_sr_tf)
scopus_bona_sr_tfIdf_kmeansPlus = kmeans_plus(scopus_bona_sr_tfIdf)
scopus_bona_sr_tfIdfIcf_kmeansPlus = kmeans_plus(scopus_bona_sr_tfIdfIcf)
# SFS
scopus_boaw_sfs_tf_kmeansPlus = kmeans_plus(scopus_boaw_sfs_tf)
scopus_boaw_sfs_tfIdf_kmeansPlus = kmeans_plus(scopus_boaw_sfs_tfIdf)
scopus_boaw_sfs_tfIdfIcf_kmeansPlus = kmeans_plus(scopus_boaw_sfs_tfIdfIcf)
scopus_bon_sfs_tf_kmeansPlus = kmeans_plus(scopus_bon_sfs_tf)
scopus_bon_sfs_tfIdf_kmeansPlus = kmeans_plus(scopus_bon_sfs_tfIdf)
scopus_bon_sfs_tfIdfIcf_kmeansPlus = kmeans_plus(scopus_bon_sfs_tfIdfIcf)
scopus_bona_sfs_tf_kmeansPlus = kmeans_plus(scopus_bona_sfs_tf)
scopus_bona_sfs_tfIdf_kmeansPlus = kmeans_plus(scopus_bona_sfs_tfIdf)
scopus_bona_sfs_tfIdfIcf_kmeansPlus = kmeans_plus(scopus_bona_sfs_tfIdfIcf)
# TF-RF
scopus_boaw_tfrf_tf_kmeansPlus = kmeans_plus(scopus_boaw_tfrf_tf)
scopus_boaw_tfrf_tfIdf_kmeansPlus = kmeans_plus(scopus_boaw_tfrf_tfIdf)
scopus_boaw_tfrf_tfIdfIcf_kmeansPlus = kmeans_plus(scopus_boaw_tfrf_tfIdfIcf)
scopus_bon_tfrf_tf_kmeansPlus = kmeans_plus(scopus_bon_tfrf_tf)
scopus_bon_tfrf_tfIdf_kmeansPlus = kmeans_plus(scopus_bon_tfrf_tfIdf)
scopus_bon_tfrf_tfIdfIcf_kmeansPlus = kmeans_plus(scopus_bon_tfrf_tfIdfIcf)
scopus_bona_tfrf_tf_kmeansPlus = kmeans_plus(scopus_bona_tfrf_tf)
scopus_bona_tfrf_tfIdf_kmeansPlus = kmeans_plus(scopus_bona_tfrf_tfIdf)
scopus_bona_tfrf_tfIdfIcf_kmeansPlus = kmeans_plus(scopus_bona_tfrf_tfIdfIcf)

## Spherical K-Means
# TODO: no spherical k-means results are computed yet; the headings below are placeholders
# SR
# SFS
# TF-RF

# pubmed_bona_sr_tfIdfIcf_kmeansPlus

In [None]:
# save json
# The cluster labels of every configuration are stored under a key that is the
# variable name without its corpus prefix.
# pubmed
pubmedJsonClustering = {
    ## K-Means++
    # SR
    'boaw_sr_tf_kmeansPlus': pubmed_boaw_sr_tf_kmeansPlus,
    'boaw_sr_tfIdf_kmeansPlus': pubmed_boaw_sr_tfIdf_kmeansPlus,
    'boaw_sr_tfIdfIcf_kmeansPlus': pubmed_boaw_sr_tfIdfIcf_kmeansPlus,
    'bon_sr_tf_kmeansPlus': pubmed_bon_sr_tf_kmeansPlus,
    'bon_sr_tfIdf_kmeansPlus': pubmed_bon_sr_tfIdf_kmeansPlus,
    'bon_sr_tfIdfIcf_kmeansPlus': pubmed_bon_sr_tfIdfIcf_kmeansPlus,
    'bona_sr_tf_kmeansPlus': pubmed_bona_sr_tf_kmeansPlus,
    'bona_sr_tfIdf_kmeansPlus': pubmed_bona_sr_tfIdf_kmeansPlus,
    'bona_sr_tfIdfIcf_kmeansPlus': pubmed_bona_sr_tfIdfIcf_kmeansPlus,
    # SFS
    'boaw_sfs_tf_kmeansPlus': pubmed_boaw_sfs_tf_kmeansPlus,
    'boaw_sfs_tfIdf_kmeansPlus': pubmed_boaw_sfs_tfIdf_kmeansPlus,
    'boaw_sfs_tfIdfIcf_kmeansPlus': pubmed_boaw_sfs_tfIdfIcf_kmeansPlus,
    'bon_sfs_tf_kmeansPlus': pubmed_bon_sfs_tf_kmeansPlus,
    'bon_sfs_tfIdf_kmeansPlus': pubmed_bon_sfs_tfIdf_kmeansPlus,
    'bon_sfs_tfIdfIcf_kmeansPlus': pubmed_bon_sfs_tfIdfIcf_kmeansPlus,
    'bona_sfs_tf_kmeansPlus': pubmed_bona_sfs_tf_kmeansPlus,
    'bona_sfs_tfIdf_kmeansPlus': pubmed_bona_sfs_tfIdf_kmeansPlus,
    'bona_sfs_tfIdfIcf_kmeansPlus': pubmed_bona_sfs_tfIdfIcf_kmeansPlus,
    # TF-RF
    'boaw_tfrf_tf_kmeansPlus': pubmed_boaw_tfrf_tf_kmeansPlus,
    'boaw_tfrf_tfIdf_kmeansPlus': pubmed_boaw_tfrf_tfIdf_kmeansPlus,
    'boaw_tfrf_tfIdfIcf_kmeansPlus': pubmed_boaw_tfrf_tfIdfIcf_kmeansPlus,
    'bon_tfrf_tf_kmeansPlus': pubmed_bon_tfrf_tf_kmeansPlus,
    'bon_tfrf_tfIdf_kmeansPlus': pubmed_bon_tfrf_tfIdf_kmeansPlus,
    'bon_tfrf_tfIdfIcf_kmeansPlus': pubmed_bon_tfrf_tfIdfIcf_kmeansPlus,
    'bona_tfrf_tf_kmeansPlus': pubmed_bona_tfrf_tf_kmeansPlus,
    'bona_tfrf_tfIdf_kmeansPlus': pubmed_bona_tfrf_tfIdf_kmeansPlus,
    'bona_tfrf_tfIdfIcf_kmeansPlus': pubmed_bona_tfrf_tfIdfIcf_kmeansPlus,
    ## Spherical K-Means
}
# the with-block closes the file; no explicit f.close() is needed.
# np.asarray(...).tolist() accepts plain lists as well as the NumPy arrays these
# names hold after the read-back cell has run, so the cell works in either order.
with open('pubmed/clustering.json', 'w', encoding='utf-8') as f:
    json.dump({key: np.asarray(labels).tolist() for key, labels in pubmedJsonClustering.items()}, f, indent=4)


# scopus
scopusJsonClustering = {
    ## K-Means++
    # SR
    'boaw_sr_tf_kmeansPlus': scopus_boaw_sr_tf_kmeansPlus,
    'boaw_sr_tfIdf_kmeansPlus': scopus_boaw_sr_tfIdf_kmeansPlus,
    'boaw_sr_tfIdfIcf_kmeansPlus': scopus_boaw_sr_tfIdfIcf_kmeansPlus,
    'bon_sr_tf_kmeansPlus': scopus_bon_sr_tf_kmeansPlus,
    'bon_sr_tfIdf_kmeansPlus': scopus_bon_sr_tfIdf_kmeansPlus,
    'bon_sr_tfIdfIcf_kmeansPlus': scopus_bon_sr_tfIdfIcf_kmeansPlus,
    'bona_sr_tf_kmeansPlus': scopus_bona_sr_tf_kmeansPlus,
    'bona_sr_tfIdf_kmeansPlus': scopus_bona_sr_tfIdf_kmeansPlus,
    'bona_sr_tfIdfIcf_kmeansPlus': scopus_bona_sr_tfIdfIcf_kmeansPlus,
    # SFS
    'boaw_sfs_tf_kmeansPlus': scopus_boaw_sfs_tf_kmeansPlus,
    'boaw_sfs_tfIdf_kmeansPlus': scopus_boaw_sfs_tfIdf_kmeansPlus,
    'boaw_sfs_tfIdfIcf_kmeansPlus': scopus_boaw_sfs_tfIdfIcf_kmeansPlus,
    'bon_sfs_tf_kmeansPlus': scopus_bon_sfs_tf_kmeansPlus,
    'bon_sfs_tfIdf_kmeansPlus': scopus_bon_sfs_tfIdf_kmeansPlus,
    'bon_sfs_tfIdfIcf_kmeansPlus': scopus_bon_sfs_tfIdfIcf_kmeansPlus,
    'bona_sfs_tf_kmeansPlus': scopus_bona_sfs_tf_kmeansPlus,
    'bona_sfs_tfIdf_kmeansPlus': scopus_bona_sfs_tfIdf_kmeansPlus,
    'bona_sfs_tfIdfIcf_kmeansPlus': scopus_bona_sfs_tfIdfIcf_kmeansPlus,
    # TF-RF
    'boaw_tfrf_tf_kmeansPlus': scopus_boaw_tfrf_tf_kmeansPlus,
    'boaw_tfrf_tfIdf_kmeansPlus': scopus_boaw_tfrf_tfIdf_kmeansPlus,
    'boaw_tfrf_tfIdfIcf_kmeansPlus': scopus_boaw_tfrf_tfIdfIcf_kmeansPlus,
    'bon_tfrf_tf_kmeansPlus': scopus_bon_tfrf_tf_kmeansPlus,
    'bon_tfrf_tfIdf_kmeansPlus': scopus_bon_tfrf_tfIdf_kmeansPlus,
    'bon_tfrf_tfIdfIcf_kmeansPlus': scopus_bon_tfrf_tfIdfIcf_kmeansPlus,
    'bona_tfrf_tf_kmeansPlus': scopus_bona_tfrf_tf_kmeansPlus,
    'bona_tfrf_tfIdf_kmeansPlus': scopus_bona_tfrf_tfIdf_kmeansPlus,
    'bona_tfrf_tfIdfIcf_kmeansPlus': scopus_bona_tfrf_tfIdfIcf_kmeansPlus,
    ## Spherical K-Means
}
with open('scopus/clustering.json', 'w', encoding='utf-8') as f:
    json.dump({key: np.asarray(labels).tolist() for key, labels in scopusJsonClustering.items()}, f, indent=4)


In [None]:
# read json clustering
# Every entry is restored as a NumPy array with one row of cluster labels per
# tried cluster count.
# pubmed
# the with-block closes the file; no explicit f.close() is needed
with open('pubmed/clustering.json', 'r', encoding='utf-8') as f:
    pubmedJsonClustering = json.load(f)
## K-Means++
# SR
pubmed_boaw_sr_tf_kmeansPlus = np.array(pubmedJsonClustering['boaw_sr_tf_kmeansPlus'])
pubmed_boaw_sr_tfIdf_kmeansPlus = np.array(pubmedJsonClustering['boaw_sr_tfIdf_kmeansPlus'])
pubmed_boaw_sr_tfIdfIcf_kmeansPlus = np.array(pubmedJsonClustering['boaw_sr_tfIdfIcf_kmeansPlus'])
pubmed_bon_sr_tf_kmeansPlus = np.array(pubmedJsonClustering['bon_sr_tf_kmeansPlus'])
pubmed_bon_sr_tfIdf_kmeansPlus = np.array(pubmedJsonClustering['bon_sr_tfIdf_kmeansPlus'])
pubmed_bon_sr_tfIdfIcf_kmeansPlus = np.array(pubmedJsonClustering['bon_sr_tfIdfIcf_kmeansPlus'])
pubmed_bona_sr_tf_kmeansPlus = np.array(pubmedJsonClustering['bona_sr_tf_kmeansPlus'])
pubmed_bona_sr_tfIdf_kmeansPlus = np.array(pubmedJsonClustering['bona_sr_tfIdf_kmeansPlus'])
pubmed_bona_sr_tfIdfIcf_kmeansPlus = np.array(pubmedJsonClustering['bona_sr_tfIdfIcf_kmeansPlus'])
# SFS
pubmed_boaw_sfs_tf_kmeansPlus = np.array(pubmedJsonClustering['boaw_sfs_tf_kmeansPlus'])
pubmed_boaw_sfs_tfIdf_kmeansPlus = np.array(pubmedJsonClustering['boaw_sfs_tfIdf_kmeansPlus'])
pubmed_boaw_sfs_tfIdfIcf_kmeansPlus = np.array(pubmedJsonClustering['boaw_sfs_tfIdfIcf_kmeansPlus'])
pubmed_bon_sfs_tf_kmeansPlus = np.array(pubmedJsonClustering['bon_sfs_tf_kmeansPlus'])
pubmed_bon_sfs_tfIdf_kmeansPlus = np.array(pubmedJsonClustering['bon_sfs_tfIdf_kmeansPlus'])
pubmed_bon_sfs_tfIdfIcf_kmeansPlus = np.array(pubmedJsonClustering['bon_sfs_tfIdfIcf_kmeansPlus'])
pubmed_bona_sfs_tf_kmeansPlus = np.array(pubmedJsonClustering['bona_sfs_tf_kmeansPlus'])
pubmed_bona_sfs_tfIdf_kmeansPlus = np.array(pubmedJsonClustering['bona_sfs_tfIdf_kmeansPlus'])
pubmed_bona_sfs_tfIdfIcf_kmeansPlus = np.array(pubmedJsonClustering['bona_sfs_tfIdfIcf_kmeansPlus'])
# TF-RF
pubmed_boaw_tfrf_tf_kmeansPlus = np.array(pubmedJsonClustering['boaw_tfrf_tf_kmeansPlus'])
pubmed_boaw_tfrf_tfIdf_kmeansPlus = np.array(pubmedJsonClustering['boaw_tfrf_tfIdf_kmeansPlus'])
pubmed_boaw_tfrf_tfIdfIcf_kmeansPlus = np.array(pubmedJsonClustering['boaw_tfrf_tfIdfIcf_kmeansPlus'])
pubmed_bon_tfrf_tf_kmeansPlus = np.array(pubmedJsonClustering['bon_tfrf_tf_kmeansPlus'])
pubmed_bon_tfrf_tfIdf_kmeansPlus = np.array(pubmedJsonClustering['bon_tfrf_tfIdf_kmeansPlus'])
pubmed_bon_tfrf_tfIdfIcf_kmeansPlus = np.array(pubmedJsonClustering['bon_tfrf_tfIdfIcf_kmeansPlus'])
pubmed_bona_tfrf_tf_kmeansPlus = np.array(pubmedJsonClustering['bona_tfrf_tf_kmeansPlus'])
pubmed_bona_tfrf_tfIdf_kmeansPlus = np.array(pubmedJsonClustering['bona_tfrf_tfIdf_kmeansPlus'])
pubmed_bona_tfrf_tfIdfIcf_kmeansPlus = np.array(pubmedJsonClustering['bona_tfrf_tfIdfIcf_kmeansPlus'])

## Spherical K-Means


# scopus
with open('scopus/clustering.json', 'r', encoding='utf-8') as f:
    scopusJsonClustering = json.load(f)
## K-Means++
# SR
scopus_boaw_sr_tf_kmeansPlus = np.array(scopusJsonClustering['boaw_sr_tf_kmeansPlus'])
scopus_boaw_sr_tfIdf_kmeansPlus = np.array(scopusJsonClustering['boaw_sr_tfIdf_kmeansPlus'])
scopus_boaw_sr_tfIdfIcf_kmeansPlus = np.array(scopusJsonClustering['boaw_sr_tfIdfIcf_kmeansPlus'])
scopus_bon_sr_tf_kmeansPlus = np.array(scopusJsonClustering['bon_sr_tf_kmeansPlus'])
scopus_bon_sr_tfIdf_kmeansPlus = np.array(scopusJsonClustering['bon_sr_tfIdf_kmeansPlus'])
scopus_bon_sr_tfIdfIcf_kmeansPlus = np.array(scopusJsonClustering['bon_sr_tfIdfIcf_kmeansPlus'])
scopus_bona_sr_tf_kmeansPlus = np.array(scopusJsonClustering['bona_sr_tf_kmeansPlus'])
scopus_bona_sr_tfIdf_kmeansPlus = np.array(scopusJsonClustering['bona_sr_tfIdf_kmeansPlus'])
scopus_bona_sr_tfIdfIcf_kmeansPlus = np.array(scopusJsonClustering['bona_sr_tfIdfIcf_kmeansPlus'])
# SFS
scopus_boaw_sfs_tf_kmeansPlus = np.array(scopusJsonClustering['boaw_sfs_tf_kmeansPlus'])
scopus_boaw_sfs_tfIdf_kmeansPlus = np.array(scopusJsonClustering['boaw_sfs_tfIdf_kmeansPlus'])
scopus_boaw_sfs_tfIdfIcf_kmeansPlus = np.array(scopusJsonClustering['boaw_sfs_tfIdfIcf_kmeansPlus'])
scopus_bon_sfs_tf_kmeansPlus = np.array(scopusJsonClustering['bon_sfs_tf_kmeansPlus'])
scopus_bon_sfs_tfIdf_kmeansPlus = np.array(scopusJsonClustering['bon_sfs_tfIdf_kmeansPlus'])
scopus_bon_sfs_tfIdfIcf_kmeansPlus = np.array(scopusJsonClustering['bon_sfs_tfIdfIcf_kmeansPlus'])
scopus_bona_sfs_tf_kmeansPlus = np.array(scopusJsonClustering['bona_sfs_tf_kmeansPlus'])
scopus_bona_sfs_tfIdf_kmeansPlus = np.array(scopusJsonClustering['bona_sfs_tfIdf_kmeansPlus'])
scopus_bona_sfs_tfIdfIcf_kmeansPlus = np.array(scopusJsonClustering['bona_sfs_tfIdfIcf_kmeansPlus'])
# TF-RF
scopus_boaw_tfrf_tf_kmeansPlus = np.array(scopusJsonClustering['boaw_tfrf_tf_kmeansPlus'])
scopus_boaw_tfrf_tfIdf_kmeansPlus = np.array(scopusJsonClustering['boaw_tfrf_tfIdf_kmeansPlus'])
scopus_boaw_tfrf_tfIdfIcf_kmeansPlus = np.array(scopusJsonClustering['boaw_tfrf_tfIdfIcf_kmeansPlus'])
scopus_bon_tfrf_tf_kmeansPlus = np.array(scopusJsonClustering['bon_tfrf_tf_kmeansPlus'])
scopus_bon_tfrf_tfIdf_kmeansPlus = np.array(scopusJsonClustering['bon_tfrf_tfIdf_kmeansPlus'])
scopus_bon_tfrf_tfIdfIcf_kmeansPlus = np.array(scopusJsonClustering['bon_tfrf_tfIdfIcf_kmeansPlus'])
scopus_bona_tfrf_tf_kmeansPlus = np.array(scopusJsonClustering['bona_tfrf_tf_kmeansPlus'])
scopus_bona_tfrf_tfIdf_kmeansPlus = np.array(scopusJsonClustering['bona_tfrf_tfIdf_kmeansPlus'])
scopus_bona_tfrf_tfIdfIcf_kmeansPlus = np.array(scopusJsonClustering['bona_tfrf_tfIdfIcf_kmeansPlus'])

## Spherical K-Means

Performance Analysis

In [None]:
# map class names to integer labels
def encodeClass(classes):
    """Turn the class name of every document into an integer label.

    classes: sequence of class names, one entry per document.
    Returns whatever LabelEncoder.fit_transform produces for `classes`,
    with 1 added to every entry.
    """
    encoded = LabelEncoder().fit_transform(classes)
    # shift every code up by one (to compare with label)
    return encoded + 1
# integer ground-truth labels per corpus, as produced by encodeClass;
# these are what the purity / ami cells pass as `documentLabel`
pubmedLabel = encodeClass(pubmedClass)
scopusLabel = encodeClass(scopusClass)

# Silhouette
def silhouette(documentWeight, clusterLabel):
    """Silhouette score of every clustering in `clusterLabel`.

    documentWeight: feature matrix passed to silhouette_score as the data.
    clusterLabel: sequence of label vectors, one per clustering; each is
        scored against the same `documentWeight`.
    Returns a list of built-in floats, in the order of `clusterLabel`
    (empty list for an empty `clusterLabel`).
    """
    # float(): silhouette_score hands back a NumPy scalar; depending on the
    # input dtype that can be a type json.dump refuses (e.g. float32), and the
    # scores are written to JSON later in the notebook.
    return [
        float(silhouette_score(documentWeight, labels))
        for labels in tqdm(clusterLabel)
    ]

# sx = silhouette(pubmed_bona_sr_tfIdfIcf, pubmed_bona_sr_tfIdfIcf_kmeansPlus)
# print(sx)

# Purity
def purity(documentLabel, clusterLabel):
    """Purity of every clustering in `clusterLabel` against `documentLabel`.

    documentLabel: reference label of each document.
    clusterLabel: sequence of label vectors, one per clustering.
    Returns a list with one purity value per clustering, in input order.
    """
    def _purityOf(labels):
        table = contingency_matrix(documentLabel, labels)
        # sum of the largest count in each column, divided by the total count
        return np.sum(np.amax(table, axis=0)) / np.sum(table)

    return [_purityOf(labels) for labels in tqdm(clusterLabel)]

# px = purity(pubmedLabel, pubmed_bona_sr_tfIdfIcf_kmeansPlus)
# print(px)

# AMI
def ami(documentLabel, clusterLabel):
    """Adjusted mutual information of every clustering in `clusterLabel`.

    documentLabel: reference label of each document.
    clusterLabel: sequence of label vectors, one per clustering.
    Returns a list with adjusted_mutual_info_score(documentLabel, labels)
    for each label vector, in input order.
    """
    return [
        adjusted_mutual_info_score(documentLabel, labels)
        for labels in tqdm(clusterLabel)
    ]

# ax = ami(pubmedLabel, pubmed_bona_sr_tfIdfIcf_kmeansPlus)
# print(ax)

In [None]:
# performance analysis silhouette
# Score every K-Means++ labelling against the document-weight matrix of the
# same name. One pass over dataset x term selection x feature set x weighting
# replaces the 54 hand-copied assignments and runs them in the same order.
# Results are still bound to the flat module-level names
#   <dataset>_<feature>_<selection>_<weighting>_kmeansPlus_silhouette
# (through globals()) because other cells refer to those names directly.
for _dataset in ('pubmed', 'scopus'):
    ## K-Means++
    for _selection in ('sr', 'sfs', 'tfrf'):  # SR, SFS, TF-RF
        for _feature in ('boaw', 'bon', 'bona'):
            for _weighting in ('tf', 'tfIdf', 'tfIdfIcf'):
                _stem = '_'.join((_dataset, _feature, _selection, _weighting))
                globals()[_stem + '_kmeansPlus_silhouette'] = silhouette(
                    globals()[_stem],                  # document weights
                    globals()[_stem + '_kmeansPlus'],  # K-Means++ labels
                )

    ## Spherical K-Means
    # SR / SFS / TF-RF: nothing computed here yet

In [None]:
# performance analysis purity
# Compare every K-Means++ labelling with the encoded class labels of its corpus
# (pubmedLabel / scopusLabel). One pass over dataset x term selection x feature
# set x weighting replaces the 54 hand-copied assignments, in the same order.
# Results are still bound to the flat module-level names
#   <dataset>_<feature>_<selection>_<weighting>_kmeansPlus_purity
# (through globals()) because other cells refer to those names directly.
for _dataset in ('pubmed', 'scopus'):
    _truth = globals()[_dataset + 'Label']
    ## K-Means++
    for _selection in ('sr', 'sfs', 'tfrf'):  # SR, SFS, TF-RF
        for _feature in ('boaw', 'bon', 'bona'):
            for _weighting in ('tf', 'tfIdf', 'tfIdfIcf'):
                _stem = '_'.join((_dataset, _feature, _selection, _weighting, 'kmeansPlus'))
                globals()[_stem + '_purity'] = purity(_truth, globals()[_stem])

    ## Spherical K-Means
    # SR / SFS / TF-RF: nothing computed here yet

In [None]:
# performance analysis ami
# Adjusted mutual information between every K-Means++ labelling and the encoded
# class labels of its corpus (pubmedLabel / scopusLabel). One pass over
# dataset x term selection x feature set x weighting replaces the 54
# hand-copied assignments, in the same order.
# Results are still bound to the flat module-level names
#   <dataset>_<feature>_<selection>_<weighting>_kmeansPlus_ami
# (through globals()) because other cells may refer to those names directly.
for _dataset in ('pubmed', 'scopus'):
    _truth = globals()[_dataset + 'Label']
    ## K-Means++
    for _selection in ('sr', 'sfs', 'tfrf'):  # SR, SFS, TF-RF
        for _feature in ('boaw', 'bon', 'bona'):
            for _weighting in ('tf', 'tfIdf', 'tfIdfIcf'):
                _stem = '_'.join((_dataset, _feature, _selection, _weighting, 'kmeansPlus'))
                globals()[_stem + '_ami'] = ami(_truth, globals()[_stem])

    ## Spherical K-Means
    # SR / SFS / TF-RF: nothing computed here yet

In [None]:
# save json silhouette
# For each corpus, gather the K-Means++ silhouette score lists into
# <dataset>JsonSilhouette (same keys, same order as before) and write the
# dict to <dataset>/silhouette.json.
for _dataset in ('pubmed', 'scopus'):
    _scores = {}
    ## K-Means++
    for _selection in ('sr', 'sfs', 'tfrf'):  # SR, SFS, TF-RF
        for _feature in ('boaw', 'bon', 'bona'):
            for _weighting in ('tf', 'tfIdf', 'tfIdfIcf'):
                _key = '_'.join((_feature, _selection, _weighting, 'kmeansPlus', 'silhouette'))
                # the score list lives in the module-level variable <dataset>_<key>
                _scores[_key] = globals()[_dataset + '_' + _key]
    ## Spherical K-Means
    # (no entries yet)

    # keep the pubmedJsonSilhouette / scopusJsonSilhouette names available
    globals()[_dataset + 'JsonSilhouette'] = _scores

    # the with-block closes the file on exit; no explicit f.close() is needed
    with open(_dataset + '/silhouette.json', 'w', encoding='utf-8') as f:
        json.dump(_scores, f, indent=4)

In [None]:
# read json silhouette
# For each corpus, load <dataset>/silhouette.json into <dataset>JsonSilhouette
# and re-bind every K-Means++ score list to its flat module-level name
#   <dataset>_<feature>_<selection>_<weighting>_kmeansPlus_silhouette
for _dataset in ('pubmed', 'scopus'):
    # the with-block closes the file on exit; no explicit f.close() is needed
    with open(_dataset + '/silhouette.json', 'r', encoding='utf-8') as f:
        _scores = json.load(f)
    globals()[_dataset + 'JsonSilhouette'] = _scores

    ## K-Means++
    for _selection in ('sr', 'sfs', 'tfrf'):  # SR, SFS, TF-RF
        for _feature in ('boaw', 'bon', 'bona'):
            for _weighting in ('tf', 'tfIdf', 'tfIdfIcf'):
                _key = '_'.join((_feature, _selection, _weighting, 'kmeansPlus', 'silhouette'))
                globals()[_dataset + '_' + _key] = _scores[_key]

    ## Spherical K-Means
    # (no entries yet)

In [None]:
# Show one silhouette entry per term-selection method for the pubmed boaw/tf
# runs: the entry at list position 2, labelled k = position + 2.
def _entryAt(scores, position=2):
    """Return [('k=<position+2>', score)] for the entry at `position`, or [] if absent."""
    return [
        ('k=' + str(index + 2), value)
        for index, value in enumerate(scores)
        if index == position
    ]

print('SR:', _entryAt(pubmed_boaw_sr_tf_kmeansPlus_silhouette))
print('SFS:', _entryAt(pubmed_boaw_sfs_tf_kmeansPlus_silhouette))
print('TF-RF', _entryAt(pubmed_boaw_tfrf_tf_kmeansPlus_silhouette))

In [None]:
# save json purity
# pubmed
pubmedJsonPurity = {
    ## K-Means++
    # SR
    'boaw_sr_tf_kmeansPlus_purity': pubmed_boaw_sr_tf_kmeansPlus_purity,
    'boaw_sr_tfIdf_kmeansPlus_purity': pubmed_boaw_sr_tfIdf_kmeansPlus_purity,
    'boaw_sr_tfIdfIcf_kmeansPlus_purity': pubmed_boaw_sr_tfIdfIcf_kmeansPlus_purity,
    'bon_sr_tf_kmeansPlus_purity': pubmed_bon_sr_tf_kmeansPlus_purity,
    'bon_sr_tfIdf_kmeansPlus_purity': pubmed_bon_sr_tfIdf_kmeansPlus_purity,
    'bon_sr_tfIdfIcf_kmeansPlus_purity': pubmed_bon_sr_tfIdfIcf_kmeansPlus_purity,
    'bona_sr_tf_kmeansPlus_purity': pubmed_bona_sr_tf_kmeansPlus_purity,
    'bona_sr_tfIdf_kmeansPlus_purity': pubmed_bona_sr_tfIdf_kmeansPlus_purity,
    'bona_sr_tfIdfIcf_kmeansPlus_purity': pubmed_bona_sr_tfIdfIcf_kmeansPlus_purity,
    # SFS
    'boaw_sfs_tf_kmeansPlus_purity': pubmed_boaw_sfs_tf_kmeansPlus_purity,
    'boaw_sfs_tfIdf_kmeansPlus_purity': pubmed_boaw_sfs_tfIdf_kmeansPlus_purity,
    'boaw_sfs_tfIdfIcf_kmeansPlus_purity': pubmed_boaw_sfs_tfIdfIcf_kmeansPlus_purity,
    'bon_sfs_tf_kmeansPlus_purity': pubmed_bon_sfs_tf_kmeansPlus_purity,
    'bon_sfs_tfIdf_kmeansPlus_purity': pubmed_bon_sfs_tfIdf_kmeansPlus_purity,
    'bon_sfs_tfIdfIcf_kmeansPlus_purity': pubmed_bon_sfs_tfIdfIcf_kmeansPlus_purity,
    'bona_sfs_tf_kmeansPlus_purity': pubmed_bona_sfs_tf_kmeansPlus_purity,
    'bona_sfs_tfIdf_kmeansPlus_purity': pubmed_bona_sfs_tfIdf_kmeansPlus_purity,
    'bona_sfs_tfIdfIcf_kmeansPlus_purity': pubmed_bona_sfs_tfIdfIcf_kmeansPlus_purity,
    # TF-RF
    'boaw_tfrf_tf_kmeansPlus_purity': pubmed_boaw_tfrf_tf_kmeansPlus_purity,
    'boaw_tfrf_tfIdf_kmeansPlus_purity': pubmed_boaw_tfrf_tfIdf_kmeansPlus_purity,
    'boaw_tfrf_tfIdfIcf_kmeansPlus_purity': pubmed_boaw_tfrf_tfIdfIcf_kmeansPlus_purity,
    'bon_tfrf_tf_kmeansPlus_purity': pubmed_bon_tfrf_tf_kmeansPlus_purity,
    'bon_tfrf_tfIdf_kmeansPlus_purity': pubmed_bon_tfrf_tfIdf_kmeansPlus_purity,
    'bon_tfrf_tfIdfIcf_kmeansPlus_purity': pubmed_bon_tfrf_tfIdfIcf_kmeansPlus_purity,
    'bona_tfrf_tf_kmeansPlus_purity': pubmed_bona_tfrf_tf_kmeansPlus_purity,
    'bona_tfrf_tfIdf_kmeansPlus_purity': pubmed_bona_tfrf_tfIdf_kmeansPlus_purity,
    'bona_tfrf_tfIdfIcf_kmeansPlus_purity': pubmed_bona_tfrf_tfIdfIcf_kmeansPlus_purity,
    ## Spherical K-Means
}
# Write the pubmed purity results to disk as indented JSON.
# The `with` statement closes the file on exit, so no explicit close() is needed.
with open('pubmed/purity.json', 'w', encoding='utf-8') as f:
    json.dump(pubmedJsonPurity, f, indent=4)



# scopus
# Map every scopus purity result to the JSON key it is stored under.
## K-Means++
scopusJsonPurity = {
    # SR
    'boaw_sr_tf_kmeansPlus_purity': scopus_boaw_sr_tf_kmeansPlus_purity,
    'boaw_sr_tfIdf_kmeansPlus_purity': scopus_boaw_sr_tfIdf_kmeansPlus_purity,
    'boaw_sr_tfIdfIcf_kmeansPlus_purity': scopus_boaw_sr_tfIdfIcf_kmeansPlus_purity,
    'bon_sr_tf_kmeansPlus_purity': scopus_bon_sr_tf_kmeansPlus_purity,
    'bon_sr_tfIdf_kmeansPlus_purity': scopus_bon_sr_tfIdf_kmeansPlus_purity,
    'bon_sr_tfIdfIcf_kmeansPlus_purity': scopus_bon_sr_tfIdfIcf_kmeansPlus_purity,
    'bona_sr_tf_kmeansPlus_purity': scopus_bona_sr_tf_kmeansPlus_purity,
    'bona_sr_tfIdf_kmeansPlus_purity': scopus_bona_sr_tfIdf_kmeansPlus_purity,
    'bona_sr_tfIdfIcf_kmeansPlus_purity': scopus_bona_sr_tfIdfIcf_kmeansPlus_purity,
    # SFS
    'boaw_sfs_tf_kmeansPlus_purity': scopus_boaw_sfs_tf_kmeansPlus_purity,
    'boaw_sfs_tfIdf_kmeansPlus_purity': scopus_boaw_sfs_tfIdf_kmeansPlus_purity,
    'boaw_sfs_tfIdfIcf_kmeansPlus_purity': scopus_boaw_sfs_tfIdfIcf_kmeansPlus_purity,
    'bon_sfs_tf_kmeansPlus_purity': scopus_bon_sfs_tf_kmeansPlus_purity,
    'bon_sfs_tfIdf_kmeansPlus_purity': scopus_bon_sfs_tfIdf_kmeansPlus_purity,
    'bon_sfs_tfIdfIcf_kmeansPlus_purity': scopus_bon_sfs_tfIdfIcf_kmeansPlus_purity,
    'bona_sfs_tf_kmeansPlus_purity': scopus_bona_sfs_tf_kmeansPlus_purity,
    'bona_sfs_tfIdf_kmeansPlus_purity': scopus_bona_sfs_tfIdf_kmeansPlus_purity,
    'bona_sfs_tfIdfIcf_kmeansPlus_purity': scopus_bona_sfs_tfIdfIcf_kmeansPlus_purity,
    # TF-RF
    'boaw_tfrf_tf_kmeansPlus_purity': scopus_boaw_tfrf_tf_kmeansPlus_purity,
    'boaw_tfrf_tfIdf_kmeansPlus_purity': scopus_boaw_tfrf_tfIdf_kmeansPlus_purity,
    'boaw_tfrf_tfIdfIcf_kmeansPlus_purity': scopus_boaw_tfrf_tfIdfIcf_kmeansPlus_purity,
    'bon_tfrf_tf_kmeansPlus_purity': scopus_bon_tfrf_tf_kmeansPlus_purity,
    'bon_tfrf_tfIdf_kmeansPlus_purity': scopus_bon_tfrf_tfIdf_kmeansPlus_purity,
    'bon_tfrf_tfIdfIcf_kmeansPlus_purity': scopus_bon_tfrf_tfIdfIcf_kmeansPlus_purity,
    'bona_tfrf_tf_kmeansPlus_purity': scopus_bona_tfrf_tf_kmeansPlus_purity,
    'bona_tfrf_tfIdf_kmeansPlus_purity': scopus_bona_tfrf_tfIdf_kmeansPlus_purity,
    'bona_tfrf_tfIdfIcf_kmeansPlus_purity': scopus_bona_tfrf_tfIdfIcf_kmeansPlus_purity,
    ## Spherical K-Means
}
# Write the mapping as indented JSON. The `with` statement closes the file on
# exit, so no explicit close() is needed.
with open('scopus/purity.json', 'w', encoding='utf-8') as f:
    json.dump(scopusJsonPurity, f, indent=4)

In [None]:
# read json purity
# pubmed
# Load the saved purity results. The `with` statement closes the file on exit,
# so no explicit close() is needed afterwards.
with open('pubmed/purity.json', 'r', encoding='utf-8') as f:
    pubmedJsonPurity = json.load(f)
# Rebind each stored entry to its notebook-level variable; a missing key
# raises KeyError.
## K-Means++
# SR
pubmed_boaw_sr_tf_kmeansPlus_purity = pubmedJsonPurity['boaw_sr_tf_kmeansPlus_purity']
pubmed_boaw_sr_tfIdf_kmeansPlus_purity = pubmedJsonPurity['boaw_sr_tfIdf_kmeansPlus_purity']
pubmed_boaw_sr_tfIdfIcf_kmeansPlus_purity = pubmedJsonPurity['boaw_sr_tfIdfIcf_kmeansPlus_purity']
pubmed_bon_sr_tf_kmeansPlus_purity = pubmedJsonPurity['bon_sr_tf_kmeansPlus_purity']
pubmed_bon_sr_tfIdf_kmeansPlus_purity = pubmedJsonPurity['bon_sr_tfIdf_kmeansPlus_purity']
pubmed_bon_sr_tfIdfIcf_kmeansPlus_purity = pubmedJsonPurity['bon_sr_tfIdfIcf_kmeansPlus_purity']
pubmed_bona_sr_tf_kmeansPlus_purity = pubmedJsonPurity['bona_sr_tf_kmeansPlus_purity']
pubmed_bona_sr_tfIdf_kmeansPlus_purity = pubmedJsonPurity['bona_sr_tfIdf_kmeansPlus_purity']
pubmed_bona_sr_tfIdfIcf_kmeansPlus_purity = pubmedJsonPurity['bona_sr_tfIdfIcf_kmeansPlus_purity']
# SFS
pubmed_boaw_sfs_tf_kmeansPlus_purity = pubmedJsonPurity['boaw_sfs_tf_kmeansPlus_purity']
pubmed_boaw_sfs_tfIdf_kmeansPlus_purity = pubmedJsonPurity['boaw_sfs_tfIdf_kmeansPlus_purity']
pubmed_boaw_sfs_tfIdfIcf_kmeansPlus_purity = pubmedJsonPurity['boaw_sfs_tfIdfIcf_kmeansPlus_purity']
pubmed_bon_sfs_tf_kmeansPlus_purity = pubmedJsonPurity['bon_sfs_tf_kmeansPlus_purity']
pubmed_bon_sfs_tfIdf_kmeansPlus_purity = pubmedJsonPurity['bon_sfs_tfIdf_kmeansPlus_purity']
pubmed_bon_sfs_tfIdfIcf_kmeansPlus_purity = pubmedJsonPurity['bon_sfs_tfIdfIcf_kmeansPlus_purity']
pubmed_bona_sfs_tf_kmeansPlus_purity = pubmedJsonPurity['bona_sfs_tf_kmeansPlus_purity']
pubmed_bona_sfs_tfIdf_kmeansPlus_purity = pubmedJsonPurity['bona_sfs_tfIdf_kmeansPlus_purity']
pubmed_bona_sfs_tfIdfIcf_kmeansPlus_purity = pubmedJsonPurity['bona_sfs_tfIdfIcf_kmeansPlus_purity']
# TF-RF
pubmed_boaw_tfrf_tf_kmeansPlus_purity = pubmedJsonPurity['boaw_tfrf_tf_kmeansPlus_purity']
pubmed_boaw_tfrf_tfIdf_kmeansPlus_purity = pubmedJsonPurity['boaw_tfrf_tfIdf_kmeansPlus_purity']
pubmed_boaw_tfrf_tfIdfIcf_kmeansPlus_purity = pubmedJsonPurity['boaw_tfrf_tfIdfIcf_kmeansPlus_purity']
pubmed_bon_tfrf_tf_kmeansPlus_purity = pubmedJsonPurity['bon_tfrf_tf_kmeansPlus_purity']
pubmed_bon_tfrf_tfIdf_kmeansPlus_purity = pubmedJsonPurity['bon_tfrf_tfIdf_kmeansPlus_purity']
pubmed_bon_tfrf_tfIdfIcf_kmeansPlus_purity = pubmedJsonPurity['bon_tfrf_tfIdfIcf_kmeansPlus_purity']
pubmed_bona_tfrf_tf_kmeansPlus_purity = pubmedJsonPurity['bona_tfrf_tf_kmeansPlus_purity']
pubmed_bona_tfrf_tfIdf_kmeansPlus_purity = pubmedJsonPurity['bona_tfrf_tfIdf_kmeansPlus_purity']
pubmed_bona_tfrf_tfIdfIcf_kmeansPlus_purity = pubmedJsonPurity['bona_tfrf_tfIdfIcf_kmeansPlus_purity']

## Spherical K-Means


# scopus
# Load the saved purity results. The `with` statement closes the file on exit,
# so no explicit close() is needed afterwards.
with open('scopus/purity.json', 'r', encoding='utf-8') as f:
    scopusJsonPurity = json.load(f)
# Rebind each stored entry to its notebook-level variable; a missing key
# raises KeyError.
## K-Means++
# SR
scopus_boaw_sr_tf_kmeansPlus_purity = scopusJsonPurity['boaw_sr_tf_kmeansPlus_purity']
scopus_boaw_sr_tfIdf_kmeansPlus_purity = scopusJsonPurity['boaw_sr_tfIdf_kmeansPlus_purity']
scopus_boaw_sr_tfIdfIcf_kmeansPlus_purity = scopusJsonPurity['boaw_sr_tfIdfIcf_kmeansPlus_purity']
scopus_bon_sr_tf_kmeansPlus_purity = scopusJsonPurity['bon_sr_tf_kmeansPlus_purity']
scopus_bon_sr_tfIdf_kmeansPlus_purity = scopusJsonPurity['bon_sr_tfIdf_kmeansPlus_purity']
scopus_bon_sr_tfIdfIcf_kmeansPlus_purity = scopusJsonPurity['bon_sr_tfIdfIcf_kmeansPlus_purity']
scopus_bona_sr_tf_kmeansPlus_purity = scopusJsonPurity['bona_sr_tf_kmeansPlus_purity']
scopus_bona_sr_tfIdf_kmeansPlus_purity = scopusJsonPurity['bona_sr_tfIdf_kmeansPlus_purity']
scopus_bona_sr_tfIdfIcf_kmeansPlus_purity = scopusJsonPurity['bona_sr_tfIdfIcf_kmeansPlus_purity']
# SFS
scopus_boaw_sfs_tf_kmeansPlus_purity = scopusJsonPurity['boaw_sfs_tf_kmeansPlus_purity']
scopus_boaw_sfs_tfIdf_kmeansPlus_purity = scopusJsonPurity['boaw_sfs_tfIdf_kmeansPlus_purity']
scopus_boaw_sfs_tfIdfIcf_kmeansPlus_purity = scopusJsonPurity['boaw_sfs_tfIdfIcf_kmeansPlus_purity']
scopus_bon_sfs_tf_kmeansPlus_purity = scopusJsonPurity['bon_sfs_tf_kmeansPlus_purity']
scopus_bon_sfs_tfIdf_kmeansPlus_purity = scopusJsonPurity['bon_sfs_tfIdf_kmeansPlus_purity']
scopus_bon_sfs_tfIdfIcf_kmeansPlus_purity = scopusJsonPurity['bon_sfs_tfIdfIcf_kmeansPlus_purity']
scopus_bona_sfs_tf_kmeansPlus_purity = scopusJsonPurity['bona_sfs_tf_kmeansPlus_purity']
scopus_bona_sfs_tfIdf_kmeansPlus_purity = scopusJsonPurity['bona_sfs_tfIdf_kmeansPlus_purity']
scopus_bona_sfs_tfIdfIcf_kmeansPlus_purity = scopusJsonPurity['bona_sfs_tfIdfIcf_kmeansPlus_purity']
# TF-RF
scopus_boaw_tfrf_tf_kmeansPlus_purity = scopusJsonPurity['boaw_tfrf_tf_kmeansPlus_purity']
scopus_boaw_tfrf_tfIdf_kmeansPlus_purity = scopusJsonPurity['boaw_tfrf_tfIdf_kmeansPlus_purity']
scopus_boaw_tfrf_tfIdfIcf_kmeansPlus_purity = scopusJsonPurity['boaw_tfrf_tfIdfIcf_kmeansPlus_purity']
scopus_bon_tfrf_tf_kmeansPlus_purity = scopusJsonPurity['bon_tfrf_tf_kmeansPlus_purity']
scopus_bon_tfrf_tfIdf_kmeansPlus_purity = scopusJsonPurity['bon_tfrf_tfIdf_kmeansPlus_purity']
scopus_bon_tfrf_tfIdfIcf_kmeansPlus_purity = scopusJsonPurity['bon_tfrf_tfIdfIcf_kmeansPlus_purity']
scopus_bona_tfrf_tf_kmeansPlus_purity = scopusJsonPurity['bona_tfrf_tf_kmeansPlus_purity']
scopus_bona_tfrf_tfIdf_kmeansPlus_purity = scopusJsonPurity['bona_tfrf_tfIdf_kmeansPlus_purity']
scopus_bona_tfrf_tfIdfIcf_kmeansPlus_purity = scopusJsonPurity['bona_tfrf_tfIdfIcf_kmeansPlus_purity']

## Spherical K-Means

In [None]:
# Show the pubmed BoAW/TF purity for the SR, SFS and TF-RF variants.
# List position 0 is labelled k=2, so the `idx == 2` filter keeps only k=4.
for _label, _purities in (
    ('SR:', pubmed_boaw_sr_tf_kmeansPlus_purity),
    ('SFS:', pubmed_boaw_sfs_tf_kmeansPlus_purity),
    ('TF-RF', pubmed_boaw_tfrf_tf_kmeansPlus_purity),
):
    _picked = [
        ('k=' + str(idx + 2), value)
        for idx, value in enumerate(_purities)
        if idx == 2
    ]
    print(_label, _picked)

In [None]:
# save json ami
# pubmed
# Map every pubmed AMI result to the JSON key it is stored under.
pubmedJsonAmi = {
    ## K-Means++
    # SR
    'boaw_sr_tf_kmeansPlus_ami': pubmed_boaw_sr_tf_kmeansPlus_ami,
    'boaw_sr_tfIdf_kmeansPlus_ami': pubmed_boaw_sr_tfIdf_kmeansPlus_ami,
    'boaw_sr_tfIdfIcf_kmeansPlus_ami': pubmed_boaw_sr_tfIdfIcf_kmeansPlus_ami,
    'bon_sr_tf_kmeansPlus_ami': pubmed_bon_sr_tf_kmeansPlus_ami,
    'bon_sr_tfIdf_kmeansPlus_ami': pubmed_bon_sr_tfIdf_kmeansPlus_ami,
    'bon_sr_tfIdfIcf_kmeansPlus_ami': pubmed_bon_sr_tfIdfIcf_kmeansPlus_ami,
    'bona_sr_tf_kmeansPlus_ami': pubmed_bona_sr_tf_kmeansPlus_ami,
    'bona_sr_tfIdf_kmeansPlus_ami': pubmed_bona_sr_tfIdf_kmeansPlus_ami,
    'bona_sr_tfIdfIcf_kmeansPlus_ami': pubmed_bona_sr_tfIdfIcf_kmeansPlus_ami,
    # SFS
    'boaw_sfs_tf_kmeansPlus_ami': pubmed_boaw_sfs_tf_kmeansPlus_ami,
    'boaw_sfs_tfIdf_kmeansPlus_ami': pubmed_boaw_sfs_tfIdf_kmeansPlus_ami,
    'boaw_sfs_tfIdfIcf_kmeansPlus_ami': pubmed_boaw_sfs_tfIdfIcf_kmeansPlus_ami,
    'bon_sfs_tf_kmeansPlus_ami': pubmed_bon_sfs_tf_kmeansPlus_ami,
    'bon_sfs_tfIdf_kmeansPlus_ami': pubmed_bon_sfs_tfIdf_kmeansPlus_ami,
    'bon_sfs_tfIdfIcf_kmeansPlus_ami': pubmed_bon_sfs_tfIdfIcf_kmeansPlus_ami,
    'bona_sfs_tf_kmeansPlus_ami': pubmed_bona_sfs_tf_kmeansPlus_ami,
    'bona_sfs_tfIdf_kmeansPlus_ami': pubmed_bona_sfs_tfIdf_kmeansPlus_ami,
    'bona_sfs_tfIdfIcf_kmeansPlus_ami': pubmed_bona_sfs_tfIdfIcf_kmeansPlus_ami,
    # TF-RF
    'boaw_tfrf_tf_kmeansPlus_ami': pubmed_boaw_tfrf_tf_kmeansPlus_ami,
    'boaw_tfrf_tfIdf_kmeansPlus_ami': pubmed_boaw_tfrf_tfIdf_kmeansPlus_ami,
    'boaw_tfrf_tfIdfIcf_kmeansPlus_ami': pubmed_boaw_tfrf_tfIdfIcf_kmeansPlus_ami,
    'bon_tfrf_tf_kmeansPlus_ami': pubmed_bon_tfrf_tf_kmeansPlus_ami,
    'bon_tfrf_tfIdf_kmeansPlus_ami': pubmed_bon_tfrf_tfIdf_kmeansPlus_ami,
    'bon_tfrf_tfIdfIcf_kmeansPlus_ami': pubmed_bon_tfrf_tfIdfIcf_kmeansPlus_ami,
    'bona_tfrf_tf_kmeansPlus_ami': pubmed_bona_tfrf_tf_kmeansPlus_ami,
    'bona_tfrf_tfIdf_kmeansPlus_ami': pubmed_bona_tfrf_tfIdf_kmeansPlus_ami,
    'bona_tfrf_tfIdfIcf_kmeansPlus_ami': pubmed_bona_tfrf_tfIdfIcf_kmeansPlus_ami,
    ## Spherical K-Means
}
# Write the mapping as indented JSON. The `with` statement closes the file on
# exit, so no explicit close() is needed.
with open('pubmed/ami.json', 'w', encoding='utf-8') as f:
    json.dump(pubmedJsonAmi, f, indent=4)



# scopus
# Map every scopus AMI result to the JSON key it is stored under.
## K-Means++
scopusJsonAmi = {
    # SR
    'boaw_sr_tf_kmeansPlus_ami': scopus_boaw_sr_tf_kmeansPlus_ami,
    'boaw_sr_tfIdf_kmeansPlus_ami': scopus_boaw_sr_tfIdf_kmeansPlus_ami,
    'boaw_sr_tfIdfIcf_kmeansPlus_ami': scopus_boaw_sr_tfIdfIcf_kmeansPlus_ami,
    'bon_sr_tf_kmeansPlus_ami': scopus_bon_sr_tf_kmeansPlus_ami,
    'bon_sr_tfIdf_kmeansPlus_ami': scopus_bon_sr_tfIdf_kmeansPlus_ami,
    'bon_sr_tfIdfIcf_kmeansPlus_ami': scopus_bon_sr_tfIdfIcf_kmeansPlus_ami,
    'bona_sr_tf_kmeansPlus_ami': scopus_bona_sr_tf_kmeansPlus_ami,
    'bona_sr_tfIdf_kmeansPlus_ami': scopus_bona_sr_tfIdf_kmeansPlus_ami,
    'bona_sr_tfIdfIcf_kmeansPlus_ami': scopus_bona_sr_tfIdfIcf_kmeansPlus_ami,
    # SFS
    'boaw_sfs_tf_kmeansPlus_ami': scopus_boaw_sfs_tf_kmeansPlus_ami,
    'boaw_sfs_tfIdf_kmeansPlus_ami': scopus_boaw_sfs_tfIdf_kmeansPlus_ami,
    'boaw_sfs_tfIdfIcf_kmeansPlus_ami': scopus_boaw_sfs_tfIdfIcf_kmeansPlus_ami,
    'bon_sfs_tf_kmeansPlus_ami': scopus_bon_sfs_tf_kmeansPlus_ami,
    'bon_sfs_tfIdf_kmeansPlus_ami': scopus_bon_sfs_tfIdf_kmeansPlus_ami,
    'bon_sfs_tfIdfIcf_kmeansPlus_ami': scopus_bon_sfs_tfIdfIcf_kmeansPlus_ami,
    'bona_sfs_tf_kmeansPlus_ami': scopus_bona_sfs_tf_kmeansPlus_ami,
    'bona_sfs_tfIdf_kmeansPlus_ami': scopus_bona_sfs_tfIdf_kmeansPlus_ami,
    'bona_sfs_tfIdfIcf_kmeansPlus_ami': scopus_bona_sfs_tfIdfIcf_kmeansPlus_ami,
    # TF-RF
    'boaw_tfrf_tf_kmeansPlus_ami': scopus_boaw_tfrf_tf_kmeansPlus_ami,
    'boaw_tfrf_tfIdf_kmeansPlus_ami': scopus_boaw_tfrf_tfIdf_kmeansPlus_ami,
    'boaw_tfrf_tfIdfIcf_kmeansPlus_ami': scopus_boaw_tfrf_tfIdfIcf_kmeansPlus_ami,
    'bon_tfrf_tf_kmeansPlus_ami': scopus_bon_tfrf_tf_kmeansPlus_ami,
    'bon_tfrf_tfIdf_kmeansPlus_ami': scopus_bon_tfrf_tfIdf_kmeansPlus_ami,
    'bon_tfrf_tfIdfIcf_kmeansPlus_ami': scopus_bon_tfrf_tfIdfIcf_kmeansPlus_ami,
    'bona_tfrf_tf_kmeansPlus_ami': scopus_bona_tfrf_tf_kmeansPlus_ami,
    'bona_tfrf_tfIdf_kmeansPlus_ami': scopus_bona_tfrf_tfIdf_kmeansPlus_ami,
    'bona_tfrf_tfIdfIcf_kmeansPlus_ami': scopus_bona_tfrf_tfIdfIcf_kmeansPlus_ami,
    ## Spherical K-Means
}
# Write the mapping as indented JSON. The `with` statement closes the file on
# exit, so no explicit close() is needed.
with open('scopus/ami.json', 'w', encoding='utf-8') as f:
    json.dump(scopusJsonAmi, f, indent=4)

In [None]:
# read json ami
# pubmed
# Load the saved AMI results. The `with` statement closes the file on exit,
# so no explicit close() is needed afterwards.
with open('pubmed/ami.json', 'r', encoding='utf-8') as f:
    pubmedJsonAmi = json.load(f)
# Rebind each stored entry to its notebook-level variable; a missing key
# raises KeyError.
## K-Means++
# SR
pubmed_boaw_sr_tf_kmeansPlus_ami = pubmedJsonAmi['boaw_sr_tf_kmeansPlus_ami']
pubmed_boaw_sr_tfIdf_kmeansPlus_ami = pubmedJsonAmi['boaw_sr_tfIdf_kmeansPlus_ami']
pubmed_boaw_sr_tfIdfIcf_kmeansPlus_ami = pubmedJsonAmi['boaw_sr_tfIdfIcf_kmeansPlus_ami']
pubmed_bon_sr_tf_kmeansPlus_ami = pubmedJsonAmi['bon_sr_tf_kmeansPlus_ami']
pubmed_bon_sr_tfIdf_kmeansPlus_ami = pubmedJsonAmi['bon_sr_tfIdf_kmeansPlus_ami']
pubmed_bon_sr_tfIdfIcf_kmeansPlus_ami = pubmedJsonAmi['bon_sr_tfIdfIcf_kmeansPlus_ami']
pubmed_bona_sr_tf_kmeansPlus_ami = pubmedJsonAmi['bona_sr_tf_kmeansPlus_ami']
pubmed_bona_sr_tfIdf_kmeansPlus_ami = pubmedJsonAmi['bona_sr_tfIdf_kmeansPlus_ami']
pubmed_bona_sr_tfIdfIcf_kmeansPlus_ami = pubmedJsonAmi['bona_sr_tfIdfIcf_kmeansPlus_ami']
# SFS
pubmed_boaw_sfs_tf_kmeansPlus_ami = pubmedJsonAmi['boaw_sfs_tf_kmeansPlus_ami']
pubmed_boaw_sfs_tfIdf_kmeansPlus_ami = pubmedJsonAmi['boaw_sfs_tfIdf_kmeansPlus_ami']
pubmed_boaw_sfs_tfIdfIcf_kmeansPlus_ami = pubmedJsonAmi['boaw_sfs_tfIdfIcf_kmeansPlus_ami']
pubmed_bon_sfs_tf_kmeansPlus_ami = pubmedJsonAmi['bon_sfs_tf_kmeansPlus_ami']
pubmed_bon_sfs_tfIdf_kmeansPlus_ami = pubmedJsonAmi['bon_sfs_tfIdf_kmeansPlus_ami']
pubmed_bon_sfs_tfIdfIcf_kmeansPlus_ami = pubmedJsonAmi['bon_sfs_tfIdfIcf_kmeansPlus_ami']
pubmed_bona_sfs_tf_kmeansPlus_ami = pubmedJsonAmi['bona_sfs_tf_kmeansPlus_ami']
pubmed_bona_sfs_tfIdf_kmeansPlus_ami = pubmedJsonAmi['bona_sfs_tfIdf_kmeansPlus_ami']
pubmed_bona_sfs_tfIdfIcf_kmeansPlus_ami = pubmedJsonAmi['bona_sfs_tfIdfIcf_kmeansPlus_ami']
# TF-RF
pubmed_boaw_tfrf_tf_kmeansPlus_ami = pubmedJsonAmi['boaw_tfrf_tf_kmeansPlus_ami']
pubmed_boaw_tfrf_tfIdf_kmeansPlus_ami = pubmedJsonAmi['boaw_tfrf_tfIdf_kmeansPlus_ami']
pubmed_boaw_tfrf_tfIdfIcf_kmeansPlus_ami = pubmedJsonAmi['boaw_tfrf_tfIdfIcf_kmeansPlus_ami']
pubmed_bon_tfrf_tf_kmeansPlus_ami = pubmedJsonAmi['bon_tfrf_tf_kmeansPlus_ami']
pubmed_bon_tfrf_tfIdf_kmeansPlus_ami = pubmedJsonAmi['bon_tfrf_tfIdf_kmeansPlus_ami']
pubmed_bon_tfrf_tfIdfIcf_kmeansPlus_ami = pubmedJsonAmi['bon_tfrf_tfIdfIcf_kmeansPlus_ami']
pubmed_bona_tfrf_tf_kmeansPlus_ami = pubmedJsonAmi['bona_tfrf_tf_kmeansPlus_ami']
pubmed_bona_tfrf_tfIdf_kmeansPlus_ami = pubmedJsonAmi['bona_tfrf_tfIdf_kmeansPlus_ami']
pubmed_bona_tfrf_tfIdfIcf_kmeansPlus_ami = pubmedJsonAmi['bona_tfrf_tfIdfIcf_kmeansPlus_ami']

## Spherical K-Means


# scopus
# Load the saved AMI results. The `with` statement closes the file on exit,
# so no explicit close() is needed afterwards.
with open('scopus/ami.json', 'r', encoding='utf-8') as f:
    scopusJsonAmi = json.load(f)
# Rebind each stored entry to its notebook-level variable; a missing key
# raises KeyError.
## K-Means++
# SR
scopus_boaw_sr_tf_kmeansPlus_ami = scopusJsonAmi['boaw_sr_tf_kmeansPlus_ami']
scopus_boaw_sr_tfIdf_kmeansPlus_ami = scopusJsonAmi['boaw_sr_tfIdf_kmeansPlus_ami']
scopus_boaw_sr_tfIdfIcf_kmeansPlus_ami = scopusJsonAmi['boaw_sr_tfIdfIcf_kmeansPlus_ami']
scopus_bon_sr_tf_kmeansPlus_ami = scopusJsonAmi['bon_sr_tf_kmeansPlus_ami']
scopus_bon_sr_tfIdf_kmeansPlus_ami = scopusJsonAmi['bon_sr_tfIdf_kmeansPlus_ami']
scopus_bon_sr_tfIdfIcf_kmeansPlus_ami = scopusJsonAmi['bon_sr_tfIdfIcf_kmeansPlus_ami']
scopus_bona_sr_tf_kmeansPlus_ami = scopusJsonAmi['bona_sr_tf_kmeansPlus_ami']
scopus_bona_sr_tfIdf_kmeansPlus_ami = scopusJsonAmi['bona_sr_tfIdf_kmeansPlus_ami']
scopus_bona_sr_tfIdfIcf_kmeansPlus_ami = scopusJsonAmi['bona_sr_tfIdfIcf_kmeansPlus_ami']
# SFS
scopus_boaw_sfs_tf_kmeansPlus_ami = scopusJsonAmi['boaw_sfs_tf_kmeansPlus_ami']
scopus_boaw_sfs_tfIdf_kmeansPlus_ami = scopusJsonAmi['boaw_sfs_tfIdf_kmeansPlus_ami']
scopus_boaw_sfs_tfIdfIcf_kmeansPlus_ami = scopusJsonAmi['boaw_sfs_tfIdfIcf_kmeansPlus_ami']
scopus_bon_sfs_tf_kmeansPlus_ami = scopusJsonAmi['bon_sfs_tf_kmeansPlus_ami']
scopus_bon_sfs_tfIdf_kmeansPlus_ami = scopusJsonAmi['bon_sfs_tfIdf_kmeansPlus_ami']
scopus_bon_sfs_tfIdfIcf_kmeansPlus_ami = scopusJsonAmi['bon_sfs_tfIdfIcf_kmeansPlus_ami']
scopus_bona_sfs_tf_kmeansPlus_ami = scopusJsonAmi['bona_sfs_tf_kmeansPlus_ami']
scopus_bona_sfs_tfIdf_kmeansPlus_ami = scopusJsonAmi['bona_sfs_tfIdf_kmeansPlus_ami']
scopus_bona_sfs_tfIdfIcf_kmeansPlus_ami = scopusJsonAmi['bona_sfs_tfIdfIcf_kmeansPlus_ami']
# TF-RF
scopus_boaw_tfrf_tf_kmeansPlus_ami = scopusJsonAmi['boaw_tfrf_tf_kmeansPlus_ami']
scopus_boaw_tfrf_tfIdf_kmeansPlus_ami = scopusJsonAmi['boaw_tfrf_tfIdf_kmeansPlus_ami']
scopus_boaw_tfrf_tfIdfIcf_kmeansPlus_ami = scopusJsonAmi['boaw_tfrf_tfIdfIcf_kmeansPlus_ami']
scopus_bon_tfrf_tf_kmeansPlus_ami = scopusJsonAmi['bon_tfrf_tf_kmeansPlus_ami']
scopus_bon_tfrf_tfIdf_kmeansPlus_ami = scopusJsonAmi['bon_tfrf_tfIdf_kmeansPlus_ami']
scopus_bon_tfrf_tfIdfIcf_kmeansPlus_ami = scopusJsonAmi['bon_tfrf_tfIdfIcf_kmeansPlus_ami']
scopus_bona_tfrf_tf_kmeansPlus_ami = scopusJsonAmi['bona_tfrf_tf_kmeansPlus_ami']
scopus_bona_tfrf_tfIdf_kmeansPlus_ami = scopusJsonAmi['bona_tfrf_tfIdf_kmeansPlus_ami']
scopus_bona_tfrf_tfIdfIcf_kmeansPlus_ami = scopusJsonAmi['bona_tfrf_tfIdfIcf_kmeansPlus_ami']

## Spherical K-Means

In [None]:
# For the SR, SFS and TF-RF variants of the pubmed BoAW/TF run, print every
# k whose AMI equals the best AMI in that list.
# List position 0 is labelled k=2, hence the `+ 2` offset.
for _label, _scores in (
    ('SR:', pubmed_boaw_sr_tf_kmeansPlus_ami),
    ('SFS:', pubmed_boaw_sfs_tf_kmeansPlus_ami),
    ('TF-RF', pubmed_boaw_tfrf_tf_kmeansPlus_ami),
):
    # Compute the maximum once per list rather than once per element.
    # default=None keeps an empty list printing [] instead of raising.
    _best = max(_scores, default=None)
    print(_label, [('k=' + str(idx + 2), score) for idx, score in enumerate(_scores) if score == _best])