In [1]:
import glob
import argparse
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# Metric names; each one is also used as the sub-directory name that is read
# for every run directory.
evaluation_methods = ['ari', 'nmi', 'purity']

# One accumulator per metric, keyed by the name parsed from each result line.
ari_best_result, nmi_best_result, purity_best_result = {}, {}, {}


In [2]:
# Sum, per evaluation metric, the best score of every entry listed in
# <run dir>/<metric>/total_best_result.txt over all run directories matched
# by beta_re_34/*.
#
# The accumulators are reset here so the cell is idempotent: `total` restarts
# at 0 on every execution, so the sums must restart too. Otherwise re-running
# this cell (or running it after the averaging cell has already overwritten
# the dicts with means) would silently corrupt the averages.
ari_best_result = dict()
nmi_best_result = dict()
purity_best_result = dict()

total = 0  # number of run directories visited; used as the divisor later
for dirs in glob.glob('beta_re_34/*'):
    for em in evaluation_methods:
        # Pick the accumulator for this metric. Anything that is not
        # 'ari' or 'nmi' is treated as purity.
        if em == 'ari':
            sums = ari_best_result
        elif em == 'nmi':
            sums = nmi_best_result
        else:
            sums = purity_best_result

        with open(dirs + '/' + em + '/total_best_result.txt') as f:
            lines = f.read().splitlines()

        # The first line of the file is skipped (presumably a header -
        # TODO confirm against the writer of total_best_result.txt).
        for line in lines[1:]:
            # Keep the text after the last '='; the key is the part before
            # the first ':' and the value the part after the last ':'.
            fields = line.split('=')[-1].split(':')
            k, v = fields[0].strip(), fields[-1].strip()
            if em in ('ari', 'nmi'):
                score = float(v)
            else:
                # Purity values may carry a trailing '%'; keep the number only.
                score = float(v.split('%')[0])
            sums[k] = sums.get(k, 0.0) + score
    total += 1


In [3]:
# Convert the accumulated sums into means over the `total` run directories,
# rounded to three decimals. The purity divisor carries an extra factor of
# 100 (purity values are parsed from '%'-suffixed text in the loading cell).
for scores, divisor in (
    (ari_best_result, total),
    (nmi_best_result, total),
    (purity_best_result, total * 100),
):
    # Build the averaged mapping first, then write it back into the same dict
    # so the existing names keep pointing at the updated objects.
    scores.update({name: round(acc / divisor, 3) for name, acc in scores.items()})

for scores in (ari_best_result, nmi_best_result, purity_best_result):
    print(scores)

{'jideca_b01g01re_34': 0.312, 'jideca_b01g1re_34': 0.267, 'jideca_b1g05re_34': 0.259, 'jideca_b01g05re_34': 0.259, 'jideca_b1g01re_34': 0.255, 'jideca_b05g01re_34': 0.248, 'jideca_b01g02re_34': 0.198, 'jideca_b05g02re_34': 0.19, 'jideca_b1g10re_34': 0.175, 'jideca_b05g10re_34': 0.17, 'jideca_b1g1re_34': 0.134, 'jideca_b05g1re_34': 0.134, 'jideca_b05g05re_34': 0.181, 'jideca_b1g02re_34': 0.197, 'jideca_b01g10re_34': 0.059}
{'jideca_b01g01re_34': 0.598, 'jideca_b1g05re_34': 0.584, 'jideca_b01g05re_34': 0.568, 'jideca_b05g01re_34': 0.562, 'jideca_b1g01re_34': 0.552, 'jideca_b01g1re_34': 0.555, 'jideca_b01g02re_34': 0.52, 'jideca_b05g02re_34': 0.517, 'jideca_b1g02re_34': 0.527, 'jideca_b05g05re_34': 0.503, 'jideca_b05g1re_34': 0.472, 'jideca_b1g10re_34': 0.462, 'jideca_b1g1re_34': 0.455, 'jideca_b05g10re_34': 0.47, 'jideca_b01g10re_34': 0.36}
{'jideca_b01g01re_34': 0.293, 'jideca_b01g1re_34': 0.332, 'jideca_b1g10re_34': 0.289, 'jideca_b01g05re_34': 0.307, 'jideca_b05g10re_34': 0.308, 'jide

In [4]:
def _simple_label(parameters):
    """Return the short label derived from the text after 'g' in a run name.

    The second '_'-separated field of ``parameters`` is split on 'g' and the
    part after it is inspected:

    * if it starts with '0', the label is '0.' followed by the next character
      (e.g. 'jideca_b01g01re_34' -> '0.1');
    * otherwise the last two characters are dropped
      (e.g. 'jideca_b1g10re_34' -> '10').

    The label is returned as a string, not a number.
    """
    tail = parameters.split('_')[1].split('g')[1]
    if tail[0] == '0':
        return '0.' + tail[1]
    # Assumes a two-character suffix such as 're' follows the digits.
    return tail[0:-2]


def _run_type(parameters):
    """Return 'real' when the second '_'-separated field ends in 're', else 'semantic'."""
    return 'real' if parameters.split('_')[1][-2:] == 're' else 'semantic'


def _metric_frame(best_result, metric):
    """Build the per-metric summary frame.

    Parameters
    ----------
    best_result : dict
        Mapping of run name -> averaged score.
    metric : str
        Name of the score column ('ari', 'nmi' or 'purity').

    Returns
    -------
    pandas.DataFrame
        Columns, in order: 'parameters', ``metric``, 'simple', 'types'.
    """
    frame = pd.DataFrame(list(best_result.items()), columns=['parameters', metric])
    frame['simple'] = frame['parameters'].apply(_simple_label)
    frame['types'] = frame['parameters'].apply(_run_type)
    return frame


# The same transformation is applied to all three metrics; only the score
# column name differs.
df_ari = _metric_frame(ari_best_result, 'ari')
df_nmi = _metric_frame(nmi_best_result, 'nmi')
df_purity = _metric_frame(purity_best_result, 'purity')


In [6]:
# Write each metric's summary frame to its CSV file. The default index is
# written as well, so it shows up as an unnamed first column in the files.
csv_targets = {
    '../result/jideca_ari_means_b01_05_1.csv': df_ari,
    '../result/jideca_nmi_means_b01_05_1.csv': df_nmi,
    '../result/jideca_purity_means_b01_05_1.csv': df_purity,
}
for csv_path, frame in csv_targets.items():
    frame.to_csv(csv_path)

# Show the purity rows of type 'real', ordered by their 'simple' label
# (a string column, so the ordering is lexicographic).
is_real = df_purity['types'] == 'real'
df_purity.loc[is_real].sort_values(by='simple')

Unnamed: 0,parameters,purity,simple,types
0,jideca_b01g01re_34,0.293,0.1,real
8,jideca_b1g01re_34,0.258,0.1,real
11,jideca_b05g01re_34,0.29,0.1,real
5,jideca_b1g02re_34,0.292,0.2,real
6,jideca_b05g02re_34,0.239,0.2,real
7,jideca_b01g02re_34,0.257,0.2,real
3,jideca_b01g05re_34,0.307,0.5,real
9,jideca_b05g05re_34,0.24,0.5,real
10,jideca_b1g05re_34,0.283,0.5,real
1,jideca_b01g1re_34,0.332,1.0,real
