In [5]:
import os
import json
import pandas as pd
# Directory (relative to the notebook's working directory) that is searched for
# the '<dataset>_<algorithm>_report.json' files.
results_dir = 'results'

In [6]:
# Dataset part of the report filename; each report is looked up as
# '<dataset>_<algorithm>_report.json'.
datasets = [
    'netgpi', 'deeploc', 'netsurfp', 'signalp',
    'spotrna1d', 'nrc', 'deepromoter', 'histone',
]

# Algorithm part of the report filename. Names containing 'graphpart' get a
# separate alignment timing when the reports are collected. The order here is
# the row order within each dataset in the collected table.
algorithms = [
    'partition_graphpart_needle', 'partition_graphpart_mmseqs2',
    'reduction_mmseqs2', 'reduction_cdhit',
    'partition_mmseqs2', 'partition_cdhit',
]

# Collect one timing record (a pandas Series named '<dataset>_<algorithm>') for
# every dataset/algorithm combination that has a report file on disk.
out_list = []

for dataset in datasets:
    for algorithm in algorithms:
        fpath = os.path.join(results_dir, f'{dataset}_{algorithm}_report.json')
        # Combinations without a report are skipped silently, not treated as errors.
        if not os.path.exists(fpath):
            continue
        # Use a context manager so the handle is closed right after parsing;
        # a bare open() inside json.load() leaves one open file per report.
        with open(fpath, 'r') as report_file:
            data = json.load(report_file)

        if 'graphpart' in algorithm:
            # graphpart reports are split at the 'time_edges_complete' stamp:
            # everything before it counts as alignment, everything after as partitioning.
            series_dict = {
                'time_alignment': data['time_edges_complete'] - data['time_script_start'],
                'time_partitioning': data['time_script_complete'] - data['time_edges_complete'],
            }
        else:
            # Other algorithms: the whole script duration is recorded as partitioning time.
            series_dict = {'time_partitioning': data['time_script_complete'] - data['time_script_start']}

        series = pd.Series(series_dict, name=f'{dataset}_{algorithm}')
        out_list.append(series)




In [7]:
# Build the timing table and render every value as minutes:seconds.
# The collected values are durations in seconds. Entries that are missing for a
# row (no 'time_alignment' was recorded) are filled with 0 and show up as '0:00'.
def _format_min_sec(seconds):
    """Format a duration in seconds as 'M:SS'; minutes are not wrapped into hours."""
    return f'{int(seconds // 60)}:{int(seconds % 60):02d}'


df = pd.DataFrame(out_list).fillna(0)
# Column-wise Series.map replaces DataFrame.applymap, which is deprecated since
# pandas 2.1 (renamed to DataFrame.map); this form works on old and new versions.
df = df.apply(lambda column: column.map(_format_min_sec))
df


Unnamed: 0,time_alignment,time_partitioning
netgpi_partition_graphpart_needle,9:57,0:02
netgpi_partition_graphpart_mmseqs2,0:04,0:04
netgpi_reduction_mmseqs2,0:00,0:09
netgpi_reduction_cdhit,0:00,0:30
netgpi_partition_mmseqs2,0:00,0:10
netgpi_partition_cdhit,0:00,0:28
deeploc_partition_graphpart_needle,499:11,0:01
deeploc_partition_graphpart_mmseqs2,3:57,0:31
deeploc_reduction_mmseqs2,0:00,0:15
deeploc_reduction_cdhit,0:00,13:02


In [4]:
# Unformatted view of the collected timings: one row per Series in out_list,
# values left as raw numbers (NaN where a row has no entry for a column).
df = pd.DataFrame(data=out_list)
df

Unnamed: 0,time_alignment,time_partitioning
netgpi_partition_graphpart_needle,370.021356,3.131261
netgpi_partition_graphpart_mmseqs2,27.582901,6.021922
deeploc_partition_graphpart_needle,15345.75852,2.893012
deeploc_partition_graphpart_mmseqs2,1595.437951,56.726447
netsurfp_partition_graphpart_needle,62401.694571,1640.029875
netsurfp_partition_graphpart_mmseqs2,4505.449518,1667.702321
signalp_partition_graphpart_needle,18724.954155,102.917607
signalp_partition_graphpart_mmseqs2,685.705397,66.690666
spotrna1d_partition_graphpart_needle,31.332998,0.26712
spotrna1d_partition_graphpart_mmseqs2,1.741057,0.270862
