Before running this notebook, you first need to run each of the notebooks `[f'{dataset["folder"]}/evaluation/compute_metrics.ipynb' for dataset in datasets]`, or obtain the data generated by those notebooks and place it in the corresponding folders.
---

In [1]:
from json import dump, load
# Datasets covered by the tables below. 'name' is the key under which the
# dataset's summary is stored and the label of its table row; 'folder' is the
# directory that contains the dataset's `evaluation/` sub-folder.
datasets = [
    {'name': name, 'folder': folder}
    for name, folder in (
        ('Homo sapiens', '.'),
        ('Mus musculus', './mouse'),
        ('Triticum aestivum', './triticum_aestivum'),
        ('Hordeum vulgare', './hordeum_vulgare'),
        ('Drosophila melanogaster', './fruit_fly'),
        ('Magnaporthe oryzae', './magnaporthe_oryzae'),
    )
]

In [2]:
# Load every dataset's evaluation summary, keyed by the dataset's display name.
summary_data = {}
for dataset in datasets:
    # `with` closes the file even when JSON parsing raises, so a malformed
    # summary no longer leaks an open file handle.
    with open(f'{dataset["folder"]}/evaluation/summary.json', 'r') as f:
        summary_data[dataset['name']] = load(f)

In [3]:
# Inclusive upper bounds of the small (`_60`), medium (`_30`) and large (`_10`)
# size classes printed in the table headers below.
# NOTE(review): the origin of the 60/30/10 suffixes is not visible in this notebook.

# cdss size classes, in bases.
limit_cdss_60 = 1000
limit_cdss_30 = 2500
limit_cdss_10 = 40620

# Size classes in bases (second table header).
limit_60 = 2000
limit_30 = 5000
limit_10 = 205012

# Graph size classes, in vertices.
limit_size_60 = 15
limit_size_30 = 50
limit_size_10 = 725

In [4]:
def format_number(n):
    """Render ``n`` for a table cell.

    Values whose type is exactly ``int`` are printed verbatim; everything else
    is formatted with two decimals (rounded by the format specification).
    """
    return str(n) if type(n) == int else f"{n:.2f}"

def format_cell(data, key):
    
    try:
        mdata = data[key]
        return f"""\\cellx{{2}}{{{format_number(mdata['abs_improvements_base']['mean'])}\\\\{format_number(mdata['rel_improvements_base']['mean'])}}}  & \\cellx{{2}}{{{format_number(mdata['abs_improvements_vertex']['mean'])}\\\\{format_number(mdata['rel_improvements_vertex']['mean'])}}}"""
    except:
        return f"""--  & --"""

def format_row(data, key):
    """Build the LaTeX row of dataset ``key`` for the first table.

    Reads ``data[key]['impr_dict']['k+1']`` and renders its 'small', 'medium'
    and 'large' entries with ``format_cell``; the row ends with a line break
    followed by an hline.
    """
    by_size = data[key]['impr_dict']['k+1']
    small, medium, large = (
        format_cell(by_size, size) for size in ('small', 'medium', 'large')
    )
    return f"{key}  & {small} & {medium}  & {large} \\\\\\hline"

def format_first_paper_table(data):
    """Return the LaTeX source of the first table (improvements by graph size).

    Args:
        data: mapping from dataset name to its summary; it is passed to
            ``format_row`` for each of the six dataset names hard-coded below.

    Returns:
        A ``center``/``tabular`` environment as a string. The vertex ranges in
        the header come from the module-level ``limit_size_60``,
        ``limit_size_30`` and ``limit_size_10`` values.
    """
    # Literal braces are doubled because this is an f-string. Rows already end
    # with \hline (see format_row); the extra \hline appended after some rows
    # emits a second rule after that dataset.
    a = f"""\\begin{{center}}
\\begin{{tabular}}{{|c|cc|cc|cc|}}
\\hline
 & \\multicolumn{{2}}{{c|}}{{small graphs (3-{limit_size_60} vertices)}}  & \\multicolumn{{2}}{{c|}}{{medium graphs ({limit_size_60+1}-{limit_size_30} vertices)}} & \\multicolumn{{2}}{{c|}}{{large graphs ({limit_size_30+1}-{limit_size_10} vertices)}}    \\\\
Dataset  & bases     & vertices      & bases     & vertices     & bases     & vertices     \\\\\\hline\\hline
{format_row(data, 'Homo sapiens')}
{format_row(data, 'Mus musculus')}\\hline
{format_row(data, 'Triticum aestivum')}
{format_row(data, 'Hordeum vulgare')}\\hline
{format_row(data, 'Drosophila melanogaster')}\\hline
{format_row(data, 'Magnaporthe oryzae')}
\\end{{tabular}} 
\\end{{center}}"""
    return a


# Print the generated LaTeX source of the first table.
print(format_first_paper_table(summary_data))

\begin{center}
\begin{tabular}{|c|cc|cc|cc|}
\hline
 & \multicolumn{2}{c|}{small graphs (3-15 vertices)}  & \multicolumn{2}{c|}{medium graphs (16-50 vertices)} & \multicolumn{2}{c|}{large graphs (51-725 vertices)}    \\
Dataset  & bases     & vertices      & bases     & vertices     & bases     & vertices     \\\hline\hline
Homo sapiens  & \cellx{2}{160.95\\1.41}  & \cellx{2}{0.58\\1.18} & \cellx{2}{245.65\\1.80}  & \cellx{2}{1.53\\1.45}  & \cellx{2}{233.60\\1.96}  & \cellx{2}{1.93\\1.55} \\\hline
Mus musculus  & \cellx{2}{202.29\\1.47}  & \cellx{2}{0.72\\1.23} & \cellx{2}{334.33\\1.95}  & \cellx{2}{2.02\\1.57}  & \cellx{2}{425.96\\2.30}  & \cellx{2}{3.13\\1.81} \\\hline\hline
Triticum aestivum  & \cellx{2}{765.49\\2.84}  & \cellx{2}{3.29\\1.96} & \cellx{2}{1078.49\\4.07}  & \cellx{2}{6.74\\2.81}  & \cellx{2}{3103.23\\8.14}  & \cellx{2}{19.84\\5.62} \\\hline
Hordeum vulgare  & \cellx{2}{150.79\\1.89}  & \cellx{2}{0.64\\1.21} & \cellx{2}{192.70\\2.56}  & \cellx{2}{1.42\\1.44}  & \cellx{

In [5]:
import math
def trunc(number, decimals):
    """Cut ``number`` off after ``decimals`` decimal places (towards zero).

    The value is scaled by a power of ten, truncated with ``math.trunc`` and
    scaled back, so no rounding takes place.
    """
    scale = pow(10.0, decimals)
    scaled_whole = math.trunc(number * scale)
    return scaled_whole / scale

def format_number(n):
    """Render ``n`` for a table cell.

    Values whose type is exactly ``int`` are printed verbatim; anything else is
    truncated (not rounded) to two decimals via ``trunc`` and printed with two
    decimal places.
    """
    if type(n) != int:
        return f"{trunc(n,2):.2f}"
    return str(n)

def format_cell(data, key, variant):
    """Format the ``esr`` and ``mcr`` columns of one size class.

    Args:
        data: mapping with a 'unitigs' and a 'k+1' entry, each mapping size-class
            names to statistics.
        key: size class to render (callers pass 'small', 'medium' or 'large').
        variant: suffix appended to the metric names 'e_size_density' and
            'max_prop_cov' (this notebook uses '_bases' and '_vertex').

    Returns:
        Two LaTeX columns joined by `` & ``. Each column is a ``cellto`` macro
        call pairing the mean of the 'unitigs' entry with the mean of the 'k+1'
        entry. If the statistics are missing or malformed, both columns are
        rendered as ``--``.
    """
    try:
        unitig_stats = data['unitigs'][key]
        k1_stats = data['k+1'][key]
        columns = []
        for metric in ('e_size_density', 'max_prop_cov'):
            unitig_value = format_number(unitig_stats[metric + variant]['mean'])
            k1_value = format_number(k1_stats[metric + variant]['mean'])
            columns.append(f"\\cellto{{{unitig_value}}}{{{k1_value}}}")
    except (LookupError, TypeError, ValueError):
        # Only "no usable data for this size class" falls back to the
        # placeholder; a bare `except:` would also hide KeyboardInterrupt and
        # genuine programming errors such as a misspelled helper name.
        return "-- & --"
    return " & ".join(columns)

def format_row(data, key, variant):
    """Build the LaTeX row of dataset ``key`` for the second table.

    Reads ``data[key]['fixed_l_dict']`` and renders its 'small', 'medium' and
    'large' entries with ``format_cell`` for the given metric ``variant``; the
    row ends with a line break followed by an hline.
    """
    by_size = data[key]['fixed_l_dict']
    cells = [format_cell(by_size, size, variant) for size in ('small', 'medium', 'large')]
    return "{} & {} & {} & {} \\\\\\hline".format(key, *cells)
    

def format_second_paper_table(data, variant=''):
    """Return the LaTeX source of the second table (esr/mcr by size in bases).

    Args:
        data: mapping from dataset name to its summary; it is passed to
            ``format_row`` for each of the six dataset names hard-coded below.
        variant: metric-name suffix forwarded unchanged to ``format_row``
            (this notebook calls it with '_bases' and '_vertex').

    Returns:
        A ``center``/``tabular`` environment as a string. The base ranges in the
        header come from the module-level ``limit_60``, ``limit_30`` and
        ``limit_10`` values.
    """
    
    # Literal braces are doubled because this is an f-string. Rows already end
    # with \hline (see format_row); the extra \hline appended after some rows
    # emits a second rule after that dataset.
    a = f"""\\begin{{center}}
\\begin{{tabular}}{{|c|cc|cc|cc|}}
\\hline
 & \\multicolumn{{2}}{{c|}}{{small (1-{limit_60} bases)}}    & \\multicolumn{{2}}{{c|}}{{medium ({limit_60+1}-{limit_30} bases)}} & \\multicolumn{{2}}{{c|}}{{large ({limit_30+1}-{limit_10} bases)}}    \\\\
Dataset  & \\texttt{{esr}}     & \\texttt{{mcr}}     & \\texttt{{esr}}     & \\texttt{{mcr}}    & \\texttt{{esr}}     & \\texttt{{mcr}}    \\\\\\hline\\hline
{format_row(data, 'Homo sapiens', variant)}
{format_row(data, 'Mus musculus', variant)}\\hline
{format_row(data, 'Triticum aestivum', variant)}
{format_row(data, 'Hordeum vulgare', variant)}\\hline
{format_row(data, 'Drosophila melanogaster', variant)}\\hline
{format_row(data, 'Magnaporthe oryzae', variant)}
\\end{{tabular}} 
\\end{{center}}"""

    return a

# Print the second table twice: once for the '_bases' metrics, once for '_vertex'.
print(format_second_paper_table(summary_data, '_bases'))
print(format_second_paper_table(summary_data, '_vertex'))

\begin{center}
\begin{tabular}{|c|cc|cc|cc|}
\hline
 & \multicolumn{2}{c|}{small (1-2000 bases)}    & \multicolumn{2}{c|}{medium (2001-5000 bases)} & \multicolumn{2}{c|}{large (5001-205012 bases)}    \\
Dataset  & \texttt{esr}     & \texttt{mcr}     & \texttt{esr}     & \texttt{mcr}    & \texttt{esr}     & \texttt{mcr}    \\\hline\hline
Homo sapiens & \cellto{0.33}{0.49} & \cellto{0.49}{0.63} & \cellto{0.33}{0.41} & \cellto{0.48}{0.56} & \cellto{0.37}{0.43} & \cellto{0.52}{0.58} \\\hline
Mus musculus & \cellto{0.40}{0.58} & \cellto{0.55}{0.72} & \cellto{0.37}{0.51} & \cellto{0.51}{0.66} & \cellto{0.41}{0.50} & \cellto{0.55}{0.64} \\\hline\hline
Triticum aestivum & \cellto{0.49}{0.72} & \cellto{0.63}{0.81} & \cellto{0.46}{0.64} & \cellto{0.60}{0.74} & \cellto{0.49}{0.63} & \cellto{0.62}{0.74} \\\hline
Hordeum vulgare & \cellto{0.35}{0.43} & \cellto{0.48}{0.57} & \cellto{0.23}{0.28} & \cellto{0.37}{0.42} & \cellto{0.19}{0.22} & \cellto{0.33}{0.37} \\\hline\hline
Drosophila melanogaster &

In [6]:
def format_number(n):
    """Render ``n`` for a table cell.

    Exact ``int`` values are returned as-is in string form; any other value is
    cut to two decimals with ``trunc`` (no rounding) and printed with two
    decimal places.
    """
    return str(n) if type(n) == int else f"{trunc(n,2):.2f}"

def format_cell(data, key, variant):
    
    try:
        unitigdata = data['unitigs'][key]
        mdata = data['k+1'][key]
        return f"""\\cellto{{{format_number(unitigdata['precision'+variant]['mean'])}}}{{{format_number(mdata['precision'+variant]['mean'])}}}  & \\cellto{{{format_number(unitigdata['e_size_density'+variant]['mean'])}}}{{{format_number(mdata['e_size_density'+variant]['mean'])}}}  & \\cellto{{{format_number(unitigdata['max_prop_cov'+variant]['mean'])}}}{{{format_number(mdata['max_prop_cov'+variant]['mean'])}}}"""
    except:
        return "-- & -- & --"
    
def format_row(data, key, variant):
    """Build the LaTeX row of dataset ``key`` for the third table.

    Reads ``data[key]['fixed_rd_dict']`` and renders its 'small', 'medium' and
    'large' entries with ``format_cell`` for the given metric ``variant``; the
    row ends with a line break followed by an hline.
    """
    per_size = data[key]['fixed_rd_dict']
    small = format_cell(per_size, 'small', variant)
    medium = format_cell(per_size, 'medium', variant)
    large = format_cell(per_size, 'large', variant)
    return f"{key} & {small}  & {medium} & {large}  \\\\\\hline"
    

def format_third_paper_table(data, variant=''):
    """Return the LaTeX source of the third table (prec/esr/mcr by graph size).

    Args:
        data: mapping from dataset name to its summary; it is passed to
            ``format_row`` for each of the six dataset names hard-coded below.
        variant: metric-name suffix forwarded unchanged to ``format_row``
            (this notebook calls it with '_bases' and '_vertex').

    Returns:
        A ``center``/``tabular`` environment as a string. The vertex ranges in
        the header come from the module-level ``limit_size_60``,
        ``limit_size_30`` and ``limit_size_10`` values.
    """
    
    # Literal braces are doubled because this is an f-string. Rows already end
    # with \hline (see format_row); the extra \hline appended after some rows
    # emits a second rule after that dataset.
    a = f"""\\begin{{center}}
    \\begin{{tabular}}{{|c|ccc|ccc|ccc|}}
\\hline
 & \\multicolumn{{3}}{{c|}}{{small graphs (3-{limit_size_60} vertices)}}        & \\multicolumn{{3}}{{c|}}{{medium graphs ({limit_size_60+1}-{limit_size_30} vertices)}}     & \\multicolumn{{3}}{{c|}}{{large graphs ({limit_size_30+1}-{limit_size_10} vertices)}}          \\\\
Dataset  & \\texttt{{prec}}   & \\texttt{{esr}}     & \\texttt{{mcr}}    & \\texttt{{prec}}   & \\texttt{{esr}}     & \\texttt{{mcr}}    & \\texttt{{prec}}   & \\texttt{{esr}}     & \\texttt{{mcr}}    \\\\\\hline\\hline
{format_row(data, 'Homo sapiens', variant)}
{format_row(data, 'Mus musculus', variant)}\\hline
{format_row(data, 'Triticum aestivum', variant)}
{format_row(data, 'Hordeum vulgare', variant)}\\hline
{format_row(data, 'Drosophila melanogaster', variant)}\\hline
{format_row(data, 'Magnaporthe oryzae', variant)}
\\end{{tabular}} 
\\end{{center}}"""
    
    return a


# Print the third table twice: once for the '_bases' metrics, once for '_vertex'.
print(format_third_paper_table(summary_data, '_bases'))
print(format_third_paper_table(summary_data, '_vertex'))

\begin{center}
    \begin{tabular}{|c|ccc|ccc|ccc|}
\hline
 & \multicolumn{3}{c|}{small graphs (3-15 vertices)}        & \multicolumn{3}{c|}{medium graphs (16-50 vertices)}     & \multicolumn{3}{c|}{large graphs (51-725 vertices)}          \\
Dataset  & \texttt{prec}   & \texttt{esr}     & \texttt{mcr}    & \texttt{prec}   & \texttt{esr}     & \texttt{mcr}    & \texttt{prec}   & \texttt{esr}     & \texttt{mcr}    \\\hline\hline
Homo sapiens & \cellto{1.00}{1.00}  & \cellto{0.49}{0.68}  & \cellto{0.62}{0.81}  & \cellto{1.00}{0.99}  & \cellto{0.35}{0.48}  & \cellto{0.51}{0.63} & \cellto{1.00}{0.99}  & \cellto{0.27}{0.37}  & \cellto{0.42}{0.52}  \\\hline
Mus musculus & \cellto{1.00}{1.00}  & \cellto{0.51}{0.71}  & \cellto{0.64}{0.83}  & \cellto{1.00}{0.99}  & \cellto{0.36}{0.51}  & \cellto{0.51}{0.66} & \cellto{1.00}{0.99}  & \cellto{0.29}{0.44}  & \cellto{0.44}{0.58}  \\\hline\hline
Triticum aestivum & \cellto{1.00}{1.00}  & \cellto{0.49}{0.72}  & \cellto{0.63}{0.80}  & \cellto{1.00}{1.0

In [7]:
def format_time(time):
    """Return ``time`` divided by 1,000,000, printed with two decimals.

    The result fills the "(secs)" columns of the timing table.
    NOTE(review): this implies the stored timings are microseconds; confirm
    against the notebook that produces them.
    """
    seconds = time / 1000000
    return format(seconds, '.2f')

def format_row(data, key):
    """Build the LaTeX row of dataset ``key`` for the timing table.

    Reads ``data[key]['time_dict']['k+1']`` and prints the 'all' timing of the
    'unoptimized' and 'optimized' entries through ``format_time``; the row ends
    with a line break followed by an hline.
    """
    timings = data[key]['time_dict']['k+1']
    unoptimized = format_time(timings['unoptimized']['all']['time'])
    optimized = format_time(timings['optimized']['all']['time'])
    return f"{key} & {unoptimized}  & {optimized}\\\\\\hline"

def format_fourth_table(data):
    """Return the LaTeX source of the timing table (unoptimized vs optimized).

    ``data`` maps dataset names to their summaries and is handed to
    ``format_row`` once per hard-coded dataset, in table order.
    """
    # (dataset name, text appended after its row). Rows already end with
    # \hline, so the appended \hline emits a second rule after that dataset.
    layout = (
        ('Homo sapiens', ''),
        ('Mus musculus', '\\hline'),
        ('Triticum aestivum', ''),
        ('Hordeum vulgare', '\\hline'),
        ('Drosophila melanogaster', '\\hline'),
        ('Magnaporthe oryzae', ''),
    )
    lines = [
        '\\begin{tabular}{|l|l|l|}',
        '\\hline',
        'Datasets & Unoptimized (secs) & Optimized (secs)\\\\ \\hline\\hline',
    ]
    lines.extend(f"{format_row(data, name)}{suffix}" for name, suffix in layout)
    lines.append('\\end{tabular}')
    return "\n".join(lines)

# Print the generated LaTeX source of the timing table.
print(format_fourth_table(summary_data))

\begin{tabular}{|l|l|l|}
\hline
Datasets & Unoptimized (secs) & Optimized (secs)\\ \hline\hline
Homo sapiens & 32.55  & 11.29\\\hline
Mus musculus & 12.34  & 4.78\\\hline\hline
Triticum aestivum & 2.88  & 0.82\\\hline
Hordeum vulgare & 29.13  & 10.95\\\hline\hline
Drosophila melanogaster & 0.52  & 0.30\\\hline\hline
Magnaporthe oryzae & 0.01  & 0.01\\\hline
\end{tabular}


In [8]:
def format_number(n):
    """Render ``n`` for a table cell.

    A value of exact type ``int`` is converted with ``str``; every other value
    is truncated to two decimals by ``trunc`` and printed with two decimal
    places.
    """
    if type(n) == int:
        return str(n)
    truncated = trunc(n,2)
    return f"{truncated:.2f}"

def format_cell(data, key):
    
    try:
        unitigdata = data['unitigs'][key]
        mdata = data['k+1'][key]
        return f"""\\cellto{{{format_number(unitigdata['max_cov_rel']['mean'])}}}{{{format_number(mdata['max_cov_rel']['mean'])}}}"""
    except:
        return "--"

def format_row(data, key):
    """Build the LaTeX row of dataset ``key`` for the cdss table.

    Reads ``data[key]['cdss_dict']`` and renders its 'small', 'medium' and
    'large' entries with ``format_cell``; the row ends with a line break
    followed by an hline.
    """
    cdss_stats = data[key]['cdss_dict']
    cells = {
        size: format_cell(cdss_stats, size)
        for size in ('small', 'medium', 'large')
    }
    return f"{key} & {cells['small']} & {cells['medium']} & {cells['large']} \\\\\\hline"
    

def format_cdss_paper_table(data):
    """Return the LaTeX source of the cdss max-relative-coverage table.

    Args:
        data: mapping from dataset name to its summary; it is passed to
            ``format_row`` for each of the six dataset names hard-coded below.

    Returns:
        A complete ``table`` environment (centering, caption and tabular) as a
        string. The base ranges in the header come from the module-level
        ``limit_cdss_60``, ``limit_cdss_30`` and ``limit_cdss_10`` values.
    """
    # Literal braces are doubled because this is an f-string, and every LaTeX
    # backslash is written as `\\`: the previous `\centering` / `\caption`
    # relied on `\c` being an unrecognised escape, which Python now warns
    # about. The emitted text is unchanged.
    # Rows already end with \hline (see format_row); the extra \hline appended
    # after some rows emits a second rule after that dataset.
    a = f"""\\begin{{table}}[t]
\\centering
\\caption{{cdss max relative coverage grouped by cdss size}}
\\begin{{tabular}}{{|c|c|c|c|}}
\\hline
Dataset & small (1-{limit_cdss_60} bases)    &  medium ({limit_cdss_60+1}-{limit_cdss_30} bases) & large ({limit_cdss_30+1}-{limit_cdss_10} bases)\\\\\\hline\\hline
{format_row(data, 'Homo sapiens')}
{format_row(data, 'Mus musculus')}\\hline
{format_row(data, 'Triticum aestivum')}
{format_row(data, 'Hordeum vulgare')}\\hline
{format_row(data, 'Drosophila melanogaster')}\\hline
{format_row(data, 'Magnaporthe oryzae')}
\\end{{tabular}} 
\\end{{table}}"""

    return a

# Print the generated LaTeX source of the cdss table.
print(format_cdss_paper_table(summary_data))

\begin{table}[t]
\centering
\caption{cdss max relative coverage grouped by cdss size}
\begin{tabular}{|c|c|c|c|}
\hline
Dataset & small (1-1000 bases)    &  medium (1001-2500 bases) & large (2501-40620 bases)\\\hline\hline
Homo sapiens & \cellto{0.59}{0.70} & \cellto{0.45}{0.54} & \cellto{0.41}{0.48} \\\hline
Mus musculus & \cellto{0.69}{0.79} & \cellto{0.56}{0.67} & \cellto{0.49}{0.57} \\\hline\hline
Triticum aestivum & \cellto{0.67}{0.85} & \cellto{0.68}{0.83} & \cellto{0.65}{0.77} \\\hline
Hordeum vulgare & \cellto{0.59}{0.70} & \cellto{0.49}{0.56} & \cellto{0.43}{0.50} \\\hline\hline
Drosophila melanogaster & \cellto{0.79}{0.94} & \cellto{0.77}{0.90} & \cellto{0.68}{0.80} \\\hline\hline
Magnaporthe oryzae & \cellto{0.80}{0.95} & \cellto{0.91}{0.98} & \cellto{0.82}{0.96} \\\hline
\end{tabular} 
\end{table}
