To run this notebook, you first need to run the notebook `evaluation/compute_metrics.ipynb`, or obtain the data generated by that notebook and place it in the corresponding folders.
---

In [1]:
from json import dump, load
from pprint import pprint
from statistics import median, mean, stdev
import networkx as nx

## Load gene graphs

# vertices_inv maps each vertex id to a tuple (JSON object keys are strings,
# hence the int() conversion). Every file is opened in a `with` block so the
# handle is released even if parsing fails.
with open('../gene_graphs/vertices_inv.json', 'r') as f:
    vertices_inv = {int(k): tuple(v) for k, v in load(f).items()}

# The `info` file stores the number of gene graphs available on disk.
with open('../gene_graphs/info', 'r') as f:
    n = int(f.read())

components = list()
for i in range(n):  ## Number of gene_graphs
    component = dict()
    component['graph'] = nx.read_edgelist(f'../gene_graphs/graphs/component_{i+1}.edgelist', delimiter=':', create_using=nx.DiGraph, nodetype=int)
    component['len'] = len(component['graph'])

    # These three per-component JSON files are all loaded into sets.
    for key in ('sources', 'targets', 'vertex_constrains'):
        with open(f'../gene_graphs/{key}/component_{i+1}.json', 'r') as f:
            component[key] = set(load(f))

    # Transcript paths are kept exactly as stored in the JSON file.
    with open(f'../gene_graphs/transcript_paths/component_{i+1}.json', 'r') as f:
        component['transcript_paths'] = load(f)

    components.append(component)
    

In [2]:
## Load experiments from json files
# Only components with len > 2 and more than one transcript path are loaded;
# the table functions in this notebook apply the same filter before reading
# the keys merged in here.
for i, component in enumerate(components):
    if component['len'] > 2 and len(component['transcript_paths']) > 1:
        # `with` closes the file even if the JSON cannot be parsed.
        with open(f'../safe_paths_json/component_{i+1}.json', "r") as file:
            component.update(load(file))

In [3]:
def interval_length(interval):
    """Return the size of ``interval``, counting both of its end points.

    Only the first two items of ``interval`` are read: ``interval[0]`` is the
    start and ``interval[1]`` is the end.
    """
    start = interval[0]
    end = interval[1]
    return end - start + 1

## Base length of a contig: the summed interval lengths of its vertices
def base_length(contig, vertices_inv):
    """Return the total interval length over the vertices of ``contig``.

    Each vertex ``v`` of ``contig`` is looked up in ``vertices_inv`` and the
    length of the resulting interval is added up; an empty contig yields 0.
    """
    return sum(interval_length(vertices_inv[v]) for v in contig)

In [4]:
# def choose_60_30_10_limits_cdss(components):
#     lengths = list()
#     for i, component in enumerate(components):
#         if component['len'] > 2 and len(component['transcript_paths']) > 1:
#             for CDSs in component['cdss_coverage']:
#                 for CDS in CDSs:
#                     lengths.append(CDS['length'])
#     lengths = sorted(lengths)
#     return lengths[int(0.6*len(lengths))], lengths[int(0.9*len(lengths))], lengths[-1]

# limit_cdss_60, limit_cdss_30, limit_cdss_10 = choose_60_30_10_limits_cdss(components)
# limit_cdss_60, limit_cdss_30, limit_cdss_10
# Literal limits. NOTE(review): presumably obtained from the commented-out
# helper above (60th/90th percentile and maximum of the lengths) and then
# rounded - confirm.
limit_cdss_60 = 150
limit_cdss_30 = 500
limit_cdss_10 = 27705

In [5]:
# def choose_60_30_10_limits(components, vertices_inv):
#     lengths = list()
#     for i, component in enumerate(components):
#         if component['len'] > 2 and len(component['transcript_paths']) > 1:
#             for transcript in component['transcript_paths']:
#                 lengths.append(base_length(transcript['transcript_path'], vertices_inv))
#     lengths = sorted(lengths)
#     return lengths[int(0.6*len(lengths))], lengths[int(0.9*len(lengths))], lengths[-1]

# limit_60, limit_30, limit_10 = choose_60_30_10_limits(components, vertices_inv)
# limit_60, limit_30, limit_10
# Literal limits. NOTE(review): presumably obtained from the commented-out
# helper above (60th/90th percentile and maximum of the transcript base
# lengths) and then rounded - confirm.
limit_60 = 2000
limit_30 = 5000
limit_10 = 205012

In [6]:
# def choose_60_30_10_limits_size(components, vertices_inv):
#     sizes = list()
#     for i, component in enumerate(components):
#         if component['len'] > 2 and len(component['transcript_paths']) > 1:
#             sizes.append(component['len'])
#     sizes = sorted(sizes)
#     return sizes[int(0.6*len(sizes))], sizes[int(0.9*len(sizes))], sizes[-1]

# limit_size_60, limit_size_30, limit_size_10 = choose_60_30_10_limits_size(components, vertices_inv)
# limit_size_60, limit_size_30, limit_size_10
# Literal limits. NOTE(review): presumably obtained from the commented-out
# helper above (60th/90th percentile and maximum of component['len']) and
# then rounded - confirm.
limit_size_60 = 15
limit_size_30 = 50
limit_size_10 = 725

In [7]:
def compute_abs_improvement_table(over_width, limit_60, limit_30, components, vertices_inv):
    """Summarise the absolute per-unitig improvements of one experiment.

    Components are used only when ``component['len'] > 2`` and they have more
    than one transcript path. For each of them the experiment key is chosen as

    * ``width + over_width`` by default,
    * the number of transcript paths when ``over_width == -1``,
    * ``2 * width`` when ``over_width == -2``,
    * ``2 * width`` as a fallback when the chosen key has no experiment.

    Every entry of the experiment's ``impr_vertex`` / ``impr_base`` lists is
    then reduced by the vertex count / base length of the unitig stored at the
    same index in ``component['unitigs']``.

    Args:
        over_width: Offset added to the component width, or -1 / -2 to select
            the special keys listed above.
        limit_60: Unitigs whose component has ``len <= limit_60`` form the
            'small' group.
        limit_30: Kept for signature compatibility; no returned group uses it.
        components: List of component dicts (keys read: 'len',
            'transcript_paths', 'width', 'experiments', 'unitigs').
        vertices_inv: Mapping passed on to ``base_length``.

    Returns:
        A dict with the groups 'small' and 'total'. Each group holds
        median/mean/stdev for 'abs_improvements_vertex' and
        'abs_improvements_base', plus 'number_of_unitigs'. A statistic is
        ``None`` when the group has too few samples to compute it.

    Raises:
        KeyError: If neither the chosen nor the fallback experiment exists.
    """
    def summarize(values):
        # median/mean need at least one sample and stdev at least two; report
        # None instead of letting StatisticsError abort the whole table.
        return {
            'median': median(values) if values else None,
            'mean': mean(values) if values else None,
            'stdev': stdev(values) if len(values) > 1 else None,
        }

    def group_table(group):
        return {
            'abs_improvements_vertex': summarize([r['abs_improvements_vertex'] for r in group]),
            'abs_improvements_base': summarize([r['abs_improvements_base'] for r in group]),
            'number_of_unitigs': len(group),
        }

    records = []
    for i, component in enumerate(components):
        if not (component['len'] > 2 and len(component['transcript_paths']) > 1):
            continue

        width = component['width']
        exp_key = str(width + over_width)
        if over_width == -1:
            exp_key = str(len(component['transcript_paths']))
        if over_width == -2:
            exp_key = str(2 * width)
        if component['experiments'].get(exp_key, None) is None:
            exp_key = str(2 * width)

        experiment = component['experiments'][exp_key]
        uni = component['unitigs']

        # impr_* entries are matched to unitigs by position.
        abs_vertex = [value - len(uni[k]) for k, value in enumerate(experiment['impr_vertex'])]
        abs_base = [value - base_length(uni[k], vertices_inv) for k, value in enumerate(experiment['impr_base'])]

        for j, base_gain in enumerate(abs_base):
            records.append({
                'component': i,
                'component_length': component['len'],
                'abs_improvements_base': base_gain,
                'abs_improvements_vertex': abs_vertex[j],
            })

    small = [r for r in records if r['component_length'] <= limit_60]

    return {
        'small': group_table(small),
        'total': group_table(records),
    }
    
# over_width=0 selects the experiment keyed by each component's width; the
# 'small' group is bounded by limit_size_60 (compared against component['len']).
pprint(compute_abs_improvement_table(0, limit_size_60, limit_size_30, components, vertices_inv))

{'small': {'abs_improvements_base': {'mean': 709.3188405797101,
                                     'median': 325.0,
                                     'stdev': 923.8967062805334},
           'abs_improvements_vertex': {'mean': 1.1884057971014492,
                                       'median': 1.0,
                                       'stdev': 0.5342396826390701},
           'number_of_unitigs': 138},
 'total': {'abs_improvements_base': {'mean': 709.3188405797101,
                                     'median': 325.0,
                                     'stdev': 923.8967062805334},
           'abs_improvements_vertex': {'mean': 1.1884057971014492,
                                       'median': 1.0,
                                       'stdev': 0.5342396826390701},
           'number_of_unitigs': 138}}


In [8]:
def compute_rel_improvement_table(over_width, limit_60, limit_30, components, vertices_inv):
    """Summarise the relative per-unitig improvements of one experiment.

    Components are used only when ``component['len'] > 2`` and they have more
    than one transcript path. For each of them the experiment key is chosen as

    * ``width + over_width`` by default,
    * the number of transcript paths when ``over_width == -1``,
    * ``2 * width`` when ``over_width == -2``,
    * ``2 * width`` as a fallback when the chosen key has no experiment.

    Every entry of the experiment's ``impr_vertex`` / ``impr_base`` lists is
    then divided by the vertex count / base length of the unitig stored at the
    same index in ``component['unitigs']``.

    Args:
        over_width: Offset added to the component width, or -1 / -2 to select
            the special keys listed above.
        limit_60: Unitigs whose component has ``len <= limit_60`` form the
            'small' group.
        limit_30: Kept for signature compatibility; no returned group uses it.
        components: List of component dicts (keys read: 'len',
            'transcript_paths', 'width', 'experiments', 'unitigs').
        vertices_inv: Mapping passed on to ``base_length``.

    Returns:
        A dict with the groups 'small' and 'total'. Each group holds
        'number_of_unitigs' plus median/mean/stdev for
        'rel_improvements_vertex' and 'rel_improvements_base'. A statistic is
        ``None`` when the group has too few samples to compute it.

    Raises:
        KeyError: If neither the chosen nor the fallback experiment exists.
        ZeroDivisionError: If a unitig has no vertices or zero base length.
    """
    def summarize(values):
        # median/mean need at least one sample and stdev at least two; report
        # None instead of letting StatisticsError abort the whole table.
        return {
            'median': median(values) if values else None,
            'mean': mean(values) if values else None,
            'stdev': stdev(values) if len(values) > 1 else None,
        }

    def group_table(group):
        return {
            'rel_improvements_vertex': summarize([r['rel_improvements_vertex'] for r in group]),
            'rel_improvements_base': summarize([r['rel_improvements_base'] for r in group]),
            'number_of_unitigs': len(group),
        }

    records = []
    for i, component in enumerate(components):
        if not (component['len'] > 2 and len(component['transcript_paths']) > 1):
            continue

        width = component['width']
        exp_key = str(width + over_width)
        if over_width == -1:
            exp_key = str(len(component['transcript_paths']))
        if over_width == -2:
            exp_key = str(2 * width)
        if component['experiments'].get(exp_key, None) is None:
            exp_key = str(2 * width)

        uni = component['unitigs']
        experiment = component['experiments'][exp_key]

        # impr_* entries are matched to unitigs by position.
        rel_vertex = [value / len(uni[k]) for k, value in enumerate(experiment['impr_vertex'])]
        rel_base = [value / base_length(uni[k], vertices_inv) for k, value in enumerate(experiment['impr_base'])]

        for j, base_ratio in enumerate(rel_base):
            records.append({
                'component': i,
                'component_length': component['len'],
                'rel_improvements_base': base_ratio,
                'rel_improvements_vertex': rel_vertex[j],
            })

    small = [r for r in records if r['component_length'] <= limit_60]

    return {
        'small': group_table(small),
        'total': group_table(records),
    }
    
# over_width=-1 selects, per component, the experiment keyed by its number of
# transcript paths (falling back to 2 * width when that key is missing).
pprint(compute_rel_improvement_table(-1, limit_size_60, limit_size_30, components, vertices_inv))

{'small': {'number_of_unitigs': 138,
           'rel_improvements_base': {'mean': 1.0115190792874333,
                                     'median': 1.0,
                                     'stdev': 0.09536169813923688},
           'rel_improvements_vertex': {'mean': 1.0144927536231885,
                                       'median': 1.0,
                                       'stdev': 0.11994568395899624}},
 'total': {'number_of_unitigs': 138,
           'rel_improvements_base': {'mean': 1.0115190792874333,
                                     'median': 1.0,
                                     'stdev': 0.09536169813923688},
           'rel_improvements_vertex': {'mean': 1.0144927536231885,
                                       'median': 1.0,
                                       'stdev': 0.11994568395899624}}}


In [9]:
def compute_contigs_table(over_width, limit_60, limit_30, components, vertices_inv):
    """Summarise vertex and base lengths of the safe paths of one experiment.

    Components are used only when ``component['len'] > 2`` and they have more
    than one transcript path. For each of them the experiment key is chosen as

    * ``width + over_width`` by default,
    * the number of transcript paths when ``over_width == -1``,
    * ``2 * width`` when ``over_width == -2``,
    * ``2 * width`` as a fallback when the chosen key has no experiment.

    A component whose fallback experiment is also missing is skipped. Every
    path in the experiment's ``safe_paths`` contributes one record holding
    its number of vertices and its base length.

    Args:
        over_width: Offset added to the component width, or -1 / -2 to select
            the special keys listed above.
        limit_60: Paths whose component has ``len <= limit_60`` form the
            'small' group.
        limit_30: Kept for signature compatibility; no returned group uses it.
        components: List of component dicts (keys read: 'len',
            'transcript_paths', 'width', 'experiments').
        vertices_inv: Mapping passed on to ``base_length``.

    Returns:
        A dict with the groups 'small' and 'total'. Each group holds
        median/mean/stdev for 'vertex_lengths' and 'base_lengths', plus
        'number_of_contigs'. A statistic is ``None`` when the group has too
        few samples to compute it.
    """
    def summarize(values):
        # median/mean need at least one sample and stdev at least two; report
        # None instead of letting StatisticsError abort the whole table.
        return {
            'median': median(values) if values else None,
            'mean': mean(values) if values else None,
            'stdev': stdev(values) if len(values) > 1 else None,
        }

    def group_table(group):
        return {
            'vertex_lengths': summarize([c['vertices'] for c in group]),
            'base_lengths': summarize([c['length'] for c in group]),
            'number_of_contigs': len(group),
        }

    contigs = []
    for i, component in enumerate(components):
        if not (component['len'] > 2 and len(component['transcript_paths']) > 1):
            continue

        width = component['width']
        exp_key = str(width + over_width)
        if over_width == -1:
            exp_key = str(len(component['transcript_paths']))
        if over_width == -2:
            exp_key = str(2 * width)
        if component['experiments'].get(exp_key, None) is None:
            exp_key = str(2 * width)

        safe_paths_info = component['experiments'].get(exp_key, None)
        if safe_paths_info is None:
            # Neither the requested nor the fallback experiment exists.
            continue

        for safe_path in safe_paths_info['safe_paths']:
            contigs.append({
                'component': i,
                'component_length': component['len'],
                'vertices': len(safe_path),
                'length': base_length(safe_path, vertices_inv),
            })

    small = [c for c in contigs if c['component_length'] <= limit_60]

    return {
        'small': group_table(small),
        'total': group_table(contigs),
    }
    
# Safe-path length statistics with over_width=-1, i.e. using per component the
# experiment keyed by its number of transcript paths.
pprint(compute_contigs_table(-1, limit_size_60, limit_size_30, components, vertices_inv))

{'small': {'base_lengths': {'mean': 830.4375,
                            'median': 403.5,
                            'stdev': 939.2898942835192},
           'number_of_contigs': 160,
           'vertex_lengths': {'mean': 2.25,
                              'median': 2.0,
                              'stdev': 0.8087571021725035}},
 'total': {'base_lengths': {'mean': 830.4375,
                            'median': 403.5,
                            'stdev': 939.2898942835192},
           'number_of_contigs': 160,
           'vertex_lengths': {'mean': 2.25,
                              'median': 2.0,
                              'stdev': 0.8087571021725035}}}


In [10]:
def compute_contigs_table_unitigs(limit_60, limit_30, components, vertices_inv):
    """Summarise vertex and base lengths of the unitigs of all components.

    Components are used only when ``component['len'] > 2`` and they have more
    than one transcript path. Every entry of ``component['unitigs']``
    contributes one record holding its number of vertices and its base length.

    Args:
        limit_60: Unitigs whose component has ``len <= limit_60`` form the
            'small' group.
        limit_30: Kept for signature compatibility; no returned group uses it.
        components: List of component dicts (keys read: 'len',
            'transcript_paths', 'unitigs').
        vertices_inv: Mapping passed on to ``base_length``.

    Returns:
        A dict with the groups 'small' and 'total'. Each group holds
        median/mean/stdev for 'vertex_lengths' and 'base_lengths', plus
        'number_of_contigs'. A statistic is ``None`` when the group has too
        few samples to compute it.
    """
    def summarize(values):
        # median/mean need at least one sample and stdev at least two; report
        # None instead of letting StatisticsError abort the whole table.
        return {
            'median': median(values) if values else None,
            'mean': mean(values) if values else None,
            'stdev': stdev(values) if len(values) > 1 else None,
        }

    def group_table(group):
        return {
            'vertex_lengths': summarize([c['vertices'] for c in group]),
            'base_lengths': summarize([c['length'] for c in group]),
            'number_of_contigs': len(group),
        }

    contigs = []
    for i, component in enumerate(components):
        if not (component['len'] > 2 and len(component['transcript_paths']) > 1):
            continue

        for unitig in component['unitigs']:
            contigs.append({
                'component': i,
                'component_length': component['len'],
                'vertices': len(unitig),
                'length': base_length(unitig, vertices_inv),
            })

    small = [c for c in contigs if c['component_length'] <= limit_60]

    return {
        'small': group_table(small),
        'total': group_table(contigs),
    }

# Same length statistics as the safe-path table, computed over
# component['unitigs'] instead of an experiment's safe paths.
pprint(compute_contigs_table_unitigs(limit_size_60, limit_size_30, components, vertices_inv))

{'small': {'base_lengths': {'mean': 801.463768115942,
                            'median': 403.5,
                            'stdev': 915.7881305943654},
           'number_of_contigs': 138,
           'vertex_lengths': {'mean': 2.1231884057971016,
                              'median': 2.0,
                              'stdev': 0.39063673551608824}},
 'total': {'base_lengths': {'mean': 801.463768115942,
                            'median': 403.5,
                            'stdev': 915.7881305943654},
           'number_of_contigs': 138,
           'vertex_lengths': {'mean': 2.1231884057971016,
                              'median': 2.0,
                              'stdev': 0.39063673551608824}}}


In [11]:
def compute_fixed_l_table(over_width, limit_60, limit_30, components, vertices_inv):
    """Summarise per-transcript metrics of one experiment, binned by length.

    Components are used only when ``component['len'] > 2`` and they have more
    than one transcript path. For each of them the experiment key is chosen as

    * ``width + over_width`` by default,
    * the number of transcript paths when ``over_width == -1``,
    * ``2 * width`` when ``over_width == -2``,
    * ``2 * width`` as a fallback when the chosen key has no experiment.

    A component whose fallback experiment is also missing is skipped. For the
    j-th transcript path, the j-th entries of the experiment's
    'e_sizes_vertex' and 'max_cov_vertex' lists are divided by the number of
    vertices of the transcript, and the j-th entries of 'e_size_bases' and
    'max_cov_bases' by its base length.

    Args:
        over_width: Offset added to the component width, or -1 / -2 to select
            the special keys listed above.
        limit_60: Transcripts with base length ``<= limit_60`` are 'small'.
        limit_30: Transcripts with base length in ``(limit_60, limit_30]``
            are 'medium'; longer transcripts are not reported.
        components: List of component dicts (keys read: 'len',
            'transcript_paths', 'width', 'experiments').
        vertices_inv: Mapping passed on to ``base_length``.

    Returns:
        A dict with the groups 'small' and 'medium'. Each group holds
        median/mean/stdev for 'e_size_density_vertex',
        'e_size_density_bases', 'max_prop_cov_vertex' and
        'max_prop_cov_bases'. A statistic is ``None`` when the group has too
        few samples to compute it.
    """
    metrics = (
        'e_size_density_vertex',
        'e_size_density_bases',
        'max_prop_cov_vertex',
        'max_prop_cov_bases',
    )

    def summarize(values):
        # median/mean need at least one sample and stdev at least two; report
        # None instead of letting StatisticsError abort the whole table.
        return {
            'median': median(values) if values else None,
            'mean': mean(values) if values else None,
            'stdev': stdev(values) if len(values) > 1 else None,
        }

    def group_table(group):
        return {metric: summarize([t[metric] for t in group]) for metric in metrics}

    transcripts = []
    for i, component in enumerate(components):
        if not (component['len'] > 2 and len(component['transcript_paths']) > 1):
            continue

        width = component['width']
        exp_key = str(width + over_width)
        if over_width == -1:
            exp_key = str(len(component['transcript_paths']))
        if over_width == -2:
            exp_key = str(2 * width)
        if component['experiments'].get(exp_key, None) is None:
            exp_key = str(2 * width)

        safe_paths_info = component['experiments'].get(exp_key, None)
        if safe_paths_info is None:
            # Neither the requested nor the fallback experiment exists.
            continue

        for j, entry in enumerate(component['transcript_paths']):
            transcript = entry['transcript_path']
            length = base_length(transcript, vertices_inv)
            transcripts.append({
                'component': i,
                'length': length,
                'transcript': transcript,
                'e_size_density_vertex': safe_paths_info['e_sizes_vertex'][j] / len(transcript),
                'e_size_density_bases': safe_paths_info['e_size_bases'][j] / length,
                'max_prop_cov_bases': safe_paths_info['max_cov_bases'][j] / length,
                'max_prop_cov_vertex': safe_paths_info['max_cov_vertex'][j] / len(transcript),
            })

    small = [t for t in transcripts if t['length'] <= limit_60]
    medium = [t for t in transcripts if limit_60 < t['length'] <= limit_30]

    return {
        'small': group_table(small),
        'medium': group_table(medium),
    }
    
# over_width=0 selects the experiment keyed by each component's width;
# transcripts are binned by base length using limit_60 / limit_30.
pprint(compute_fixed_l_table(0, limit_60, limit_30, components, vertices_inv))

{'medium': {'e_size_density_bases': {'mean': 0.9829975218027274,
                                     'median': 1.0,
                                     'stdev': 0.03980865683913678},
            'e_size_density_vertex': {'mean': 0.9480864197530864,
                                      'median': 1.0,
                                      'stdev': 0.1134371102717499},
            'max_prop_cov_bases': {'mean': 0.995549128304009,
                                   'median': 1.0,
                                   'stdev': 0.017238151954679114},
            'max_prop_cov_vertex': {'mean': 0.9777777777777777,
                                    'median': 1.0,
                                    'stdev': 0.08606629658238704}},
 'small': {'e_size_density_bases': {'mean': 0.97189134237842,
                                    'median': 1.0,
                                    'stdev': 0.09591655296680723},
           'e_size_density_vertex': {'mean': 0.9702865707434053,
                     

In [12]:
def compute_fixed_l_table_unitigs(limit_60, limit_30, components, vertices_inv):
    """Summarise the component-level per-transcript metrics, binned by length.

    Components are used only when ``component['len'] > 2`` and they have more
    than one transcript path. For the j-th transcript path, the j-th entries
    of the component's own 'e_sizes_vertex' and 'max_cov_vertex' lists are
    divided by the number of vertices of the transcript, and the j-th entries
    of 'e_size_bases' and 'max_cov_bases' by its base length.

    Args:
        limit_60: Transcripts with base length ``<= limit_60`` are 'small'.
        limit_30: Transcripts with base length in ``(limit_60, limit_30]``
            are 'medium'; longer transcripts are not reported.
        components: List of component dicts (keys read: 'len',
            'transcript_paths', 'e_sizes_vertex', 'e_size_bases',
            'max_cov_bases', 'max_cov_vertex').
        vertices_inv: Mapping passed on to ``base_length``.

    Returns:
        A dict with the groups 'small' and 'medium'. Each group holds
        median/mean/stdev for 'e_size_density_vertex',
        'e_size_density_bases', 'max_prop_cov_vertex' and
        'max_prop_cov_bases'. A statistic is ``None`` when the group has too
        few samples to compute it.
    """
    metrics = (
        'e_size_density_vertex',
        'e_size_density_bases',
        'max_prop_cov_vertex',
        'max_prop_cov_bases',
    )

    def summarize(values):
        # median/mean need at least one sample and stdev at least two; report
        # None instead of letting StatisticsError abort the whole table.
        return {
            'median': median(values) if values else None,
            'mean': mean(values) if values else None,
            'stdev': stdev(values) if len(values) > 1 else None,
        }

    def group_table(group):
        return {metric: summarize([t[metric] for t in group]) for metric in metrics}

    transcripts = []
    for i, component in enumerate(components):
        if not (component['len'] > 2 and len(component['transcript_paths']) > 1):
            continue

        for j, entry in enumerate(component['transcript_paths']):
            transcript = entry['transcript_path']
            length = base_length(transcript, vertices_inv)
            transcripts.append({
                'component': i,
                'length': length,
                'transcript': transcript,
                'e_size_density_vertex': component['e_sizes_vertex'][j] / len(transcript),
                'e_size_density_bases': component['e_size_bases'][j] / length,
                'max_prop_cov_bases': component['max_cov_bases'][j] / length,
                'max_prop_cov_vertex': component['max_cov_vertex'][j] / len(transcript),
            })

    small = [t for t in transcripts if t['length'] <= limit_60]
    medium = [t for t in transcripts if limit_60 < t['length'] <= limit_30]

    return {
        'small': group_table(small),
        'medium': group_table(medium),
    }

# Same per-transcript metrics, read from the lists stored directly on each
# component rather than from one of its experiments.
pprint(compute_fixed_l_table_unitigs(limit_60, limit_30, components, vertices_inv))

{'medium': {'e_size_density_bases': {'mean': 0.7261533067390066,
                                     'median': 0.9726511291185174,
                                     'stdev': 0.3957126823491937},
            'e_size_density_vertex': {'mean': 0.6583333333333333,
                                      'median': 0.875,
                                      'stdev': 0.3508068930201859},
            'max_prop_cov_bases': {'mean': 0.7600505509465225,
                                   'median': 1.0,
                                   'stdev': 0.4029466245898773},
            'max_prop_cov_vertex': {'mean': 0.7333333333333333,
                                    'median': 1.0,
                                    'stdev': 0.40237390808147827}},
 'small': {'e_size_density_bases': {'mean': 0.8260814796255316,
                                    'median': 0.9345334993137349,
                                    'stdev': 0.2880092345297924},
           'e_size_density_vertex': {'mean': 0.77978117

In [13]:
def compute_fixed_rd_table(over_width, limit_60, limit_30, components, vertices_inv):
    """Summarise per-transcript ratios and per-component precision for one l.

    Only components with more than 2 vertices and more than one transcript
    path are used.  The experiment key ``l`` of each component is derived
    from ``over_width``:

    * ``-1``: the number of transcript paths of the component,
    * ``-2``: twice the component width,
    * anything else: ``width + over_width``.

    If the component has no experiment under that key, the experiment stored
    under ``2 * width`` is used instead; if that is missing too, the component
    is skipped.

    Parameters:
        over_width: selector for the experiment key (see above).
        limit_60: inclusive upper bound on ``component['len']`` for the
            'small' class.
        limit_30: not used, because only the 'small' class is reported; kept
            so that existing callers keep working.
        components: list of component dicts (``len``, ``width``,
            ``transcript_paths``, ``experiments``).
        vertices_inv: vertex -> interval mapping handed to ``base_length``.

    Returns:
        ``{'small': {metric: {'median', 'mean', 'stdev'}}}`` for the metrics
        ``e_size_density_vertex``, ``e_size_density_bases``,
        ``max_prop_cov_vertex``, ``max_prop_cov_bases``, ``precision_bases``
        and ``precision_vertex``.

    Raises:
        statistics.StatisticsError: if the 'small' class holds too few values
            (``stdev`` needs at least two).
    """
    def summarize(values):
        return {
            'median': median(values),
            'mean': mean(values),
            'stdev': stdev(values)
        }

    genes = list()
    transcripts = list()
    for i, component in enumerate(components):
        if component['len'] <= 2 or len(component['transcript_paths']) <= 1:
            continue

        width = component['width']
        if over_width == -1:
            l = str(len(component['transcript_paths']))
        elif over_width == -2:
            l = str(2*width)
        else:
            l = str(width+over_width)

        experiments = component['experiments']
        safe_paths_info = experiments.get(l)
        if safe_paths_info is None:
            # No run for the requested l: fall back to the run for l = 2*width.
            safe_paths_info = experiments.get(str(2*width))
        if safe_paths_info is None:
            continue

        for j, transcript in enumerate(component['transcript_paths']):
            path = transcript['transcript_path']
            length = base_length(path, vertices_inv)
            transcripts.append({
                'component': i,
                'component_length': component['len'],
                'length': length,
                'transcript': path,
                'e_size_density_vertex': safe_paths_info['e_sizes_vertex'][j]/len(path),
                'e_size_density_bases': safe_paths_info['e_size_bases'][j]/length,
                'max_prop_cov_bases': safe_paths_info['max_cov_bases'][j]/length,
                'max_prop_cov_vertex': safe_paths_info['max_cov_vertex'][j]/len(path),
            })

        tp_bases = 0
        tp_vertex = 0
        for safe_path in safe_paths_info['true_positives']:
            tp_bases += base_length(safe_path, vertices_inv)
            tp_vertex += len(safe_path)

        # Positives = true positives + false positives.
        p_bases = tp_bases
        p_vertex = tp_vertex
        for safe_path in safe_paths_info['false_positives']:
            p_bases += base_length(safe_path, vertices_inv)
            p_vertex += len(safe_path)

        # A run that reports no safe path at all gets precision 1.0, the same
        # convention as compute_fixed_rd_table_unitigs, instead of dividing by 0.
        genes.append({
            'precision_bases': 1.0 if p_bases == 0 else 1.0*tp_bases/p_bases,
            'precision_vertex': 1.0 if p_vertex == 0 else 1.0*tp_vertex/p_vertex,
            'component_length': component['len'],
        })

    small_transcripts = [t for t in transcripts if t['component_length'] <= limit_60]
    small_genes = [g for g in genes if g['component_length'] <= limit_60]

    small = dict()
    for metric in ('e_size_density_vertex', 'e_size_density_bases',
                   'max_prop_cov_vertex', 'max_prop_cov_bases'):
        small[metric] = summarize([t[metric] for t in small_transcripts])
    for metric in ('precision_bases', 'precision_vertex'):
        small[metric] = summarize([g[metric] for g in small_genes])

    return {'small': small}
    
## over_width = 0: the experiment key is the component width itself.
pprint(compute_fixed_rd_table(0, limit_size_60, limit_size_30, components, vertices_inv))

{'small': {'e_size_density_bases': {'mean': 0.9731474801138148,
                                    'median': 1.0,
                                    'stdev': 0.09167205418441864},
           'e_size_density_vertex': {'mean': 0.9683298685782556,
                                     'median': 1.0,
                                     'stdev': 0.09708365157643736},
           'max_prop_cov_bases': {'mean': 0.9858553658143903,
                                  'median': 1.0,
                                  'stdev': 0.07826097480010293},
           'max_prop_cov_vertex': {'mean': 0.9849462365591398,
                                   'median': 1.0,
                                   'stdev': 0.07684184281948615},
           'precision_bases': {'mean': 0.2054794520547945,
                               'median': 0.0,
                               'stdev': 0.4068477776211284},
           'precision_vertex': {'mean': 0.2054794520547945,
                                'median': 0.0,
     

In [14]:
def compute_fixed_rd_table_unitigs(limit_60, limit_30, components, vertices_inv):
    """Summarise the ratios and precision stored directly on each component.

    Components need more than 2 vertices and more than one transcript path to
    be taken into account.  Per transcript, the stored ``e_sizes_vertex``,
    ``e_size_bases``, ``max_cov_bases`` and ``max_cov_vertex`` entries are
    divided by the transcript length (in vertices or in bases).  Per
    component, precision is true positives over all positives, measured in
    bases and in vertices, and is 1.0 when there are no positives.

    Only components with ``len <= limit_60`` end up in the result, which has
    the single key ``'small'`` mapping every metric to its median, mean and
    stdev.  ``limit_30`` does not influence the result.
    """
    transcript_metrics = ('e_size_density_vertex', 'e_size_density_bases',
                          'max_prop_cov_vertex', 'max_prop_cov_bases')
    gene_metrics = ('precision_bases', 'precision_vertex')

    transcript_rows = []
    gene_rows = []
    for idx, component in enumerate(components):
        if component['len'] <= 2 or len(component['transcript_paths']) <= 1:
            continue

        # The value is not needed, but the lookup is kept so a component
        # without this key still raises KeyError.
        component['unitigs']

        for pos, entry in enumerate(component['transcript_paths']):
            path = entry['transcript_path']
            n_bases = base_length(path, vertices_inv)
            transcript_rows.append({
                'component': idx,
                'component_length': component['len'],
                'length': n_bases,
                'transcript': path,
                'e_size_density_vertex': component['e_sizes_vertex'][pos]/len(path),
                'e_size_density_bases': component['e_size_bases'][pos]/n_bases,
                'max_prop_cov_bases': component['max_cov_bases'][pos]/n_bases,
                'max_prop_cov_vertex': component['max_cov_vertex'][pos]/len(path),
            })

        true_bases = 0
        true_vertices = 0
        for path in component['true_positives']:
            true_bases += base_length(path, vertices_inv)
            true_vertices += len(path)

        all_bases = true_bases
        all_vertices = true_vertices
        for path in component['false_positives']:
            all_bases += base_length(path, vertices_inv)
            all_vertices += len(path)

        gene_rows.append({
            'precision_bases': 1.0*true_bases/all_bases if all_bases != 0 else 1.0,
            'precision_vertex': 1.0*true_vertices/all_vertices if all_vertices != 0 else 1.0,
            'component_length': component['len'],
        })

    small_transcripts = [row for row in transcript_rows if row['component_length'] <= limit_60]
    small_genes = [row for row in gene_rows if row['component_length'] <= limit_60]

    table = {}
    for metrics, rows in ((transcript_metrics, small_transcripts), (gene_metrics, small_genes)):
        for metric in metrics:
            values = [row[metric] for row in rows]
            table[metric] = {
                'median': median(values),
                'mean': mean(values),
                'stdev': stdev(values)
            }

    return {'small': table}

## Same table layout as compute_fixed_rd_table, built from the values stored
## directly on each component.
pprint(compute_fixed_rd_table_unitigs(limit_size_60, limit_size_30, components, vertices_inv))

{'small': {'e_size_density_bases': {'mean': 0.8175051832082422,
                                    'median': 0.9396737457679286,
                                    'stdev': 0.29941926854180095},
           'e_size_density_vertex': {'mean': 0.7686424731182796,
                                     'median': 0.875,
                                     'stdev': 0.26637524498547194},
           'max_prop_cov_bases': {'mean': 0.8774557529429382,
                                  'median': 1.0,
                                  'stdev': 0.28600498556414067},
           'max_prop_cov_vertex': {'mean': 0.8723655913978494,
                                   'median': 1.0,
                                   'stdev': 0.2851067412935289},
           'precision_bases': {'mean': 1.0, 'median': 1.0, 'stdev': 0.0},
           'precision_vertex': {'mean': 1.0, 'median': 1.0, 'stdev': 0.0}}}


In [15]:
def compute_time_table(over_width, limit_60, limit_30, components, vertices_inv, algorithm_experiments='experiments'):
    """Add up the recorded running times per graph-size class.

    For every component with more than 2 vertices and more than one
    transcript path, the run stored under ``component[algorithm_experiments]``
    for the selected key is looked up; components without such a run are
    ignored.  The key is ``str(width + over_width)``, except that
    ``over_width == -1`` selects the number of transcript paths and
    ``over_width == -2`` selects ``2 * width``.

    A run contributes ``time_main + time_filter``.  Classes are decided by
    ``component['len']``: 'small' (<= limit_60), 'medium' (> limit_60 and
    <= limit_30), 'large' (> limit_30) and 'all'.  ``vertices_inv`` is not
    used.

    Returns a dict ``{class: {'time': total}}``.
    """
    timings = []  # (component length, time of the selected run)
    for component in components:
        if component['len'] <= 2 or len(component['transcript_paths']) <= 1:
            continue

        width = component['width']
        if over_width == -1:
            key = str(len(component['transcript_paths']))
        elif over_width == -2:
            key = str(2*width)
        else:
            key = str(width+over_width)

        run = component[algorithm_experiments].get(key, None)
        if run is None:
            continue
        timings.append((component['len'], run['time_main']+run['time_filter']))

    def total(keep):
        return sum(spent for size, spent in timings if keep(size))

    return {
        'small': {'time': total(lambda size: size <= limit_60)},
        'medium': {'time': total(lambda size: size > limit_60 and size <= limit_30)},
        'large': {'time': total(lambda size: size > limit_30)},
        'all': {'time': total(lambda size: True)}
    }

## Total times for over_width = 1 (key width + 1), once per experiment set
## stored on the components: the default 'experiments', then the
## 'experiments_two_finger', 'experiments_unoptimized' and
## 'experiments_heuristic' entries.
pprint(compute_time_table(1, limit_size_60, limit_size_30, components, vertices_inv))
pprint(compute_time_table(1, limit_size_60, limit_size_30, components, vertices_inv, 'experiments_two_finger'))
pprint(compute_time_table(1, limit_size_60, limit_size_30, components, vertices_inv, 'experiments_unoptimized'))
pprint(compute_time_table(1, limit_size_60, limit_size_30, components, vertices_inv, 'experiments_heuristic'))

{'all': {'time': 3191},
 'large': {'time': 0},
 'medium': {'time': 0},
 'small': {'time': 3191}}
{'all': {'time': 2252},
 'large': {'time': 0},
 'medium': {'time': 0},
 'small': {'time': 2252}}
{'all': {'time': 2641},
 'large': {'time': 0},
 'medium': {'time': 0},
 'small': {'time': 2641}}
{'all': {'time': 3174},
 'large': {'time': 0},
 'medium': {'time': 0},
 'small': {'time': 3174}}


In [16]:
def compute_cdss_coverage_table(over_width, limit_60, limit_30, components, vertices_inv):
    """Summarise the relative maximum coverage of CDSs, grouped by CDS length.

    Components need more than 2 vertices and more than one transcript path.
    The experiment key is ``str(width + over_width)``, or the number of
    transcript paths for ``over_width == -1``, or ``2 * width`` for
    ``over_width == -2``.  When no run exists under that key the run for
    ``2 * width`` is tried; components without either run are skipped.

    Every entry of the run's ``cdss_coverage`` lists contributes
    ``max_cov / length``.  Entries are classed by their own ``length``:
    'small' (<= limit_60), 'medium' (> limit_60 and <= limit_30) and 'large'
    (> limit_30).  ``vertices_inv`` is not used.

    Returns ``{class: {'max_cov_rel': {'median', 'mean', 'stdev'}}}``.
    """
    rows = []
    for idx, component in enumerate(components):
        if component['len'] <= 2 or len(component['transcript_paths']) <= 1:
            continue

        width = component['width']
        if over_width == -1:
            key = str(len(component['transcript_paths']))
        elif over_width == -2:
            key = str(2*component['width'])
        else:
            key = str(width+over_width)

        if component['experiments'].get(key, None) is None:
            key = str(2*width)
        if component['experiments'].get(key, None) is None:
            continue

        run = component['experiments'][key]
        for per_transcript in run['cdss_coverage']:
            for cds in per_transcript:
                length = cds['length']
                rows.append({
                    'component': idx,
                    'length': length,
                    'max_cov_rel': cds['max_cov']/length,
                })

    def ratios(keep):
        return [row['max_cov_rel'] for row in rows if keep(row['length'])]

    def summary(values):
        return {
            'max_cov_rel': {
                'median': median(values),
                'mean': mean(values),
                'stdev': stdev(values)
            }
        }

    small = ratios(lambda length: length <= limit_60)
    medium = ratios(lambda length: length > limit_60 and length <= limit_30)
    large = ratios(lambda length: length > limit_30)

    return {
        'small': summary(small),
        'medium': summary(medium),
        'large': summary(large)
    }
    
## over_width = 0: the experiment key is the component width itself.
pprint(compute_cdss_coverage_table(0, limit_cdss_60, limit_cdss_30, components, vertices_inv))

{'large': {'max_cov_rel': {'mean': 0.9988480795781559,
                           'median': 1.0,
                           'stdev': 0.008772764568775484}},
 'medium': {'max_cov_rel': {'mean': 0.997726595110316,
                            'median': 1.0,
                            'stdev': 0.02108268963272353}},
 'small': {'max_cov_rel': {'mean': 1.0, 'median': 1.0, 'stdev': 0.0}}}


In [17]:
def compute_cdss_coverage_table_unitigs(limit_60, limit_30, components, vertices_inv):
    """Summarise the relative maximum coverage of the CDSs stored on components.

    For every component with more than 2 vertices and more than one
    transcript path, each entry of its ``cdss_coverage`` lists contributes
    ``max_cov / length``.  Entries are classed by their own ``length``:
    'small' (<= limit_60), 'medium' (> limit_60 and <= limit_30) and 'large'
    (> limit_30).  ``vertices_inv`` is not used.

    Returns ``{class: {'max_cov_rel': {'median', 'mean', 'stdev'}}}``.
    """
    rows = []
    for idx, component in enumerate(components):
        if component['len'] <= 2 or len(component['transcript_paths']) <= 1:
            continue
        for per_transcript in component['cdss_coverage']:
            for cds in per_transcript:
                length = cds['length']
                rows.append({
                    'component': idx,
                    'length': length,
                    'max_cov_rel': cds['max_cov']/length,
                })

    def ratios(keep):
        return [row['max_cov_rel'] for row in rows if keep(row['length'])]

    def summary(values):
        return {
            'max_cov_rel': {
                'median': median(values),
                'mean': mean(values),
                'stdev': stdev(values)
            }
        }

    small = ratios(lambda length: length <= limit_60)
    medium = ratios(lambda length: length > limit_60 and length <= limit_30)
    large = ratios(lambda length: length > limit_30)

    return {
        'small': summary(small),
        'medium': summary(medium),
        'large': summary(large)
    }

## Same layout as compute_cdss_coverage_table, built from the 'cdss_coverage'
## lists stored directly on each component.
pprint(compute_cdss_coverage_table_unitigs(limit_cdss_60, limit_cdss_30, components, vertices_inv))

{'large': {'max_cov_rel': {'mean': 0.8171113658381152,
                           'median': 1.0,
                           'stdev': 0.36634504631060316}},
 'medium': {'max_cov_rel': {'mean': 0.8294385505652703,
                            'median': 1.0,
                            'stdev': 0.35807583327648657}},
 'small': {'max_cov_rel': {'mean': 0.5264509729433118,
                           'median': 0.8632075471698113,
                           'stdev': 0.4988837326887965}}}


In [18]:
## And peak memory
## One maximum per experiment set, taken over every run of every component;
## a component that lacks the experiment set counts as 0.
## NOTE(review): runs under 'experiments' are read with key 'peak_memory',
## the other three sets with 'peak_mem' -- confirm against the JSON files.
(
    max(
        0 if c.get('experiments', None) is None
        else max(run['peak_memory'] for run in c['experiments'].values())
        for c in components
    ),
    max(
        0 if c.get('experiments_two_finger', None) is None
        else max(run['peak_mem'] for run in c['experiments_two_finger'].values())
        for c in components
    ),
    max(
        0 if c.get('experiments_unoptimized', None) is None
        else max(run['peak_mem'] for run in c['experiments_unoptimized'].values())
        for c in components
    ),
    max(
        0 if c.get('experiments_heuristic', None) is None
        else max(run['peak_mem'] for run in c['experiments_heuristic'].values())
        for c in components
    ),
)

(3624, 3688, 3680, 3668)

In [19]:
## LATEX TABLE GENERATORS

In [20]:
import math
def trunc(number, decimals):
    """Cut ``number`` down to ``decimals`` decimal places without rounding.

    The value is scaled by ``10.0 ** decimals``, truncated towards zero with
    ``math.trunc`` and scaled back, so the result is a float.
    """
    scale = 10.0 ** decimals
    whole_part = math.trunc(number * scale)
    return whole_part / scale

In [21]:
def merge(source, destination):
    """
    Recursively copy ``source`` into ``destination`` and return ``destination``.

    Nested dicts are merged key by key; any other value overwrites the entry
    already present in ``destination``.  ``destination`` is modified in place.

    run me with nosetests --with-doctest file.py

    >>> a = { 'first' : { 'all_rows' : { 'pass' : 'dog', 'number' : '1' } } }
    >>> b = { 'first' : { 'all_rows' : { 'fail' : 'cat', 'number' : '5' } } }
    >>> merge(b, a) == { 'first' : { 'all_rows' : { 'pass' : 'dog', 'fail' : 'cat', 'number' : '5' } } }
    True
    """
    for key, value in source.items():
        if not isinstance(value, dict):
            destination[key] = value
            continue
        # Descend into the matching sub-dict, creating it when absent.
        merge(value, destination.setdefault(key, {}))

    return destination

In [22]:
def format_number(n):
    """Render an exact ``int`` verbatim and anything else with two decimals."""
    return str(n) if type(n) == int else f"{n:.2f}"

def format_cell(data, key):
    """Build the two LaTeX ``\\cellx`` cells (bases, vertices) for ``data[key]``.

    Each cell stacks the mean absolute improvement over the mean relative
    improvement, both rendered with ``format_number``.
    """
    stats = data[key]
    abs_base = format_number(stats['abs_improvements_base']['mean'])
    rel_base = format_number(stats['rel_improvements_base']['mean'])
    abs_vertex = format_number(stats['abs_improvements_vertex']['mean'])
    rel_vertex = format_number(stats['rel_improvements_vertex']['mean'])

    bases_cell = f"\\cellx{{2}}{{{abs_base}\\\\{rel_base}}}"
    vertex_cell = f"\\cellx{{2}}{{{abs_vertex}\\\\{rel_vertex}}}"
    return bases_cell + "  & " + vertex_cell

def format_row(data, key):
    """Build one LaTeX table row for ``data[key]``.

    Only the 'small' class is filled in; the medium and large columns are
    emitted as ``--`` placeholders.
    """
    placeholder = "-- & --"
    small_cells = format_cell(data[key], 'small')
    return f"${key}$  & {small_cells} & {placeholder}  & {placeholder} \\\\\\hline"

def format_first_paper_table(data):
    """Return the LaTeX source of the improvement table.

    The header reads the module-level ``limit_size_60``, ``limit_size_30``
    and ``limit_size_10`` values; the body has one ``format_row`` line for
    each of the rows 'k', 'k+1', 't' and '2k'.
    """
    preamble = f"""\\begin{{center}}
\\begin{{tabular}}{{|c|cc|cc|cc|}}
\\hline
 & \\multicolumn{{2}}{{c|}}{{small graphs (3-{limit_size_60} vertices)}}  & \\multicolumn{{2}}{{c|}}{{medium graphs ({limit_size_60+1}-{limit_size_30} vertices)}} & \\multicolumn{{2}}{{c|}}{{large graphs ({limit_size_30+1}-{limit_size_10} vertices)}}    \\\\
$\\ell$  & bases     & vertices      & bases     & vertices     & bases     & vertices     \\\\\\hline\\hline"""
    rows = [format_row(data, label) for label in ('k', 'k+1', 't', '2k')]
    # The blank after \end{tabular} is part of the emitted text.
    closing = ["\\end{tabular} ", "\\end{center}"]
    return "\n".join([preamble] + rows + closing)


## Placeholder data to try out the table layout: every row / size class /
## metric carries the same statistics.
d = {
    row: {
        size: {
            metric: {'mean': 0.23, 'median': 0.22, 'stdev': 0.1}
            for metric in ('abs_improvements_base', 'abs_improvements_vertex')
        }
        for size in ('small', 'medium', 'large')
    }
    for row in ('k', 'k+1', 't', '2k')
}

d2 = {
    row: {
        size: {
            metric: {'mean': 0.24, 'median': 0.22, 'stdev': 0.1}
            for metric in ('rel_improvements_base', 'rel_improvements_vertex')
        }
        for size in ('small', 'medium', 'large')
    }
    for row in ('k', 'k+1', 't', '2k')
}
print(format_first_paper_table(merge(d,d2)))

\begin{center}
\begin{tabular}{|c|cc|cc|cc|}
\hline
 & \multicolumn{2}{c|}{small graphs (3-15 vertices)}  & \multicolumn{2}{c|}{medium graphs (16-50 vertices)} & \multicolumn{2}{c|}{large graphs (51-725 vertices)}    \\
$\ell$  & bases     & vertices      & bases     & vertices     & bases     & vertices     \\\hline\hline
$k$  & \cellx{2}{0.23\\0.24}  & \cellx{2}{0.23\\0.24} & -- & --  & -- & -- \\\hline
$k+1$  & \cellx{2}{0.23\\0.24}  & \cellx{2}{0.23\\0.24} & -- & --  & -- & -- \\\hline
$t$  & \cellx{2}{0.23\\0.24}  & \cellx{2}{0.23\\0.24} & -- & --  & -- & -- \\\hline
$2k$  & \cellx{2}{0.23\\0.24}  & \cellx{2}{0.23\\0.24} & -- & --  & -- & -- \\\hline
\end{tabular} 
\end{center}


In [23]:
## One absolute-improvement table per row label; the number is the over_width
## argument handed to compute_abs_improvement_table.
abs_impr_dict = {
    label: compute_abs_improvement_table(over_width, limit_size_60, limit_size_30, components, vertices_inv)
    for label, over_width in (('k', 0), ('k+1', 1), ('t', -1), ('2k', -2))
}

In [24]:
## One relative-improvement table per row label; the number is the over_width
## argument handed to compute_rel_improvement_table.
rel_impr_dict = {
    label: compute_rel_improvement_table(over_width, limit_size_60, limit_size_30, components, vertices_inv)
    for label, over_width in (('k', 0), ('k+1', 1), ('t', -1), ('2k', -2))
}

In [25]:
## merge() copies abs_impr_dict into rel_impr_dict and returns that same,
## modified dict, so impr_dict and rel_impr_dict are one object afterwards.
impr_dict = merge(abs_impr_dict, rel_impr_dict)
print(format_first_paper_table(impr_dict))

\begin{center}
\begin{tabular}{|c|cc|cc|cc|}
\hline
 & \multicolumn{2}{c|}{small graphs (3-15 vertices)}  & \multicolumn{2}{c|}{medium graphs (16-50 vertices)} & \multicolumn{2}{c|}{large graphs (51-725 vertices)}    \\
$\ell$  & bases     & vertices      & bases     & vertices     & bases     & vertices     \\\hline\hline
$k$  & \cellx{2}{709.32\\2.91}  & \cellx{2}{1.19\\1.56} & -- & --  & -- & -- \\\hline
$k+1$  & \cellx{2}{8.88\\1.01}  & \cellx{2}{0.05\\1.01} & -- & --  & -- & -- \\\hline
$t$  & \cellx{2}{8.88\\1.01}  & \cellx{2}{0.05\\1.01} & -- & --  & -- & -- \\\hline
$2k$  & \cellx{2}{8.88\\1.01}  & \cellx{2}{0.05\\1.01} & -- & --  & -- & -- \\\hline
\end{tabular} 
\end{center}


In [26]:
def format_number(n):
    """Render an exact ``int`` verbatim; truncate anything else to two decimals.

    Non-int values go through ``trunc(n, 2)`` before formatting, so they are
    cut off rather than rounded.
    """
    return str(n) if type(n) == int else f"{trunc(n,2):.2f}"

def format_cell(data, key, variant):
    """Build the two LaTeX ``\\cell`` cells for ``data[key]``.

    The first cell stacks mean over median of ``'e_size_density' + variant``,
    the second does the same for ``'max_prop_cov' + variant``.
    """
    stats = data[key]

    density = stats['e_size_density'+variant]
    density_mean = format_number(density['mean'])
    density_median = format_number(density['median'])

    coverage = stats['max_prop_cov'+variant]
    coverage_mean = format_number(coverage['mean'])
    coverage_median = format_number(coverage['median'])

    density_cell = f"\\cell{{2}}{{{density_mean}\\\\{density_median}}}"
    coverage_cell = f"\\cell{{2}}{{{coverage_mean}\\\\{coverage_median}}}"
    return density_cell + " & " + coverage_cell

def format_row(data, key, variant):
    """Build the cells of one LaTeX row for ``data[key]``.

    The 'small' and 'medium' classes are filled in with ``format_cell``; the
    large columns are emitted as ``--`` placeholders.
    """
    entry = data[key]
    small_cells = format_cell(entry, 'small', variant)
    medium_cells = format_cell(entry, 'medium', variant)
    placeholder = "-- & --"
    return f"{small_cells} & {medium_cells} & {placeholder} \\\\\\hline"
    

def format_second_paper_table(data, variant=''):
    """Return the LaTeX source of the table grouped by base-length ranges.

    ``data`` is indexed by the row labels 'k', 'k+1', 't', '2k' and 'unitigs'
    (each handed to ``format_row``); ``variant`` is the metric-name suffix
    forwarded to ``format_row``. The range boundaries in the header come from
    the notebook globals ``limit_60``, ``limit_30`` and ``limit_10``.
    """
    size_header = (
        f" & \\multicolumn{{2}}{{c|}}{{small (1-{limit_60} bases)}}    "
        f"& \\multicolumn{{2}}{{c|}}{{medium ({limit_60+1}-{limit_30} bases)}} "
        f"& \\multicolumn{{2}}{{c|}}{{large ({limit_30+1}-{limit_10} bases)}}    \\\\"
    )
    metric_header = (
        "$\\ell$  & \\texttt{esr}     & \\texttt{mcr}     & \\texttt{esr}     "
        "& \\texttt{mcr}    & \\texttt{esr}     & \\texttt{mcr}    \\\\\\hline\\hline"
    )

    body_rows = [
        f"${label}$  & {format_row(data, label, variant)}"
        for label in ('k', 'k+1', 't', '2k')
    ]
    # An extra \hline separates the last ell row from the unitigs row.
    body_rows[-1] += "\\hline"

    lines = [
        "\\begin{center}",
        "\\begin{tabular}{|c|cc|cc|cc|}",
        "\\hline",
        size_header,
        metric_header,
        *body_rows,
        "\\cell{1}{$ST$-\\\\unitigs}",
        f"   & {format_row(data, 'unitigs', variant)}",
        # The trailing space after \end{tabular} is part of the emitted text.
        "\\end{tabular} ",
        "\\end{center}",
    ]
    return "\n".join(lines)

## Hand-written placeholder statistics used to try out the formatter: every
## size group and metric of a row shares the same mean/median, and the stdev
## equals the mean.
d = {
    row_label: {
        size: {
            metric: {'mean': mean_value, 'median': median_value, 'stdev': mean_value}
            for metric in ('e_size_density', 'max_prop_cov')
        }
        for size in ('small', 'medium', 'large')
    }
    for row_label, mean_value, median_value in (
        ('k', 0.1, 0.2),
        ('k+1', 0.3, 0.4),
        ('t', 0.5, 0.6),
        ('2k', 0.5, 0.6),
        ('unitigs', 0.7, 0.8),
    )
}

print(format_second_paper_table(data=d))

\begin{center}
\begin{tabular}{|c|cc|cc|cc|}
\hline
 & \multicolumn{2}{c|}{small (1-2000 bases)}    & \multicolumn{2}{c|}{medium (2001-5000 bases)} & \multicolumn{2}{c|}{large (5001-205012 bases)}    \\
$\ell$  & \texttt{esr}     & \texttt{mcr}     & \texttt{esr}     & \texttt{mcr}    & \texttt{esr}     & \texttt{mcr}    \\\hline\hline
$k$  & \cell{2}{0.10\\0.20} & \cell{2}{0.10\\0.20} & \cell{2}{0.10\\0.20} & \cell{2}{0.10\\0.20} & -- & -- \\\hline
$k+1$  & \cell{2}{0.30\\0.40} & \cell{2}{0.30\\0.40} & \cell{2}{0.30\\0.40} & \cell{2}{0.30\\0.40} & -- & -- \\\hline
$t$  & \cell{2}{0.50\\0.60} & \cell{2}{0.50\\0.60} & \cell{2}{0.50\\0.60} & \cell{2}{0.50\\0.60} & -- & -- \\\hline
$2k$  & \cell{2}{0.50\\0.60} & \cell{2}{0.50\\0.60} & \cell{2}{0.50\\0.60} & \cell{2}{0.50\\0.60} & -- & -- \\\hline\hline
\cell{1}{$ST$-\\unitigs}
   & \cell{2}{0.70\\0.80} & \cell{2}{0.70\\0.80} & \cell{2}{0.70\\0.80} & \cell{2}{0.70\\0.80} & -- & -- \\\hline
\end{tabular} 
\end{center}


In [27]:
## Tables grouped with the limit_60 / limit_30 thresholds: the unitigs
## baseline first, then one entry per `$\ell$` label (each label is paired with
## the integer code passed to compute_fixed_l_table).
fixed_l_dict = {
    'unitigs': compute_fixed_l_table_unitigs(limit_60, limit_30, components, vertices_inv),
    **{
        rl_label: compute_fixed_l_table(rl_code, limit_60, limit_30, components, vertices_inv)
        for rl_label, rl_code in (('k', 0), ('k+1', 1), ('t', -1), ('2k', -2))
    },
}

In [28]:
## Print the table built from the metrics whose names end in '_bases'.
print(format_second_paper_table(data=fixed_l_dict, variant='_bases'))

\begin{center}
\begin{tabular}{|c|cc|cc|cc|}
\hline
 & \multicolumn{2}{c|}{small (1-2000 bases)}    & \multicolumn{2}{c|}{medium (2001-5000 bases)} & \multicolumn{2}{c|}{large (5001-205012 bases)}    \\
$\ell$  & \texttt{esr}     & \texttt{mcr}     & \texttt{esr}     & \texttt{mcr}    & \texttt{esr}     & \texttt{mcr}    \\\hline\hline
$k$  & \cell{2}{0.97\\1.00} & \cell{2}{0.98\\1.00} & \cell{2}{0.98\\1.00} & \cell{2}{0.99\\1.00} & -- & -- \\\hline
$k+1$  & \cell{2}{0.90\\0.93} & \cell{2}{0.96\\1.00} & \cell{2}{0.90\\0.97} & \cell{2}{0.95\\1.00} & -- & -- \\\hline
$t$  & \cell{2}{0.90\\0.93} & \cell{2}{0.96\\1.00} & \cell{2}{0.90\\0.97} & \cell{2}{0.95\\1.00} & -- & -- \\\hline
$2k$  & \cell{2}{0.90\\0.93} & \cell{2}{0.96\\1.00} & \cell{2}{0.90\\0.97} & \cell{2}{0.95\\1.00} & -- & -- \\\hline\hline
\cell{1}{$ST$-\\unitigs}
   & \cell{2}{0.82\\0.93} & \cell{2}{0.88\\1.00} & \cell{2}{0.72\\0.97} & \cell{2}{0.76\\1.00} & -- & -- \\\hline
\end{tabular} 
\end{center}


In [29]:
## Fifth table: same layout, built from the metrics whose names end in '_vertex'.
print(format_second_paper_table(data=fixed_l_dict, variant='_vertex'))

\begin{center}
\begin{tabular}{|c|cc|cc|cc|}
\hline
 & \multicolumn{2}{c|}{small (1-2000 bases)}    & \multicolumn{2}{c|}{medium (2001-5000 bases)} & \multicolumn{2}{c|}{large (5001-205012 bases)}    \\
$\ell$  & \texttt{esr}     & \texttt{mcr}     & \texttt{esr}     & \texttt{mcr}    & \texttt{esr}     & \texttt{mcr}    \\\hline\hline
$k$  & \cell{2}{0.97\\1.00} & \cell{2}{0.98\\1.00} & \cell{2}{0.94\\1.00} & \cell{2}{0.97\\1.00} & -- & -- \\\hline
$k+1$  & \cell{2}{0.84\\0.87} & \cell{2}{0.95\\1.00} & \cell{2}{0.80\\0.87} & \cell{2}{0.91\\1.00} & -- & -- \\\hline
$t$  & \cell{2}{0.84\\0.87} & \cell{2}{0.95\\1.00} & \cell{2}{0.80\\0.87} & \cell{2}{0.91\\1.00} & -- & -- \\\hline
$2k$  & \cell{2}{0.84\\0.87} & \cell{2}{0.95\\1.00} & \cell{2}{0.80\\0.87} & \cell{2}{0.91\\1.00} & -- & -- \\\hline\hline
\cell{1}{$ST$-\\unitigs}
   & \cell{2}{0.77\\0.87} & \cell{2}{0.88\\1.00} & \cell{2}{0.65\\0.87} & \cell{2}{0.73\\1.00} & -- & -- \\\hline
\end{tabular} 
\end{center}


In [30]:
def format_number(n):
    """Format one table value as text.

    Exact ``int`` values go through ``str``; all other values are passed to
    ``trunc(n, 2)`` and shown with two decimals.
    """
    is_plain_int = type(n) is int
    return str(n) if is_plain_int else format(trunc(n, 2), ".2f")

def format_cell(data, key, variant):
    """Build the three LaTeX ``\\cell{1}{mean\\\\median}`` entries for one group.

    ``data[key]`` must hold the statistics dicts ``'precision' + variant``,
    ``'e_size_density' + variant`` and ``'max_prop_cov' + variant``, each with
    ``'mean'`` and ``'median'``. The cells are joined by two spaces, ``&`` and
    one space.
    """
    group = data[key]
    rendered = []
    for metric in ('precision', 'e_size_density', 'max_prop_cov'):
        stats = group[metric + variant]
        mean_text = format_number(stats['mean'])
        median_text = format_number(stats['median'])
        rendered.append(f"\\cell{{1}}{{{mean_text}\\\\{median_text}}}")
    return "  & ".join(rendered)
    
def format_row(data, key, variant):
    """Build the body of one table row for the entry ``data[key]``.

    Only the 'small' group is rendered with ``format_cell``; the remaining two
    column triples are always the placeholder ``-- & -- & --``. The row ends
    with the LaTeX line break followed by ``\\hline``.
    """
    placeholder = "-- & -- & --"
    small_cells = format_cell(data[key], 'small', variant)
    return f"{small_cells}  & {placeholder} & {placeholder}  \\\\\\hline"
    

def format_third_paper_table(data, variant=''):
    """Return the LaTeX source of the table grouped by graph-size ranges.

    ``data`` is indexed by the row labels 'k', 'k+1', 't', '2k' and 'unitigs'
    (each handed to ``format_row``); ``variant`` is the metric-name suffix
    forwarded to ``format_row``. The vertex-count boundaries in the header
    come from the notebook globals ``limit_size_60``, ``limit_size_30`` and
    ``limit_size_10``.
    """
    size_header = (
        f" & \\multicolumn{{3}}{{c|}}{{small graphs (3-{limit_size_60} vertices)}}        "
        f"& \\multicolumn{{3}}{{c|}}{{medium graphs ({limit_size_60+1}-{limit_size_30} vertices)}}     "
        f"& \\multicolumn{{3}}{{c|}}{{large graphs ({limit_size_30+1}-{limit_size_10} vertices)}}          \\\\"
    )
    # The same prec/esr/mcr column triple is repeated once per size group.
    metric_triple = "\\texttt{prec}   & \\texttt{esr}     & \\texttt{mcr}    "
    metric_header = "$\\ell$  & " + "& ".join([metric_triple] * 3) + "\\\\\\hline\\hline"

    body_rows = [
        f"${label}$  & {format_row(data, label, variant)}"
        for label in ('k', 'k+1', 't', '2k')
    ]
    # An extra \hline separates the last ell row from the unitigs row.
    body_rows[-1] += "\\hline"

    lines = [
        "\\begin{center}",
        # The four leading spaces are part of the emitted text.
        "    \\begin{tabular}{|c|ccc|ccc|ccc|}",
        "\\hline",
        size_header,
        metric_header,
        *body_rows,
        # Trailing spaces on the next line and on \end{tabular} are emitted too.
        "\\cell{1}{$ST$-\\\\unitigs} ",
        f"  & {format_row(data, 'unitigs', variant)}",
        "\\end{tabular} ",
        "\\end{center}",
    ]
    return "\n".join(lines)

## Hand-written placeholder statistics used to try out the formatter. All
## entries are mean 0.23 / median 0.22 / stdev 0.1, except the mean of the
## 'small' precision, which differs per row.
d = {
    row_label: {
        size: {
            metric: {
                'mean': small_precision_mean if (size, metric) == ('small', 'precision') else 0.23,
                'median': 0.22,
                'stdev': 0.1,
            }
            for metric in ('precision', 'e_size_density', 'max_prop_cov')
        }
        for size in ('small', 'medium', 'large')
    }
    for row_label, small_precision_mean in (
        ('k', 0.23),
        ('k+1', 0.34),
        ('t', 0.45),
        ('2k', 0.46),
        ('unitigs', 0.56),
    )
}

print(format_third_paper_table(data=d))



\begin{center}
    \begin{tabular}{|c|ccc|ccc|ccc|}
\hline
 & \multicolumn{3}{c|}{small graphs (3-15 vertices)}        & \multicolumn{3}{c|}{medium graphs (16-50 vertices)}     & \multicolumn{3}{c|}{large graphs (51-725 vertices)}          \\
$\ell$  & \texttt{prec}   & \texttt{esr}     & \texttt{mcr}    & \texttt{prec}   & \texttt{esr}     & \texttt{mcr}    & \texttt{prec}   & \texttt{esr}     & \texttt{mcr}    \\\hline\hline
$k$  & \cell{1}{0.23\\0.22}  & \cell{1}{0.23\\0.22}  & \cell{1}{0.23\\0.22}  & -- & -- & -- & -- & -- & --  \\\hline
$k+1$  & \cell{1}{0.34\\0.22}  & \cell{1}{0.23\\0.22}  & \cell{1}{0.23\\0.22}  & -- & -- & -- & -- & -- & --  \\\hline
$t$  & \cell{1}{0.45\\0.22}  & \cell{1}{0.23\\0.22}  & \cell{1}{0.23\\0.22}  & -- & -- & -- & -- & -- & --  \\\hline
$2k$  & \cell{1}{0.46\\0.22}  & \cell{1}{0.23\\0.22}  & \cell{1}{0.23\\0.22}  & -- & -- & -- & -- & -- & --  \\\hline\hline
\cell{1}{$ST$-\\unitigs} 
  & \cell{1}{0.56\\0.22}  & \cell{1}{0.23\\0.22}  & \cell{1}{0.23\

In [31]:
## Tables grouped with the limit_size_60 / limit_size_30 thresholds: one entry
## per `$\ell$` label (each paired with the integer code passed to
## compute_fixed_rd_table), followed by the unitigs baseline.
fixed_rd_dict = {
    **{
        rl_label: compute_fixed_rd_table(rl_code, limit_size_60, limit_size_30, components, vertices_inv)
        for rl_label, rl_code in (('k', 0), ('k+1', 1), ('t', -1), ('2k', -2))
    },
    'unitigs': compute_fixed_rd_table_unitigs(limit_size_60, limit_size_30, components, vertices_inv),
}

In [32]:
## Print the table built from the metrics whose names end in '_bases'.
print(format_third_paper_table(data=fixed_rd_dict, variant='_bases'))

\begin{center}
    \begin{tabular}{|c|ccc|ccc|ccc|}
\hline
 & \multicolumn{3}{c|}{small graphs (3-15 vertices)}        & \multicolumn{3}{c|}{medium graphs (16-50 vertices)}     & \multicolumn{3}{c|}{large graphs (51-725 vertices)}          \\
$\ell$  & \texttt{prec}   & \texttt{esr}     & \texttt{mcr}    & \texttt{prec}   & \texttt{esr}     & \texttt{mcr}    & \texttt{prec}   & \texttt{esr}     & \texttt{mcr}    \\\hline\hline
$k$  & \cell{1}{0.20\\0.00}  & \cell{1}{0.97\\1.00}  & \cell{1}{0.98\\1.00}  & -- & -- & -- & -- & -- & --  \\\hline
$k+1$  & \cell{1}{1.00\\1.00}  & \cell{1}{0.90\\0.93}  & \cell{1}{0.96\\1.00}  & -- & -- & -- & -- & -- & --  \\\hline
$t$  & \cell{1}{1.00\\1.00}  & \cell{1}{0.90\\0.93}  & \cell{1}{0.96\\1.00}  & -- & -- & -- & -- & -- & --  \\\hline
$2k$  & \cell{1}{1.00\\1.00}  & \cell{1}{0.90\\0.93}  & \cell{1}{0.96\\1.00}  & -- & -- & -- & -- & -- & --  \\\hline\hline
\cell{1}{$ST$-\\unitigs} 
  & \cell{1}{1.00\\1.00}  & \cell{1}{0.81\\0.93}  & \cell{1}{0.87\

In [33]:
## Sixth table: same layout, built from the metrics whose names end in '_vertex'.
print(format_third_paper_table(data=fixed_rd_dict, variant='_vertex'))

\begin{center}
    \begin{tabular}{|c|ccc|ccc|ccc|}
\hline
 & \multicolumn{3}{c|}{small graphs (3-15 vertices)}        & \multicolumn{3}{c|}{medium graphs (16-50 vertices)}     & \multicolumn{3}{c|}{large graphs (51-725 vertices)}          \\
$\ell$  & \texttt{prec}   & \texttt{esr}     & \texttt{mcr}    & \texttt{prec}   & \texttt{esr}     & \texttt{mcr}    & \texttt{prec}   & \texttt{esr}     & \texttt{mcr}    \\\hline\hline
$k$  & \cell{1}{0.20\\0.00}  & \cell{1}{0.96\\1.00}  & \cell{1}{0.98\\1.00}  & -- & -- & -- & -- & -- & --  \\\hline
$k+1$  & \cell{1}{1.00\\1.00}  & \cell{1}{0.84\\0.87}  & \cell{1}{0.95\\1.00}  & -- & -- & -- & -- & -- & --  \\\hline
$t$  & \cell{1}{1.00\\1.00}  & \cell{1}{0.84\\0.87}  & \cell{1}{0.95\\1.00}  & -- & -- & -- & -- & -- & --  \\\hline
$2k$  & \cell{1}{1.00\\1.00}  & \cell{1}{0.84\\0.87}  & \cell{1}{0.95\\1.00}  & -- & -- & -- & -- & -- & --  \\\hline\hline
\cell{1}{$ST$-\\unitigs} 
  & \cell{1}{1.00\\1.00}  & \cell{1}{0.76\\0.87}  & \cell{1}{0.87\

In [34]:
def format_time(time):
    """Divide ``time`` by 1,000,000 and return it as text with two decimals."""
    scaled = time / 1000000
    return "{:.2f}".format(scaled)

def format_fourth_table(data, rl):
    """Return the LaTeX timing table for the label ``rl``.

    ``data[rl]`` must map 'unoptimized', 'two_finger', 'optimized' and
    'heuristic' to dicts with 'small', 'medium', 'large' and 'all' entries,
    each holding a ``'time'`` value that is rendered with ``format_time``.
    The vertex ranges in the row labels come from the notebook globals
    ``limit_size_60``, ``limit_size_30`` and ``limit_size_10``.
    """
    # Column order of the table: Unoptimized, Two Finger, Optimized, Heuristic.
    algorithms = ('unoptimized', 'two_finger', 'optimized', 'heuristic')

    def timing_columns(size):
        return [format_time(data[rl][name][size]['time']) for name in algorithms]

    row_end = "\\\\ \\hline"
    lines = [
        "\\begin{table}",
        "\\begin{tabular}{l|l|l|l|l|}",
        "\\cline{2-5}",
        f"                       & \\multicolumn{{4}}{{c|}}{{$\\ell = {rl}$}} \\\\ \\hline",
        "\\multicolumn{1}{|l|}{Gene graph sets} & Unoptimized (secs) & Two Finger (secs) & Optimized (secs) & Heuristic (secs) \\\\ \\hline",
    ]

    # The 'small' row has two spaces after its first value.
    small_label = f"\\multicolumn{{1}}{{|l|}}{{small, 3-{limit_size_60} vertices}}"
    first_small, *other_small = timing_columns('small')
    lines.append(f"{small_label} & {first_small}  & " + " & ".join(other_small) + row_end)

    medium_label = f"\\multicolumn{{1}}{{|l|}}{{medium, {limit_size_60+1}-{limit_size_30} vertices}}"
    lines.append(medium_label + " & " + " & ".join(timing_columns('medium')) + row_end)

    large_label = f"\\multicolumn{{1}}{{|l|}}{{large, {limit_size_30+1}-{limit_size_10} vertices}}"
    lines.append(large_label + " & " + " & ".join(timing_columns('large')) + row_end)

    # The 'all' row has two spaces between its label and the first '&'.
    all_label = f"\\multicolumn{{1}}{{|l|}}{{all, 3-{limit_size_10} vertices}}"
    lines.append(all_label + "  & " + " & ".join(timing_columns('all')) + row_end)

    lines.append("\\end{tabular}")
    lines.append("\\end{table}")
    return "\n".join(lines)

## Hand-written placeholder timings used to try out the formatter: every
## algorithm of both labels gets the same four 'time' values.
d = {
    rl_label: {
        algorithm: {
            'all': {'time': 24781434},
            'large': {'time': 23265773},
            'medium': {'time': 922717},
            'small': {'time': 592944},
        }
        for algorithm in ('optimized', 'heuristic', 'unoptimized', 'two_finger')
    }
    for rl_label in ('k', 'k+1')
}
print(format_fourth_table(data=d, rl='k'))

\begin{table}
\begin{tabular}{l|l|l|l|l|}
\cline{2-5}
                       & \multicolumn{4}{c|}{$\ell = k$} \\ \hline
\multicolumn{1}{|l|}{Gene graph sets} & Unoptimized (secs) & Two Finger (secs) & Optimized (secs) & Heuristic (secs) \\ \hline
\multicolumn{1}{|l|}{small, 3-15 vertices} & 0.59  & 0.59 & 0.59 & 0.59\\ \hline
\multicolumn{1}{|l|}{medium, 16-50 vertices} & 0.92 & 0.92 & 0.92 & 0.92\\ \hline
\multicolumn{1}{|l|}{large, 51-725 vertices} & 23.27 & 23.27 & 23.27 & 23.27\\ \hline
\multicolumn{1}{|l|}{all, 3-725 vertices}  & 24.78 & 24.78 & 24.78 & 24.78\\ \hline
\end{tabular}
\end{table}


In [35]:
## Timing tables per `$\ell$` label and algorithm. 'optimized' calls
## compute_time_table without the sixth argument; the other three pass the
## experiment key explicitly. Calls run in the order optimized, two_finger,
## unoptimized, heuristic for each label.
time_dict = {
    rl_label: {
        'optimized': compute_time_table(rl_code, limit_size_60, limit_size_30, components, vertices_inv),
        **{
            algorithm: compute_time_table(rl_code, limit_size_60, limit_size_30, components, vertices_inv, experiment_key)
            for algorithm, experiment_key in (
                ('two_finger', 'experiments_two_finger'),
                ('unoptimized', 'experiments_unoptimized'),
                ('heuristic', 'experiments_heuristic'),
            )
        },
    }
    for rl_label, rl_code in (('k', 0), ('k+1', 1), ('t', -1), ('2k', -2))
}


## One LaTeX table per label.
print(format_fourth_table(data=time_dict, rl='k'))
print(format_fourth_table(data=time_dict, rl='k+1'))
print(format_fourth_table(data=time_dict, rl='t'))
print(format_fourth_table(data=time_dict, rl='2k'))

\begin{table}
\begin{tabular}{l|l|l|l|l|}
\cline{2-5}
                       & \multicolumn{4}{c|}{$\ell = k$} \\ \hline
\multicolumn{1}{|l|}{Gene graph sets} & Unoptimized (secs) & Two Finger (secs) & Optimized (secs) & Heuristic (secs) \\ \hline
\multicolumn{1}{|l|}{small, 3-15 vertices} & 0.00  & 0.00 & 0.00 & 0.00\\ \hline
\multicolumn{1}{|l|}{medium, 16-50 vertices} & 0.00 & 0.00 & 0.00 & 0.00\\ \hline
\multicolumn{1}{|l|}{large, 51-725 vertices} & 0.00 & 0.00 & 0.00 & 0.00\\ \hline
\multicolumn{1}{|l|}{all, 3-725 vertices}  & 0.00 & 0.00 & 0.00 & 0.00\\ \hline
\end{tabular}
\end{table}
\begin{table}
\begin{tabular}{l|l|l|l|l|}
\cline{2-5}
                       & \multicolumn{4}{c|}{$\ell = k+1$} \\ \hline
\multicolumn{1}{|l|}{Gene graph sets} & Unoptimized (secs) & Two Finger (secs) & Optimized (secs) & Heuristic (secs) \\ \hline
\multicolumn{1}{|l|}{small, 3-15 vertices} & 0.00  & 0.00 & 0.00 & 0.00\\ \hline
\multicolumn{1}{|l|}{medium, 16-50 vertices} & 0.00 & 0.00 & 0.00 & 0.

In [36]:
def format_number(n):
    """Turn one table value into text.

    Values whose type is exactly ``int`` are returned through ``str``; every
    other value is passed to ``trunc(n, 2)`` and printed with two decimals.
    """
    if type(n) is int:
        text = str(n)
    else:
        truncated = trunc(n, 2)
        text = f"{truncated:.2f}"
    return text

def format_cell(data, key, variant):
    """Build the LaTeX ``\\cell{2}{mean\\\\median}`` entry for one group.

    The statistics are read from ``data[key]['max_cov_rel' + variant]``, which
    must provide ``'mean'`` and ``'median'``.
    """
    stats = data[key]['max_cov_rel' + variant]
    mean_text = format_number(stats['mean'])
    median_text = format_number(stats['median'])
    return f"\\cell{{2}}{{{mean_text}\\\\{median_text}}}"

def format_row(data, key, variant):
    """Build the body of one table row for the entry ``data[key]``.

    The 'small', 'medium' and 'large' groups are rendered with ``format_cell``
    and joined by `` & ``; the row ends with the LaTeX line break followed by
    ``\\hline``.
    """
    per_size = data[key]
    columns = [format_cell(per_size, size, variant) for size in ('small', 'medium', 'large')]
    return " & ".join(columns) + " \\\\\\hline"
    

def format_cdss_paper_table(data, variant=''):
    """Return the LaTeX source of the cdss coverage table.

    ``data`` is indexed by the row labels 'k', 'k+1', 't', '2k' and 'unitigs'
    (each handed to ``format_row``); ``variant`` is the metric-name suffix
    forwarded to ``format_row``. The base-length boundaries in the header come
    from the notebook globals ``limit_cdss_60``, ``limit_cdss_30`` and
    ``limit_cdss_10``.

    Every backslash is written as an explicit ``\\\\`` escape. The previous
    template relied on ``\\c`` being an unrecognised escape sequence for
    ``\\centering`` and ``\\caption``, which Python reports as a warning and
    plans to reject. The emitted text is unchanged.
    """
    size_header = (
        f"$\\ell$ & small (1-{limit_cdss_60} bases)    "
        f"&  medium ({limit_cdss_60+1}-{limit_cdss_30} bases) "
        f"& large ({limit_cdss_30+1}-{limit_cdss_10} bases)\\\\\\hline\\hline"
    )

    body_rows = [
        f"${label}$  & {format_row(data, label, variant)}"
        for label in ('k', 'k+1', 't', '2k')
    ]
    # An extra \hline separates the last ell row from the unitigs row.
    body_rows[-1] += "\\hline"

    lines = [
        "\\begin{table}[t]",
        "\\centering",
        "\\caption{cdss max relative coverage grouped by cdss size}",
        "\\begin{tabular}{|c|c|c|c|}",
        "\\hline",
        size_header,
        *body_rows,
        "\\cell{1}{$ST$-\\\\unitigs}",
        f"   & {format_row(data, 'unitigs', variant)}",
        # The trailing space after \end{tabular} is part of the emitted text.
        "\\end{tabular} ",
        "\\end{table}",
    ]
    return "\n".join(lines)

## Hand-written placeholder statistics used to try out the formatter: every
## row and size group gets the same 'max_cov_rel' values.
d = {
    row_label: {
        size: {'max_cov_rel': {'mean': 0.1, 'median': 0.2, 'stdev': 0.1}}
        for size in ('small', 'medium', 'large')
    }
    for row_label in ('k', 'k+1', 't', '2k', 'unitigs')
}

print(format_cdss_paper_table(data=d))

\begin{table}[t]
\centering
\caption{cdss max relative coverage grouped by cdss size}
\begin{tabular}{|c|c|c|c|}
\hline
$\ell$ & small (1-150 bases)    &  medium (151-500 bases) & large (501-27705 bases)\\\hline\hline
$k$  & \cell{2}{0.10\\0.20} & \cell{2}{0.10\\0.20} & \cell{2}{0.10\\0.20} \\\hline
$k+1$  & \cell{2}{0.10\\0.20} & \cell{2}{0.10\\0.20} & \cell{2}{0.10\\0.20} \\\hline
$t$  & \cell{2}{0.10\\0.20} & \cell{2}{0.10\\0.20} & \cell{2}{0.10\\0.20} \\\hline
$2k$  & \cell{2}{0.10\\0.20} & \cell{2}{0.10\\0.20} & \cell{2}{0.10\\0.20} \\\hline\hline
\cell{1}{$ST$-\\unitigs}
   & \cell{2}{0.10\\0.20} & \cell{2}{0.10\\0.20} & \cell{2}{0.10\\0.20} \\\hline
\end{tabular} 
\end{table}


In [37]:
## Coverage tables grouped with the limit_cdss_60 / limit_cdss_30 thresholds:
## the unitigs baseline first, then one entry per `$\ell$` label (each paired
## with the integer code passed to compute_cdss_coverage_table).
cdss_dict = {
    'unitigs': compute_cdss_coverage_table_unitigs(limit_cdss_60, limit_cdss_30, components, vertices_inv),
    **{
        rl_label: compute_cdss_coverage_table(rl_code, limit_cdss_60, limit_cdss_30, components, vertices_inv)
        for rl_label, rl_code in (('k', 0), ('k+1', 1), ('t', -1), ('2k', -2))
    },
}

In [38]:
## Print the cdss coverage table with the default (empty) metric suffix.
print(format_cdss_paper_table(data=cdss_dict))

\begin{table}[t]
\centering
\caption{cdss max relative coverage grouped by cdss size}
\begin{tabular}{|c|c|c|c|}
\hline
$\ell$ & small (1-150 bases)    &  medium (151-500 bases) & large (501-27705 bases)\\\hline\hline
$k$  & \cell{2}{1.00\\1.00} & \cell{2}{0.99\\1.00} & \cell{2}{0.99\\1.00} \\\hline
$k+1$  & \cell{2}{1.00\\1.00} & \cell{2}{0.97\\1.00} & \cell{2}{0.98\\1.00} \\\hline
$t$  & \cell{2}{1.00\\1.00} & \cell{2}{0.97\\1.00} & \cell{2}{0.98\\1.00} \\\hline
$2k$  & \cell{2}{1.00\\1.00} & \cell{2}{0.97\\1.00} & \cell{2}{0.98\\1.00} \\\hline\hline
\cell{1}{$ST$-\\unitigs}
   & \cell{2}{0.52\\0.86} & \cell{2}{0.82\\1.00} & \cell{2}{0.81\\1.00} \\\hline
\end{tabular} 
\end{table}


In [39]:
## Gather every table computed above under one dictionary and store it as
## JSON in ./summary.json.
summary_dict = {
    'impr_dict': impr_dict,
    'fixed_l_dict': fixed_l_dict,
    'fixed_rd_dict': fixed_rd_dict,
    'time_dict': time_dict,
    'cdss_dict': cdss_dict
}

## The context manager closes the file even if `dump` raises (for example on a
## value that is not JSON-serialisable).
with open('./summary.json', 'w') as f:
    dump(summary_dict, f)