Remember to first compile the C++ code in a `build` folder under the main folder of the project. You can do this by running:

```
... in the main folder of the project
mkdir build
cd build
cmake ..
cmake .. # You need to do this twice to download dependencies
make
```

To run this notebook, you first need to run the notebook `data_manipulation/graph_creation.ipynb`, or obtain the data generated by that notebook in the corresponding folders.
---

In [None]:
from json import dump, load
import networkx as nx

## Load gene graphs

# Number of gene graphs (components) expected under ../gene_graphs/.
NUM_COMPONENTS = 27121

# JSON object keys are always strings and JSON arrays load as lists, so the
# keys are converted back to int and the values to tuples.
with open('../gene_graphs/vertices_inv.json', 'r') as f:
    vertices_inv = {int(k): tuple(v) for k, v in load(f).items()}


def _load_component_json(kind, number):
    """Return the parsed JSON of ../gene_graphs/<kind>/component_<number>.json."""
    with open(f'../gene_graphs/{kind}/component_{number}.json', 'r') as f:
        return load(f)


# components[i] describes the component stored on disk as component_{i+1}
# (the list is 0-indexed, the files are 1-indexed).
components = list()
for i in range(NUM_COMPONENTS):
    number = i + 1
    graph = nx.read_edgelist(
        f'../gene_graphs/graphs/component_{number}.edgelist',
        delimiter=':',
        create_using=nx.DiGraph,
    )
    components.append({
        'graph': graph,
        # len() of a networkx graph is its number of nodes.
        'len': len(graph),
        'sources': set(_load_component_json('sources', number)),
        'targets': set(_load_component_json('targets', number)),
        'vertex_constrains': set(_load_component_json('vertex_constrains', number)),
        'transcript_paths': _load_component_json('transcript_paths', number),
    })
    

In [26]:
# Run MPC
## Can be skipped if already computed
## For every component whose graph has more than two nodes, run the compiled
## `run_mpc` binary on the component's LGF file and redirect its standard output
## to ../mpc/component_<i+1>.mpc (files are 1-indexed, `components` is 0-indexed).
## NOTE(review): assumes ../lgf/component_<i+1>.lgf already exists and that
## `run_mpc` was built under ../../build -- confirm before running.
for i in range(len(components)):
    if components[i]['len'] > 2:
        # IPython shell escape: {i+1} is expanded from the Python namespace.
        ! ../../build/experiments/run_mpc "../lgf/component_{i+1}.lgf" > ../mpc/component_{i+1}.mpc

In [52]:
## Function to obtain the width from the output of the previous files
def get_width(i):
    """Return the width recorded in ../mpc/component_{i+1}.mpc.

    Parameters:
        i: 0-based component index; the file on disk is numbered i + 1.

    Returns:
        The integer found after the first '=' on the second line of the file.

    Raises:
        OSError: if the file cannot be opened.
        IndexError: if the file has fewer than two lines or no '=' on line two.
        ValueError: if the text after '=' is not an integer.
    """
    # The context manager closes the file even if reading fails.
    with open(f'../mpc/component_{i+1}.mpc', 'r') as f:
        lines = f.read().split('\n')
    return int(lines[1].split('=')[1])

In [54]:
## Annotate each component that has more than two nodes with its width,
## as parsed by get_width from the corresponding ../mpc file.
for idx, comp in enumerate(components):
    if comp['len'] <= 2:
        # No width is recorded for components with at most two nodes.
        continue
    comp['width'] = get_width(idx)

In [34]:
## Compute safe paths for \ell \in [width .... stabilizes_at], where stabilizes_at is the first \ell whose
## output is (nearly) identical to the output obtained with \ell = 2*width.
## Can be skipped if already computed
##
## Every run is wrapped in `/usr/bin/time -o <out>_mem -f "%M,%U,%S"`, so next to each result file
## ../safe_paths/component_<i+1>_<l> there is a `<...>_mem` file holding the three comma-separated
## values of that format string (GNU time: %M max resident set size, %U user time, %S system time).
for i, component in enumerate(components):
    # NOTE: `n` is not used anywhere else in this cell.
    n = len(component['graph'])
    # Number of transcript paths; used as one extra value of l after the scan below.
    t = len(component['transcript_paths'])
    if component['len'] > 2:
        width = component['width']
        # Reference run with l = 2*width; the scan below compares against this output.
        ! /usr/bin/time -o ../safe_paths/component_{i+1}_{2*width}_mem -f "%M,%U,%S" ../../build/experiments/run_safe_paths "../lgf/component_{i+1}.lgf" {2*width} > ../safe_paths/component_{i+1}_{2*width}
        stabilizes_at = None
        two_widths_file = f'../safe_paths/component_{i+1}_{2*width}'
        # The range includes 2*width: if that iteration is reached, the reference file is
        # regenerated and compared with itself, so the empty diff ends the scan there.
        for l in range(width, 2*width+1):
            ! /usr/bin/time -o ../safe_paths/component_{i+1}_{l}_mem -f "%M,%U,%S" ../../build/experiments/run_safe_paths "../lgf/component_{i+1}.lgf" {l} > ../safe_paths/component_{i+1}_{l}
            # `df` receives the lines printed by diff.
            df = ! diff ../safe_paths/component_{i+1}_{l} {two_widths_file}
            # At most 10 diff lines counts as "same output".
            # NOTE(review): presumably this tolerance absorbs the timing lines, which differ between runs -- confirm.
            if len(df) <= 10:
                stabilizes_at = l
                break
            
        # Additionally run with l = number of transcript paths when the scan stopped before reaching it.
        if stabilizes_at < t:
            ! /usr/bin/time -o ../safe_paths/component_{i+1}_{t}_mem -f "%M,%U,%S" ../../build/experiments/run_safe_paths "../lgf/component_{i+1}.lgf" {t} > ../safe_paths/component_{i+1}_{t}

In [61]:
## Given a file name (an experiment of safe_paths), it obtains the running time, peak memory, number of safe paths, and the safe_paths themselves
def process_output(filename):
    """Parse the result files of one safe-paths experiment.

    Two files are read: `filename` itself and `filename + '_mem'`.

    Layout of `filename` relied upon by the parsing below (0-based lines):
      * line 2: `...=<number of safe paths>`
      * the next `number_of_safe_paths` lines: one path per line, written
        as comma-separated integers
      * the line after the paths: `...=<time_main>`
      * the line after that: `...=<time_filter>`

    Only the first comma-separated field of the first line of the `_mem`
    file is used; it is returned as the peak memory.

    Parameters:
        filename: path of the experiment output file (without `_mem`).

    Returns:
        dict with keys 'number_of_safe_paths', 'time_main', 'time_filter',
        'safe_paths' (list of lists of int) and 'peak_memory'.

    Raises:
        OSError: if either file cannot be opened.
        IndexError, ValueError: if a file does not follow the layout above.
    """
    # Context managers close the files even when reading or parsing fails.
    with open(filename, 'r') as of:
        output = of.read().split('\n')

    number_of_safe_paths = int(output[2].split('=')[-1])
    paths_end = 3 + number_of_safe_paths  # index of the first line after the paths
    time_main = int(output[paths_end].split('=')[-1])
    time_filter = int(output[paths_end + 1].split('=')[-1])
    paths = [[int(v) for v in path.split(',')] for path in output[3:paths_end]]

    with open(f'{filename}_mem', 'r') as mf:
        mem_lines = mf.read().split('\n')
    mem = int(mem_lines[0].split(',')[0])

    return {
        'number_of_safe_paths': number_of_safe_paths,
        'time_main': time_main,
        'time_filter': time_filter,
        'safe_paths': paths,
        'peak_memory': mem,
    }

In [57]:
## Read the results from the files written under ../safe_paths
## For every component with more than two nodes, component['experiments'] maps
## each parameter value l to the dict returned by process_output for that run.
for i, component in enumerate(components):
    if component['len'] > 2:
        width = component['width']
        t = len(component['transcript_paths'])
        prefix = f'../safe_paths/component_{i+1}'

        component['experiments'] = dict()

        # Read l = width, width+1, ... and stop at the first value whose result
        # files are missing or unparsable. Only the errors process_output can
        # raise for such files are caught, so KeyboardInterrupt and genuine
        # programming errors are no longer swallowed.
        for j in range(width, 2*width):
            try:
                d = process_output(f'{prefix}_{j}')
            except (OSError, ValueError, IndexError):
                break
            component['experiments'][j] = d

        ## At number of transcripts
        component['experiments'][t] = process_output(f'{prefix}_{t}')

        ## At 2width
        component['experiments'][2*width] = process_output(f'{prefix}_{2*width}')

In [47]:
from json import dump
## Store these results to a file in json format
## One file per component with more than two nodes:
## ../safe_paths_json/component_<i+1>.json
## Note that json.dump writes the integer keys of 'experiments' as strings.

for i, component in enumerate(components):
    if component['len'] > 2:
        d = {
            'width': component['width'],
            'number_of_transcripts': len(component['transcript_paths']),
            'experiments': component['experiments'],
        }

        # The context manager closes the file even if serialization fails.
        with open(f'../safe_paths_json/component_{i+1}.json', 'w') as file:
            dump(d, file)

In [48]:
## Naive approach

In [49]:
## Can be skipped if already computed
##
## Same experiment as the one run with `run_safe_paths`, but using the `run_safe_paths_naive` binary.
## Result files get a `_naive` suffix (../safe_paths/component_<i+1>_<l>_naive) and the
## `/usr/bin/time` reports are written to ../safe_paths/component_<i+1>_<l>_mem_naive, holding the
## three comma-separated values of the format "%M,%U,%S"
## (GNU time: %M max resident set size, %U user time, %S system time).
for i, component in enumerate(components):
    # NOTE: `n` is not used anywhere else in this cell.
    n = len(component['graph'])
    # Number of transcript paths; used as one extra value of l after the scan below.
    t = len(component['transcript_paths'])
    if component['len'] > 2:
        width = component['width']
        # Reference run with l = 2*width; the scan below compares against this output.
        ! /usr/bin/time -o ../safe_paths/component_{i+1}_{2*width}_mem_naive -f "%M,%U,%S" ../../build/experiments/run_safe_paths_naive "../lgf/component_{i+1}.lgf" {2*width} > ../safe_paths/component_{i+1}_{2*width}_naive
        stabilizes_at = None
        two_widths_file = f'../safe_paths/component_{i+1}_{2*width}_naive'
        # The range includes 2*width: if that iteration is reached, the reference file is
        # regenerated and compared with itself, so the empty diff ends the scan there.
        for l in range(width, 2*width+1):
            ! /usr/bin/time -o ../safe_paths/component_{i+1}_{l}_mem_naive -f "%M,%U,%S" ../../build/experiments/run_safe_paths_naive "../lgf/component_{i+1}.lgf" {l} > ../safe_paths/component_{i+1}_{l}_naive
            # `df` receives the lines printed by diff.
            df = ! diff ../safe_paths/component_{i+1}_{l}_naive {two_widths_file}
            # At most 10 diff lines counts as "same output".
            # NOTE(review): presumably this tolerance absorbs the timing lines, which differ between runs -- confirm.
            if len(df) <= 10:
                stabilizes_at = l
                break
            
        # Additionally run with l = number of transcript paths when the scan stopped before reaching it.
        if stabilizes_at < t:
            ! /usr/bin/time -o ../safe_paths/component_{i+1}_{t}_mem_naive -f "%M,%U,%S" ../../build/experiments/run_safe_paths_naive "../lgf/component_{i+1}.lgf" {t} > ../safe_paths/component_{i+1}_{t}_naive

In [60]:
## Given a file name (an experiment of safe_paths_naive), it obtains the running time and peak memory
def process_output_naive(filename):
    """Parse the result files of one naive safe-paths experiment.

    `filename` is the experiment name WITHOUT the `_naive` suffix; the files
    actually read are `filename + '_naive'` and `filename + '_mem_naive'`.

    Layout relied upon by the parsing below (0-based lines):
      * line 2 of the `_naive` file: `...=<time_main>`
      * line 3 of the `_naive` file: `...=<time_filter>`
      * first line of the `_mem_naive` file: comma-separated values, of which
        only the first is used and returned as the peak memory.

    Parameters:
        filename: common prefix of the two result files.

    Returns:
        dict with keys 'time_main', 'time_filter' and 'peak_mem'.
        NOTE: the memory key is 'peak_mem' here, whereas process_output uses
        'peak_memory'; it is kept unchanged so stored results stay compatible.

    Raises:
        OSError: if either file cannot be opened.
        IndexError, ValueError: if a file does not follow the layout above.
    """
    # Context managers close the files even when reading or parsing fails.
    with open(f'{filename}_naive', 'r') as of:
        output = of.read().split('\n')
    time_main = int(output[2].split('=')[-1])
    time_filter = int(output[3].split('=')[-1])

    with open(f'{filename}_mem_naive', 'r') as mf:
        mem_lines = mf.read().split('\n')
    mem = int(mem_lines[0].split(',')[0])

    return {
        'time_main': time_main,
        'time_filter': time_filter,
        'peak_mem': mem,
    }

In [62]:
## Read the results of the naive runs from the files written under ../safe_paths
## For every component with more than two nodes, component['experiments_naive']
## maps each parameter value l to the dict returned by process_output_naive.
for i, component in enumerate(components):
    if component['len'] > 2:
        width = component['width']
        t = len(component['transcript_paths'])
        prefix = f'../safe_paths/component_{i+1}'

        component['experiments_naive'] = dict()

        # Read l = width, width+1, ... and stop at the first value whose result
        # files are missing or unparsable. Only the errors process_output_naive
        # can raise for such files are caught, so KeyboardInterrupt and genuine
        # programming errors are no longer swallowed.
        for j in range(width, 2*width):
            try:
                d = process_output_naive(f'{prefix}_{j}')
            except (OSError, ValueError, IndexError):
                break
            component['experiments_naive'][j] = d

        ## At number of transcripts
        component['experiments_naive'][t] = process_output_naive(f'{prefix}_{t}')

        ## At 2width
        component['experiments_naive'][2*width] = process_output_naive(f'{prefix}_{2*width}')

In [None]:
from json import dump
## Store these results to a file in json format
## One file per component with more than two nodes:
## ../safe_paths_json/component_<i+1>.json, now including the naive results.
## Note that json.dump writes the integer keys of the experiment dicts as strings.

for i, component in enumerate(components):
    if component['len'] > 2:
        d = {
            'width': component['width'],
            'number_of_transcripts': len(component['transcript_paths']),
            'experiments': component['experiments'],
            # Fixed: this entry used to be filled from component['experiments'],
            # so the naive measurements were never written to disk.
            'experiments_naive': component['experiments_naive'],
        }

        # The context manager closes the file even if serialization fails.
        with open(f'../safe_paths_json/component_{i+1}.json', 'w') as file:
            dump(d, file)