Remember to first compile the C++ code in a `build` folder under the main folder of the project. You can do this by running:

```
# Run the following in the main folder of the project
mkdir build
cd build
cmake ..
cmake .. # You need to do this twice to download dependencies
make
```

To run this notebook, you first need to run the notebook `data_manipulation/graph_creation.ipynb`, or obtain the data generated by that notebook and place it in the corresponding folders.

---

In [1]:
from json import dump, load
import networkx as nx

## Load gene graphs

NUM_COMPONENTS = 3348  ## Number of gene_graphs


def _load_json(path):
    """Return the parsed content of the JSON file at `path`, closing the file afterwards."""
    with open(path, 'r') as f:
        return load(f)


## JSON object keys are always strings and JSON arrays load as lists:
## restore integer keys and tuple values
vertices_inv = {int(k): tuple(v) for k, v in _load_json('../gene_graphs/vertices_inv.json').items()}

## components[i] describes the graph stored in the files named component_{i+1}
## (file names are 1-based, list indices are 0-based)
components = list()
for i in range(NUM_COMPONENTS):
    graph = nx.read_edgelist(f'../gene_graphs/graphs/component_{i+1}.edgelist', delimiter=':', create_using=nx.DiGraph, nodetype=int)
    components.append({
        'graph': graph,
        'len': len(graph),
        'sources': set(_load_json(f'../gene_graphs/sources/component_{i+1}.json')),
        'targets': set(_load_json(f'../gene_graphs/targets/component_{i+1}.json')),
        'vertex_constrains': set(_load_json(f'../gene_graphs/vertex_constrains/component_{i+1}.json')),
        'transcript_paths': _load_json(f'../gene_graphs/transcript_paths/component_{i+1}.json'),
    })
    

In [2]:
# Run MPC
## Can be skipped if already computed
for i in range(len(components)):
    if components[i]['len'] > 2:
        ! ../../../build/experiments/run_mpc "../lgf/component_{i+1}.lgf" > ../mpc/component_{i+1}.mpc

In [3]:
## Function to obtain the width from the output of the previous files
def get_width(i):
    """Return the width stored in the MPC output file of component `i`.

    Reads `../mpc/component_{i+1}.mpc` (`i` is 0-based, file names are 1-based)
    and parses the integer that follows the first '=' on the second line.

    Raises:
        OSError: if the file cannot be opened or read.
        IndexError: if the file has fewer than two lines or the second line has no '='.
        ValueError: if the text after the '=' is not an integer.
    """
    ## `with` guarantees the file is closed even if reading fails
    with open(f'../mpc/component_{i+1}.mpc', 'r') as f:
        lines = f.read().split('\n')
    return int(lines[1].split('=')[1])

In [4]:
## Attach the width read from the MPC output to every component
## Components whose 'len' is at most 2 are skipped and get no 'width' entry
for i, component in enumerate(components):
    if component['len'] <= 2:
        continue
    component['width'] = get_width(i)

In [5]:
## Compute safe paths for every \ell \in [width, ..., stabilizes_at], where stabilizes_at is the first \ell whose output is the same as the output for stabilizes_at-1 (and the same as the output for 2*width)
## Can be skipped if already computed
#
#optimized = open('optimized.sh', 'w')
#optimized.write('#!/bin/bash\n')
#
#two_finger = open('two_finger.sh', 'w')
#two_finger.write('#!/bin/bash\n')
#
#unoptimized = open('unoptimized.sh', 'w')
#unoptimized.write('#!/bin/bash\n')
#
#heuristic = open('heuristic.sh', 'w')
#heuristic.write('#!/bin/bash\n')
#
#
#for i, component in enumerate(components):
#    n = len(component['graph'])
#    t = len(component['transcript_paths'])
#    if component['len'] > 2:
#        width = component['width']
#        
#        ! /usr/bin/time -o ../safe_paths/component_{i+1}_{2*width}_mem -f "%M,%U,%S" ../../../build/experiments/run_safe_paths "../lgf/component_{i+1}.lgf" {2*width} > ../safe_paths/component_{i+1}_{2*width}
#        optimized.write(f'/usr/bin/time -o ../safe_paths/component_{i+1}_{2*width}_mem -f "%M,%U,%S" ../../../build/experiments/run_safe_paths "../lgf/component_{i+1}.lgf" {2*width} > ../safe_paths/component_{i+1}_{2*width}\n')
#        two_finger.write(f'/usr/bin/time -o ../safe_paths/component_{i+1}_{2*width}_mem_two_finger -f "%M,%U,%S" ../../../build/experiments/run_safe_paths_two_finger "../lgf/component_{i+1}.lgf" {2*width} > ../safe_paths/component_{i+1}_{2*width}_two_finger\n')
#        unoptimized.write(f'/usr/bin/time -o ../safe_paths/component_{i+1}_{2*width}_mem_unoptimized -f "%M,%U,%S" ../../../build/experiments/run_safe_paths_unoptimized "../lgf/component_{i+1}.lgf" {2*width} > ../safe_paths/component_{i+1}_{2*width}_unoptimized\n')
#        heuristic.write(f'/usr/bin/time -o ../safe_paths/component_{i+1}_{2*width}_mem_heuristic -f "%M,%U,%S" ../../../build/experiments/run_safe_paths_safe_edges_opt "../lgf/component_{i+1}.lgf" {2*width} > ../safe_paths/component_{i+1}_{2*width}_heuristic\n')
#
#        stabilizes_at = None
#        two_widths_file = f'../safe_paths/component_{i+1}_{2*width}'
#        for l in range(width, 2*width+1):
#            ! /usr/bin/time -o ../safe_paths/component_{i+1}_{l}_mem -f "%M,%U,%S" ../../../build/experiments/run_safe_paths "../lgf/component_{i+1}.lgf" {l} > ../safe_paths/component_{i+1}_{l}
#            optimized.write(f'/usr/bin/time -o ../safe_paths/component_{i+1}_{l}_mem -f "%M,%U,%S" ../../../build/experiments/run_safe_paths "../lgf/component_{i+1}.lgf" {l} > ../safe_paths/component_{i+1}_{l}\n')
#            two_finger.write(f'/usr/bin/time -o ../safe_paths/component_{i+1}_{l}_mem_two_finger -f "%M,%U,%S" ../../../build/experiments/run_safe_paths_two_finger "../lgf/component_{i+1}.lgf" {l} > ../safe_paths/component_{i+1}_{l}_two_finger\n')
#            unoptimized.write(f'/usr/bin/time -o ../safe_paths/component_{i+1}_{l}_mem_unoptimized -f "%M,%U,%S" ../../../build/experiments/run_safe_paths_unoptimized "../lgf/component_{i+1}.lgf" {l} > ../safe_paths/component_{i+1}_{l}_unoptimized\n')
#            heuristic.write(f'/usr/bin/time -o ../safe_paths/component_{i+1}_{l}_mem_heuristic -f "%M,%U,%S" ../../../build/experiments/run_safe_paths_safe_edges_opt "../lgf/component_{i+1}.lgf" {l} > ../safe_paths/component_{i+1}_{l}_heuristic\n')
#            
#            df = ! diff ../safe_paths/component_{i+1}_{l} {two_widths_file}
#            if len(df) <= 10:
#                stabilizes_at = l
#                break
#            
#        if stabilizes_at < t:
#            
#            ! /usr/bin/time -o ../safe_paths/component_{i+1}_{t}_mem -f "%M,%U,%S" ../../../build/experiments/run_safe_paths "../lgf/component_{i+1}.lgf" {t} > ../safe_paths/component_{i+1}_{t}
#            optimized.write(f'/usr/bin/time -o ../safe_paths/component_{i+1}_{t}_mem -f "%M,%U,%S" ../../../build/experiments/run_safe_paths "../lgf/component_{i+1}.lgf" {t} > ../safe_paths/component_{i+1}_{t}\n')
#            two_finger.write(f'/usr/bin/time -o ../safe_paths/component_{i+1}_{t}_mem_two_finger -f "%M,%U,%S" ../../../build/experiments/run_safe_paths_two_finger "../lgf/component_{i+1}.lgf" {t} > ../safe_paths/component_{i+1}_{t}_two_finger\n')
#            unoptimized.write(f'/usr/bin/time -o ../safe_paths/component_{i+1}_{t}_mem_unoptimized -f "%M,%U,%S" ../../../build/experiments/run_safe_paths_unoptimized "../lgf/component_{i+1}.lgf" {t} > ../safe_paths/component_{i+1}_{t}_unoptimized\n')
#            heuristic.write(f'/usr/bin/time -o ../safe_paths/component_{i+1}_{t}_mem_heuristic -f "%M,%U,%S" ../../../build/experiments/run_safe_paths_safe_edges_opt "../lgf/component_{i+1}.lgf" {t} > ../safe_paths/component_{i+1}_{t}_heuristic\n')
#
#optimized.close()
#two_finger.close()
#unoptimized.close()
#heuristic.close()
## Execute the commands stored in optimized.sh through IPython's `!` shell escape
## The script is expected in the current working directory; the commented-out code above shows how it was generated
! sh optimized.sh

In [6]:
## Given a file name (an experiment of safe_paths), it obtains the running time, peak memory, number of safe paths, and the safe_paths themselves
def process_output(filename):
    """Parse one safe_paths experiment output and its `_mem` companion file.

    Layout read from `filename` (0-based line indices):
      * line 2: the number of safe paths, after the last '='
      * the next `number_of_safe_paths` lines: one path per line, as comma-separated integers
      * the two lines after the paths: `time_main` and `time_filter`, each after the last '='

    The peak memory is the first comma-separated field of the first line of
    `{filename}_mem` (presumably the "%M" field of the /usr/bin/time format used
    to generate the experiments -- confirm against the generating script).

    Returns:
        dict with keys 'number_of_safe_paths', 'time_main', 'time_filter',
        'safe_paths' (list of lists of int) and 'peak_memory'.

    Raises:
        OSError: if either file cannot be opened or read.
        IndexError: if the output file has fewer lines than expected.
        ValueError: if a field that should be an integer is not.
    """
    ## `with` guarantees both files are closed even if reading fails
    with open(filename, 'r') as of:
        output = of.read().split('\n')

    number_of_safe_paths = int(output[2].split('=')[-1])
    first_path = 3
    after_paths = first_path + number_of_safe_paths
    time_main = int(output[after_paths].split('=')[-1])
    time_filter = int(output[after_paths + 1].split('=')[-1])
    paths = [[int(v) for v in path.split(',')] for path in output[first_path:after_paths]]

    with open(f'{filename}_mem', 'r') as mf:
        mem = int(mf.read().split('\n')[0].split(',')[0])

    return {
        'number_of_safe_paths' : number_of_safe_paths,
        'time_main': time_main,
        'time_filter': time_filter,
        'safe_paths': paths,
        'peak_memory': mem
    }

In [7]:
## Read the results from the file
## For every component with 'len' > 2, collect the parsed outputs into component['experiments'],
## keyed by the path-length parameter used in the file name
for i, component in enumerate(components):
    if component['len'] > 2:
        width = component['width']
        n = len(component['graph'])
        t = len(component['transcript_paths'])

        component['experiments'] = dict()

        for j in range(width, 2*width):
            try:
                d = process_output(f'../safe_paths/component_{i+1}_{j}')
            except (OSError, ValueError, IndexError):
                ## Missing or unparsable output: stop scanning larger values.
                ## Only these errors are caught, so KeyboardInterrupt and real bugs still surface.
                break
            component['experiments'][j] = d

        ## At number of transcripts
        d = process_output(f'../safe_paths/component_{i+1}_{t}')
        component['experiments'][t] = d

        ## At 2width
        d = process_output(f'../safe_paths/component_{i+1}_{2*width}')
        component['experiments'][2*width] = d

In [9]:
from json import dump
## Store these results to a file in json format

## One JSON file per component with 'len' > 2 (the other components have no 'width'/'experiments' entries)
for i, component in enumerate(components):
    if component['len'] > 2:
        d = {
            'width': component['width'],
            'number_of_transcripts': len(component['transcript_paths']),
            'experiments': component['experiments'],
        }

        ## Mode 'w' overwrites an existing file; `with` closes it even if dump() raises
        ## NOTE: json writes the integer keys of the experiments dict as strings
        with open(f'../safe_paths_json/component_{i+1}.json' , 'w') as file:
            dump(d, file)

In [10]:
## Two finger approach

In [11]:
## Can be skipped if already computed
## Executes the commands stored in two_finger.sh through IPython's `!` shell escape
## The script is expected in the current working directory (see the commented-out generator in cell In [5])
! sh two_finger.sh

In [12]:
## Given a file name (an experiment of safe_paths_two_finger), it obtains the running time and peak memory
def process_output_two_finger(filename):
    """Parse the two-finger experiment files that belong to `filename`.

    Reads `{filename}_two_finger`: lines 2 and 3 (0-based) hold `time_main` and
    `time_filter`, each taken as the integer after the last '='.
    Reads `{filename}_mem_two_finger`: the first comma-separated field of the
    first line is taken as the peak memory.

    Returns:
        dict with keys 'time_main', 'time_filter' and 'peak_mem'.

    Raises:
        OSError: if either file cannot be opened or read.
        IndexError: if the output file has fewer than four lines.
        ValueError: if a field that should be an integer is not.
    """
    ## `with` guarantees both files are closed even if reading fails
    with open(f'{filename}_two_finger', 'r') as of:
        output = of.read().split('\n')
    time_main = int(output[2].split('=')[-1])
    time_filter = int(output[3].split('=')[-1])

    with open(f'{filename}_mem_two_finger', 'r') as mf:
        first_line = mf.read().split('\n')[0]
    mem = int(first_line.split(',')[0])

    return {
        'time_main': time_main,
        'time_filter': time_filter,
        'peak_mem': mem
    }

In [13]:
## Read the results from the file
## For every component with 'len' > 2, collect the parsed two-finger outputs into
## component['experiments_two_finger'], keyed by the path-length parameter used in the file name
for i, component in enumerate(components):
    if component['len'] > 2:
        width = component['width']
        n = len(component['graph'])
        t = len(component['transcript_paths'])

        component['experiments_two_finger'] = dict()

        for j in range(width, 2*width):
            try:
                d = process_output_two_finger(f'../safe_paths/component_{i+1}_{j}')
            except (OSError, ValueError, IndexError):
                ## Missing or unparsable output: stop scanning larger values.
                ## Only these errors are caught, so KeyboardInterrupt and real bugs still surface.
                break
            component['experiments_two_finger'][j] = d

        ## At number of transcripts
        d = process_output_two_finger(f'../safe_paths/component_{i+1}_{t}')
        component['experiments_two_finger'][t] = d

        ## At 2width
        d = process_output_two_finger(f'../safe_paths/component_{i+1}_{2*width}')
        component['experiments_two_finger'][2*width] = d

In [14]:
from json import dump
## Store these results to a file in json format

## One JSON file per component with 'len' > 2, now including the two-finger results
for i, component in enumerate(components):
    if component['len'] > 2:
        d = {
            'width': component['width'],
            'number_of_transcripts': len(component['transcript_paths']),
            'experiments': component['experiments'],
            'experiments_two_finger': component['experiments_two_finger'],
        }

        ## Mode 'w' overwrites an existing file; `with` closes it even if dump() raises
        ## NOTE: json writes the integer keys of the experiment dicts as strings
        with open(f'../safe_paths_json/component_{i+1}.json' , 'w') as file:
            dump(d, file)

In [15]:
## Unoptimized approach

In [16]:
## Can be skipped if already computed
## Executes the commands stored in unoptimized.sh through IPython's `!` shell escape
## The script is expected in the current working directory (see the commented-out generator in cell In [5])
! sh unoptimized.sh

In [17]:
## Given a file name (an experiment of safe_paths_unoptimized), it obtains the running time and peak memory
def process_output_unoptimized(filename):
    """Parse the unoptimized experiment files that belong to `filename`.

    Reads `{filename}_unoptimized`: lines 2 and 3 (0-based) hold `time_main` and
    `time_filter`, each taken as the integer after the last '='.
    Reads `{filename}_mem_unoptimized`: the first comma-separated field of the
    first line is taken as the peak memory.

    Returns:
        dict with keys 'time_main', 'time_filter' and 'peak_mem'.

    Raises:
        OSError: if either file cannot be opened or read.
        IndexError: if the output file has fewer than four lines.
        ValueError: if a field that should be an integer is not.
    """
    ## `with` guarantees both files are closed even if reading fails
    with open(f'{filename}_unoptimized', 'r') as of:
        output = of.read().split('\n')
    time_main = int(output[2].split('=')[-1])
    time_filter = int(output[3].split('=')[-1])

    with open(f'{filename}_mem_unoptimized', 'r') as mf:
        first_line = mf.read().split('\n')[0]
    mem = int(first_line.split(',')[0])

    return {
        'time_main': time_main,
        'time_filter': time_filter,
        'peak_mem': mem
    }

In [18]:
## Read the results from the file
## For every component with 'len' > 2, collect the parsed unoptimized outputs into
## component['experiments_unoptimized'], keyed by the path-length parameter used in the file name
for i, component in enumerate(components):
    if component['len'] > 2:
        width = component['width']
        n = len(component['graph'])
        t = len(component['transcript_paths'])

        component['experiments_unoptimized'] = dict()

        for j in range(width, 2*width):
            try:
                d = process_output_unoptimized(f'../safe_paths/component_{i+1}_{j}')
            except (OSError, ValueError, IndexError):
                ## Missing or unparsable output: stop scanning larger values.
                ## Only these errors are caught, so KeyboardInterrupt and real bugs still surface.
                break
            component['experiments_unoptimized'][j] = d

        ## At number of transcripts
        d = process_output_unoptimized(f'../safe_paths/component_{i+1}_{t}')
        component['experiments_unoptimized'][t] = d

        ## At 2width
        d = process_output_unoptimized(f'../safe_paths/component_{i+1}_{2*width}')
        component['experiments_unoptimized'][2*width] = d

In [19]:
from json import dump
## Store these results to a file in json format

## One JSON file per component with 'len' > 2, now including the unoptimized results
for i, component in enumerate(components):
    if component['len'] > 2:
        d = {
            'width': component['width'],
            'number_of_transcripts': len(component['transcript_paths']),
            'experiments': component['experiments'],
            'experiments_two_finger': component['experiments_two_finger'],
            'experiments_unoptimized': component['experiments_unoptimized'],
        }

        ## Mode 'w' overwrites an existing file; `with` closes it even if dump() raises
        ## NOTE: json writes the integer keys of the experiment dicts as strings
        with open(f'../safe_paths_json/component_{i+1}.json' , 'w') as file:
            dump(d, file)

In [20]:
## Heuristic approach

In [20]:
## Can be skipped if already computed
## Executes the commands stored in heuristic.sh through IPython's `!` shell escape
## The script is expected in the current working directory (see the commented-out generator in cell In [5])
! sh heuristic.sh

In [21]:
## Given a file name (an experiment of safe_paths_heuristic), it obtains the running time and peak memory
def process_output_heuristic(filename):
    """Parse the heuristic experiment files that belong to `filename`.

    Reads `{filename}_heuristic`: lines 2 and 3 (0-based) hold `time_main` and
    `time_filter`, each taken as the integer after the last '='.
    Reads `{filename}_mem_heuristic`: the first comma-separated field of the
    first line is taken as the peak memory.

    Returns:
        dict with keys 'time_main', 'time_filter' and 'peak_mem'.

    Raises:
        OSError: if either file cannot be opened or read.
        IndexError: if the output file has fewer than four lines.
        ValueError: if a field that should be an integer is not.
    """
    ## `with` guarantees both files are closed even if reading fails
    with open(f'{filename}_heuristic', 'r') as of:
        output = of.read().split('\n')
    time_main = int(output[2].split('=')[-1])
    time_filter = int(output[3].split('=')[-1])

    with open(f'{filename}_mem_heuristic', 'r') as mf:
        first_line = mf.read().split('\n')[0]
    mem = int(first_line.split(',')[0])

    return {
        'time_main': time_main,
        'time_filter': time_filter,
        'peak_mem': mem
    }

In [22]:
## Read the results from the file
## For every component with 'len' > 2, collect the parsed heuristic outputs into
## component['experiments_heuristic'], keyed by the path-length parameter used in the file name
for i, component in enumerate(components):
    if component['len'] > 2:
        width = component['width']
        n = len(component['graph'])
        t = len(component['transcript_paths'])

        component['experiments_heuristic'] = dict()

        for j in range(width, 2*width):
            try:
                d = process_output_heuristic(f'../safe_paths/component_{i+1}_{j}')
            except (OSError, ValueError, IndexError):
                ## Missing or unparsable output: stop scanning larger values.
                ## Only these errors are caught, so KeyboardInterrupt and real bugs still surface.
                break
            component['experiments_heuristic'][j] = d

        ## At number of transcripts
        d = process_output_heuristic(f'../safe_paths/component_{i+1}_{t}')
        component['experiments_heuristic'][t] = d

        ## At 2width
        d = process_output_heuristic(f'../safe_paths/component_{i+1}_{2*width}')
        component['experiments_heuristic'][2*width] = d

In [23]:
from json import dump
## Store these results to a file in json format

## One JSON file per component with 'len' > 2, now including the results of all four approaches
for i, component in enumerate(components):
    if component['len'] > 2:
        d = {
            'width': component['width'],
            'number_of_transcripts': len(component['transcript_paths']),
            'experiments': component['experiments'],
            'experiments_two_finger': component['experiments_two_finger'],
            'experiments_unoptimized': component['experiments_unoptimized'],
            'experiments_heuristic': component['experiments_heuristic'],
        }

        ## Mode 'w' overwrites an existing file; `with` closes it even if dump() raises
        ## NOTE: json writes the integer keys of the experiment dicts as strings
        with open(f'../safe_paths_json/component_{i+1}.json' , 'w') as file:
            dump(d, file)