# Benchmark

## 1) Setup

In [None]:
from typing import Any, List, Union, Dict, Tuple
import pandas as pd
import numpy as np
import os
import pathlib
import time
import subprocess
import json
import tqdm.auto as tqdm

In [None]:
# Directory that the benchmark input paths below are resolved against.
DATADIR = './data'
# Directory holding the model files; model names are reported relative to it.
MODELDIR = './models/'
# Directory where the JSON files with the measured running times are stored.
RESULTS_PATH = './results'
# Input FASTA files (relative to DATADIR): hg19 chr1..chr22, plus one
# chromosome each from hg38 and mm10.
BENCHMARKS = [
    *(f'hg19/chr{number}.fa' for number in range(1, 23)),
    'hg38/chr1.fa',
    'mm10/chr2.fa',
]

In [None]:
def benchmark(benchmarks: List[str],
              modelfiles: List[Union[str, pathlib.Path]],
              errorfile: str,
              command: List[str],
              results: Dict[str, Any]) -> Dict[str, Dict[str, Dict[str, float]]]:
    """Time an external program on every benchmark/model combination.

    For each benchmark and each model the program is started as
    ``command + [model_path, input_path]`` with ``TF_XLA_FLAGS`` set to
    ``--tf_xla_auto_jit=2`` in its environment. Its stdout is written to
    ``<benchmark>_<model>.tsv`` (every '/' replaced by '_') in the current
    working directory and its stderr is appended to `errorfile`.

    Combinations already present in `results` are skipped, so an interrupted
    benchmark can be resumed by passing the previously saved results.

    Args:
        benchmarks: Input file paths relative to DATADIR.
        modelfiles: Model file paths; each must be located under MODELDIR
            (otherwise ``relative_to`` raises ValueError).
        errorfile: File that collects the stderr output of all runs.
        command: Program and leading arguments, one argv token per element.
        results: Previously measured timings; updated in place.

    Returns:
        `results` itself, mapping benchmark -> model name (relative to
        MODELDIR) -> ``{'runtime': seconds}``. Runs that exit with a non-zero
        status are not recorded, so they are retried on the next call
        instead of being stored as if they were valid timings.
    """
    # The environment is the same for every run, so build it once.
    env = os.environ.copy()
    env["TF_XLA_FLAGS"] = "--tf_xla_auto_jit=2"

    for k in tqdm.tqdm(benchmarks):
        timings = results.setdefault(k, {})
        infile = pathlib.Path(DATADIR, k)
        for model_path in tqdm.tqdm(modelfiles):
            model_path = pathlib.Path(model_path)
            modelname = str(model_path.relative_to(MODELDIR))
            if modelname in timings:
                continue
            outfile = '{}_{}.tsv'.format(k, modelname).replace('/', '_')
            with open(outfile, 'wb') as file:
                # perf_counter is monotonic: the measured duration cannot be
                # distorted by system clock adjustments during a long run.
                start_time = time.perf_counter()
                completed = subprocess.run(command + [model_path, infile],
                                           stdout=file,
                                           stderr=subprocess.PIPE,
                                           env=env)
                runtime = time.perf_counter() - start_time
            failed = completed.returncode != 0
            with open(errorfile, 'ab') as file:
                file.write(completed.stderr)
                if failed:
                    note = '\n[benchmark] exit code {} for model {} on {}\n'.format(
                        completed.returncode, modelname, k)
                    file.write(note.encode())
            if not failed:
                timings[modelname] = {'runtime': runtime}
    return results

## 2) Run DeepGRP benchmark

In [None]:
# JSON file (inside RESULTS_PATH) that stores the DeepGRP running times.
filename = "deepgrp_runningtime.json"

In [None]:
# Safety latch: stop here unless `filename` was changed, so the existing
# results file is not overwritten by accident. An explicit raise is used
# because a plain `assert` is removed when Python runs with -O.
if filename == 'deepgrp_runningtime.json':
    raise AssertionError("Change filename if you want to compute anything new")

In [None]:
# Resume from previously saved timings when the results file exists and
# contains valid JSON; otherwise start with an empty result set.
try:
    results = json.loads(pathlib.Path(RESULTS_PATH, filename).read_text())
except (json.JSONDecodeError, FileNotFoundError):
    results = {}

In [None]:
# All DeepGRP model files directly inside MODELDIR; sorted because glob
# yields them in arbitrary order and the run order should be reproducible.
deepgrpmodels = sorted(pathlib.Path(MODELDIR).glob('model_*.h5'))
# One argv token per list element: an option and its value are separate
# entries so the value does not reach the program with a leading space.
deepgrp_command = ['python3', '-m', 'deepgrp',
                   '-t', '10', '--xla', '-b', '4096']
deepgrp_results = benchmark(BENCHMARKS, deepgrpmodels, 'deepgrp.log',
                            deepgrp_command, results=results)

#### Save results

In [None]:
# Serialize before touching the file so a serialization error cannot leave a
# truncated results file behind, and create the target directory if needed so
# the finished timings are not lost to a missing folder.
payload = json.dumps(deepgrp_results)
target = pathlib.Path(RESULTS_PATH, filename)
target.parent.mkdir(parents=True, exist_ok=True)
with target.open('w') as file:
    file.write(payload)

## 3) Run dna-brnn benchmark 

In [None]:
# JSON file (inside RESULTS_PATH) that stores the dna-brnn running times.
filename = "dnabrnn_runningtime.json"

In [None]:
# Safety latch: stop here unless `filename` was changed, so the existing
# results file is not overwritten by accident. An explicit raise is used
# because a plain `assert` is removed when Python runs with -O.
if filename == 'dnabrnn_runningtime.json':
    raise AssertionError("Change filename if you want to compute anything new")

In [None]:
# Resume from previously saved timings when the results file exists and
# contains valid JSON; otherwise start with an empty result set.
try:
    results = json.loads(pathlib.Path(RESULTS_PATH, filename).read_text())
except (json.JSONDecodeError, FileNotFoundError):
    results = {}

In [None]:
# All dna-brnn model files directly inside MODELDIR; sorted because glob
# yields them in arbitrary order and the run order should be reproducible.
dnabrnnmodels = sorted(pathlib.Path(MODELDIR).glob("dnabrnn_model*.knm"))
# One argv token per list element: '-t' and its value are separate entries so
# the value does not reach the program with a leading space.
dnabrnn_command = ['dna-nn/dna-brnn', '-t', '10', '-O292', '-Ai']
dnabrnnresults = benchmark(BENCHMARKS, dnabrnnmodels, 'dnabrnn.log',
                           dnabrnn_command, results=results)

#### Save results

In [None]:
# Serialize before touching the file so a serialization error cannot leave a
# truncated results file behind, and create the target directory if needed so
# the finished timings are not lost to a missing folder.
payload = json.dumps(dnabrnnresults)
target = pathlib.Path(RESULTS_PATH, filename)
target.parent.mkdir(parents=True, exist_ok=True)
with target.open('w') as file:
    file.write(payload)