# Dependency code

In [None]:
import glob
import imp
import importlib.machinery
import importlib.util
import os
import sys

import cmdbench
import numpy as np

# Load the project-local `benchmark` helper from ../lib without modifying sys.path.
# importlib is used instead of `imp.find_module`/`imp.load_module`: the `imp` module
# has been deprecated since Python 3.4 and was removed in 3.12, and the file handle
# returned by `imp.find_module` was never closed here.
benchmark_spec = importlib.machinery.PathFinder.find_spec('benchmark', [os.path.abspath('../lib')])
if benchmark_spec is None:
    # Same exception type that imp.find_module raised for a missing module.
    raise ImportError("No module named 'benchmark' found in ../lib")
benchmark = importlib.util.module_from_spec(benchmark_spec)
# Register the module before executing it so that imports performed while it
# initializes can resolve `benchmark` itself.
sys.modules['benchmark'] = benchmark
benchmark_spec.loader.exec_module(benchmark)

# Summarize numpy array if it has more than 10 elements
np.set_printoptions(threshold=10)

# Software versions

In [None]:
sting_bin_dir = os.path.abspath('../lib/bin/STing')
os.environ['PATH'] = f"{sting_bin_dir}:{os.environ['PATH']}"

indexer_bin = f'{sting_bin_dir}/indexer'
typer_bin = f'{sting_bin_dir}/typer'
detector_bin = f'{sting_bin_dir}/detector'
db_util_bin = f'{sting_bin_dir}/db_util.py'
!{indexer_bin} --version
!{typer_bin} --version
!{detector_bin} --version
!{db_util_bin} --version

# Benchmark

## Input data and constants

In [None]:
# Paired-end reads: each sample is identified by the name of its `_1.fastq.gz` file.
input_dir = '../data/input-files/reads'
input_files_1 = [os.path.basename(path) for path in glob.glob(f'{input_dir}/*_1.fastq.gz')]
# Sample names, sorted so that prefixes of this list are reproducible between runs.
input_samples = sorted(name.replace('_1.fastq.gz', '') for name in input_files_1)
print(input_samples)

# Number of samples used in each benchmark run.
sample_sizes = [1, 10, 20, 30, 40, 50, 60, 70, 80]

# Value passed as `-j` in the parallel arguments of the benchmarked command.
nproc = 32

# Working directories for STing output and for its MLST database.
sting_output = 'sting-output'
sting_db = 'sting-db'
scheme_name = 'Campylobacter jejuni'
# Scheme directory: the scheme name lowercased, with spaces turned into underscores.
scheme_db = f'{sting_db}/{"_".join(scheme_name.lower().split(" "))}'
print(scheme_db)

# Make sure both directories exist, then clean them via the benchmark helpers.
benchmark.create_folder_if_doesnt_exist(sting_output)
benchmark.create_folder_if_doesnt_exist(sting_db)
benchmark.clean_if_exists(sting_db)
benchmark.clean_if_exists(sting_output)

## Build STing MLST database

In [None]:
!{db_util_bin} fetch --query "Campylobacter jejuni" --out_dir {sting_db} --build_index

## Benchmark functions

In [None]:
def reset_func():
    """Reset hook handed to the benchmark runner as `reset_func`.

    Calls `benchmark.clean_if_exists` on the STing output directory
    (`sting_output`). Takes no arguments and returns nothing.
    """
    # NOTE(review): presumably invoked between runs so each one starts from an
    # empty output directory — confirm against multi_cmdbench.
    benchmark.clean_if_exists(sting_output)

def sampling_func(sample_size):
    """Return the first `sample_size` entries of `input_samples`.

    Slicing means a `sample_size` larger than the number of available
    samples yields every sample rather than raising.
    """
    return input_samples[:sample_size]

# Command description for the STing typer. `%` is the per-sample placeholder
# declared by `-I%` in the parallel arguments; `-j` carries the `nproc` value.
typer_command = dict(
    command=f"typer -x {scheme_db}/db/index -1 {input_dir}/%_1.fastq.gz -2 {input_dir}/%_2.fastq.gz",
    parallel_args=f"-j {nproc} -I%",
)

## Benchmark

In [None]:
# Benchmark the typer command, registered under the "type" key, using the reset
# and sampling hooks defined above and the configured sample sizes.
multibench_results, debug_str = benchmark.multibench.multi_cmdbench(
    {"type": [typer_command]},
    reset_func=reset_func,
    iterations=1,
    sampling_func=sampling_func,
    sample_sizes=sample_sizes,
    benchmark_list_to_results=benchmark.benchmark_list_to_results,
    active_output_print=True,
)

# Save and reload results

In [None]:
# NOTE(review): "string-results.txt" may be a typo for "sting-results.txt" —
# confirm nothing else reads this file name before renaming it.
save_path = "string-results.txt"

# The samples belonging to each sample size: the first `size` entries of input_samples.
samples_per_sample_size = [input_samples[:size] for size in sample_sizes]

benchmark.multibench.save_multibench_results(multibench_results, samples_per_sample_size, save_path)

# Read the results back from the file that was just written.
multibench_results, samples_per_sample_size = benchmark.multibench.read_multibench_results(save_path)
print(samples_per_sample_size)

# Plot

In [None]:
%matplotlib inline
import matplotlib.pyplot as plt
plt.style.use('seaborn-whitegrid')
import numpy as np
from pylab import rcParams
rcParams['figure.figsize'] = 15, 3

In [None]:
# Typer command plots: plot the results stored under the "type" key (the key the
# typer command was benchmarked under) against the configured sample sizes.
benchmark.multibench.plot_resources(multibench_results, sample_sizes, "type")