## Release Testing: MAli-v0.1 on BALIS-2

#### Imports

In [1]:
import os
import shutil
import subprocess
import time
import pandas as pd

In [2]:
from wrapped_scorer import WrappedScorer

#### MAli

In [3]:
SEED = 9032025

In [4]:
ALIGNER_NAME = "MAli-v0.1"
ALIGNER_PATH = f"aligners/{ALIGNER_NAME}/MAli.exe"
OUTPUT_FOLDER = f"aligners/{ALIGNER_NAME}/output"

In [5]:
# creating empty output folder
if os.path.exists(OUTPUT_FOLDER):
    shutil.rmtree(OUTPUT_FOLDER)
os.makedirs(OUTPUT_FOLDER)

In [6]:
RESULTS_FILE_DIRECTORY = f"results/{ALIGNER_NAME}"

In [7]:
# creating empty results folder
if os.path.exists(RESULTS_FILE_DIRECTORY):
    shutil.rmtree(RESULTS_FILE_DIRECTORY)
os.makedirs(RESULTS_FILE_DIRECTORY)

#### BALIS-2

In [8]:
DATASET_NAME = "BALIS-2"
DATASET_FOLDER = f"datasets/{DATASET_NAME}"
INPUT_FOLDER = f"{DATASET_FOLDER}/in"
REFERENCES_FOLDER = f"{DATASET_FOLDER}/ref"

In [9]:
# checking that testcases have been found
testcases = os.listdir(INPUT_FOLDER)
print(testcases[:10])

['BB11001', 'BB11002', 'BB11003', 'BB11006', 'BB11007', 'BB11010', 'BB11012', 'BB11013', 'BB11017', 'BB11020']


#### QScore

In [10]:
SCORER_PATH = "scorers/QScore/qscore.exe"

In [11]:
scorer = WrappedScorer(SCORER_PATH)

#### Performing Alignment

In [12]:
def perform_alignment_and_record_time(filename):
    """Run the aligner on one testcase and return the elapsed time in ms.

    The input is read from ``INPUT_FOLDER/filename`` and the aligner is told
    to write its result to ``OUTPUT_FOLDER/filename``; ``SEED`` is forwarded
    via ``-seed``.

    Args:
        filename: Name of the testcase file inside ``INPUT_FOLDER``.

    Returns:
        Wall-clock duration of the aligner process, rounded to the nearest
        whole millisecond, as an ``int``.
    """
    input_path = f"{INPUT_FOLDER}/{filename}"
    output_path = f"{OUTPUT_FOLDER}/{filename}"

    # Pass an argument list rather than a single command string: a plain
    # string without shell=True is only split into arguments on Windows, and
    # would be mis-parsed if any path contained whitespace.
    command = [
        ALIGNER_PATH,
        "-input", input_path,
        "-output", output_path,
        "-seed", str(SEED),
    ]

    # NOTE(review): the aligner's exit status is not checked, so a failed run
    # is still timed and reported like a successful one.
    start_time = time.perf_counter()
    subprocess.run(command)
    end_time = time.perf_counter()

    elapsed_ms = (end_time - start_time) * 1000
    return int(round(elapsed_ms, 0))

In [13]:
def score_quality_of_produced_alignment(filename):
    """Score the alignment produced for `filename` against its reference.

    The produced alignment is looked up under ``OUTPUT_FOLDER`` and the
    reference under ``REFERENCES_FOLDER``; both paths are handed to
    ``scorer.score_testcase`` and its result is returned unchanged.
    """
    return scorer.score_testcase(
        f"{OUTPUT_FOLDER}/{filename}",
        f"{REFERENCES_FOLDER}/{filename}",
    )

In [14]:
def record_performance_on_testcase(filename):
    """Align and score one testcase, returning its result as a CSV row.

    Args:
        filename: Name of the testcase file to align and score.

    Returns:
        A comma-separated string with the fields
        ``aligner,dataset,testcase,score,time_taken`` (no trailing newline).
    """
    # Use the `filename` argument, not the module-level loop variable
    # `testcase`, so the function works for any caller.
    time_taken = perform_alignment_and_record_time(filename)
    score = score_quality_of_produced_alignment(filename)

    return f"{ALIGNER_NAME},{DATASET_NAME},{filename},{score},{time_taken}"
    

In [15]:
def write_records_to_csv(records, filename):
    """Write each record on its own line to a file in the results directory.

    Args:
        records: Iterable of already-formatted CSV row strings (without
            trailing newlines).
        filename: Name of the file to create inside
            ``RESULTS_FILE_DIRECTORY``; an existing file is overwritten.
    """
    # Local, lowercase name so the module-level RESULTS_FILEPATH constant is
    # not shadowed inside this function.
    results_filepath = f"{RESULTS_FILE_DIRECTORY}/{filename}"

    with open(results_filepath, "w") as file:
        # Iterate the `records` argument, not the global RECORDS list.
        for record in records:
            file.write(record)
            file.write("\n")

    print(f"Results written to: {results_filepath}")

In [16]:
HEADER = "aligner,dataset,testcase,Q_score,time_elapsed_ms"

In [17]:
# Benchmark driver: run every testcase, echo each CSV row as it is produced,
# then write the header plus all rows to the results file.
RECORDS = []
RECORDS.append(HEADER)

RESULTS_FILENAME = f"sbench_{ALIGNER_NAME}_on_{DATASET_NAME}.csv"
# Keep this in sync with where write_records_to_csv actually writes, i.e.
# inside RESULTS_FILE_DIRECTORY rather than directly under "results/".
RESULTS_FILEPATH = f"{RESULTS_FILE_DIRECTORY}/{RESULTS_FILENAME}"
for testcase in testcases:
    record = record_performance_on_testcase(testcase)
    print(record)
    RECORDS.append(record)

write_records_to_csv(RECORDS, RESULTS_FILENAME)

MAli-v0.1,BALIS-2,BB11001,0.0497,54
MAli-v0.1,BALIS-2,BB11002,0.0168,56
MAli-v0.1,BALIS-2,BB11003,0.0115,55
MAli-v0.1,BALIS-2,BB11006,0.00595,56
MAli-v0.1,BALIS-2,BB11007,0.0575,55
MAli-v0.1,BALIS-2,BB11010,0.0,53
MAli-v0.1,BALIS-2,BB11012,0.0219,53
MAli-v0.1,BALIS-2,BB11013,0.0103,54
MAli-v0.1,BALIS-2,BB11017,0.0574,53
MAli-v0.1,BALIS-2,BB11020,0.033,54
MAli-v0.1,BALIS-2,BB11023,0.0215,54
MAli-v0.1,BALIS-2,BB11024,0.0,52
MAli-v0.1,BALIS-2,BB11026,0.0,54
MAli-v0.1,BALIS-2,BB11028,0.0211,53
MAli-v0.1,BALIS-2,BB11030,0.0177,59
MAli-v0.1,BALIS-2,BB11032,0.023,55
MAli-v0.1,BALIS-2,BB11034,0.00607,56
MAli-v0.1,BALIS-2,BB11035,0.0297,106
MAli-v0.1,BALIS-2,BB11036,0.0194,59
MAli-v0.1,BALIS-2,BB12001,0.0109,53
MAli-v0.1,BALIS-2,BB12003,0.198,54
MAli-v0.1,BALIS-2,BB12006,0.11,55
MAli-v0.1,BALIS-2,BB12008,0.104,59
MAli-v0.1,BALIS-2,BB12015,0.0278,59
MAli-v0.1,BALIS-2,BB12017,0.0243,55
MAli-v0.1,BALIS-2,BB12020,0.159,52
MAli-v0.1,BALIS-2,BB12022,0.0327,53
MAli-v0.1,BALIS-2,BB12025,0.224,55
MAli-v