## Release Testing: MAli v1.31 on BALIS-2

#### Imports

In [1]:
import os
import shutil
import subprocess
import time
import pandas as pd

In [2]:
from wrapped_scorer import WrappedScorer

#### MAli

In [3]:
# seed handed to the aligner through its -seed argument on every invocation
SEED = 9032025

In [4]:
# label of the aligner release under test; it also names the aligner and
# results folders and is the first field of every results record
ALIGNER_NAME = "MAli-v1.31"
# executable that is launched once per testcase
ALIGNER_PATH = f"aligners/{ALIGNER_NAME}/MAli.exe"
# folder given to the aligner as the destination for its output
OUTPUT_FOLDER = f"aligners/{ALIGNER_NAME}/output"

In [5]:
# creating empty output folder: anything left in it by an earlier run is
# deleted first, then the folder is recreated
if os.path.exists(OUTPUT_FOLDER):
    shutil.rmtree(OUTPUT_FOLDER)
os.makedirs(OUTPUT_FOLDER)

In [6]:
# folder that write_records_to_csv writes the results CSV files into
RESULTS_FILE_DIRECTORY = f"results/{ALIGNER_NAME}"

In [7]:
# creating empty results folder: results from an earlier run of this aligner
# are deleted first, then the folder is recreated
if os.path.exists(RESULTS_FILE_DIRECTORY):
    shutil.rmtree(RESULTS_FILE_DIRECTORY)
os.makedirs(RESULTS_FILE_DIRECTORY)

#### BALIS-2

In [8]:
# label of the benchmark dataset; second field of every results record
DATASET_NAME = "BALIS-2"
DATASET_FOLDER = f"datasets/{DATASET_NAME}"
# testcase inputs: listed to discover testcases and passed to the aligner as -input
INPUT_FOLDER = f"{DATASET_FOLDER}/in"
# reference files, looked up by testcase name when scoring an alignment
REFERENCES_FOLDER = f"{DATASET_FOLDER}/ref"

In [9]:
# checking that testcases have been found
# os.listdir returns entries in arbitrary order, so sort them to keep the
# order of testcases (and of the result records) the same on every machine
testcases = sorted(os.listdir(INPUT_FOLDER))
print(testcases[:10])

['BB11001', 'BB11002', 'BB11003', 'BB11006', 'BB11007', 'BB11010', 'BB11012', 'BB11013', 'BB11017', 'BB11020']


#### QScore

In [10]:
# path handed to WrappedScorer when the scorer object is constructed
SCORER_PATH = "scorers/QScore/qscore.exe"

In [11]:
# shared scorer object; its score_testcase(test_path, reference_path) method
# is called once per produced alignment
scorer = WrappedScorer(SCORER_PATH)

#### Performing Alignment

In [12]:
def perform_alignment_and_record_time(filename):
    """Run the aligner on one testcase and return the elapsed time in ms.

    The aligner executable (ALIGNER_PATH) is launched with the testcase's
    input path, its output path, SEED, and the module-level ``iterations``
    value. ``iterations`` is not a parameter: it must already be defined as a
    global (the benchmarking loop sets it) before this function is called.

    Args:
        filename: name of the testcase inside INPUT_FOLDER; the same name is
            used for the output path inside OUTPUT_FOLDER.

    Returns:
        Wall-clock duration of the aligner process as an int, in
        milliseconds, rounded to a whole millisecond.
    """
    input_path = f"{INPUT_FOLDER}/{filename}"
    output_path = f"{OUTPUT_FOLDER}/{filename}"

    # arguments are passed as a list rather than one command string, so the
    # call does not depend on platform-specific string parsing and paths
    # containing spaces stay intact
    command = [
        ALIGNER_PATH,
        "-input", input_path,
        "-output", output_path,
        "-seed", str(SEED),
        "-iterations", str(iterations),
    ]

    start_time = time.perf_counter()
    # the exit status of the aligner is not inspected here
    subprocess.run(command)
    end_time = time.perf_counter()

    elapsed_milliseconds = (end_time - start_time) * 1000
    return int(round(elapsed_milliseconds))

In [13]:
def score_quality_of_produced_alignment(filename):
    """Score the alignment produced for one testcase against its reference.

    Args:
        filename: testcase name. The produced alignment is read from
            OUTPUT_FOLDER under this name with a ``.faa`` suffix appended;
            the reference is read from REFERENCES_FOLDER under the bare name.

    Returns:
        Whatever ``scorer.score_testcase`` returns for the two paths.
    """
    produced_alignment_path = f"{OUTPUT_FOLDER}/{filename}.faa"
    reference_alignment_path = f"{REFERENCES_FOLDER}/{filename}"
    return scorer.score_testcase(produced_alignment_path, reference_alignment_path)

In [14]:
def record_performance_on_testcase(filename):
    """Align and score one testcase and return its results record.

    Args:
        filename: name of the testcase to run.

    Returns:
        A comma-separated string with the fields
        aligner name, dataset name, testcase name, score, elapsed time in ms.
    """
    # use the ``filename`` argument, not a global loop variable, so the
    # function processes the testcase it was actually asked to process
    time_taken = perform_alignment_and_record_time(filename)
    score = score_quality_of_produced_alignment(filename)

    return f"{ALIGNER_NAME},{DATASET_NAME},{filename},{score},{time_taken}"
    

In [15]:
def write_records_to_csv(records, filename):
    """Write result records, one per line, to a file in RESULTS_FILE_DIRECTORY.

    Args:
        records: iterable of strings; each is written followed by a newline.
        filename: name of the file to create inside RESULTS_FILE_DIRECTORY
            (an existing file of that name is overwritten).
    """
    results_filepath = f"{RESULTS_FILE_DIRECTORY}/{filename}"

    with open(results_filepath, "w") as file:
        # write the records that were passed in, not a global list
        file.writelines(record + "\n" for record in records)

    print(f"Results written to: {results_filepath}")

In [16]:
# first line of every results file; the column order matches the fields of
# the record string built by record_performance_on_testcase
HEADER = "aligner,dataset,testcase,Q_score,time_elapsed_ms"

In [17]:
# benchmark every testcase once per iteration budget; one CSV per budget
# NOTE: the loop variable must keep the name `iterations` because
# perform_alignment_and_record_time reads it as a global
for iterations in [150, 100, 50, 25, 10, 1]:
    RECORDS = [HEADER]

    RESULTS_FILENAME = f"sbench_{ALIGNER_NAME}_on_{DATASET_NAME}_{iterations}_iterations.csv"
    # same location write_records_to_csv writes to (inside RESULTS_FILE_DIRECTORY)
    RESULTS_FILEPATH = f"{RESULTS_FILE_DIRECTORY}/{RESULTS_FILENAME}"
    for testcase in testcases:
        record = record_performance_on_testcase(testcase)
        print(record)
        RECORDS.append(record)

    write_records_to_csv(RECORDS, RESULTS_FILENAME)

MAli-v1.31,BALIS-2,BB11001,0.863,2005
MAli-v1.31,BALIS-2,BB11002,0.103,3443
MAli-v1.31,BALIS-2,BB11003,0.202,9202
MAli-v1.31,BALIS-2,BB11006,0.0588,6351
MAli-v1.31,BALIS-2,BB11007,0.221,11012
MAli-v1.31,BALIS-2,BB11010,0.0235,9649
MAli-v1.31,BALIS-2,BB11012,0.667,6851
MAli-v1.31,BALIS-2,BB11013,0.0448,2103
MAli-v1.31,BALIS-2,BB11017,0.443,5044
MAli-v1.31,BALIS-2,BB11020,0.256,5457
MAli-v1.31,BALIS-2,BB11023,0.209,8184
MAli-v1.31,BALIS-2,BB11024,0.0469,7984
MAli-v1.31,BALIS-2,BB11026,0.0806,15146
MAli-v1.31,BALIS-2,BB11028,0.0944,4797
MAli-v1.31,BALIS-2,BB11030,0.0197,9163
MAli-v1.31,BALIS-2,BB11032,0.121,8806
MAli-v1.31,BALIS-2,BB11034,0.02,12895
MAli-v1.31,BALIS-2,BB11035,0.305,2610
MAli-v1.31,BALIS-2,BB11036,0.162,8597
MAli-v1.31,BALIS-2,BB12001,0.682,8792
MAli-v1.31,BALIS-2,BB12003,0.892,1501
MAli-v1.31,BALIS-2,BB12006,0.877,4343
MAli-v1.31,BALIS-2,BB12008,0.791,6760
MAli-v1.31,BALIS-2,BB12015,0.358,4173
MAli-v1.31,BALIS-2,BB12017,0.611,9214
MAli-v1.31,BALIS-2,BB12020,0.819,2694
MAl