## MUSCLE Alignment + Scoring

#### Imports

In [1]:
import os
import shutil
import subprocess
import time
import pandas as pd

In [2]:
from wrapped_scorer import WrappedScorer

#### MUSCLE

In [3]:
# Aligner under test:
#   ALIGNER_NAME  - label written into every result record and the results filename
#   ALIGNER_PATH  - executable invoked for each testcase
#   OUTPUT_FOLDER - folder that receives the produced alignments
ALIGNER_NAME = "MUSCLE"
ALIGNER_PATH = "aligners/MUSCLE/muscle-win64.v5.3.exe"
OUTPUT_FOLDER = "aligners/MUSCLE/output"

In [4]:
# start every run from a fresh, empty output folder:
# drop whatever a previous run left behind, then recreate the folder
stale_output_exists = os.path.exists(OUTPUT_FOLDER)
if stale_output_exists:
    shutil.rmtree(OUTPUT_FOLDER)
os.makedirs(OUTPUT_FOLDER)

#### BALIS-1

In [5]:
# Dataset being benchmarked: its label plus the folders holding the
# unaligned inputs ("in") and the reference alignments ("ref").
DATASET_NAME = "BALIS-1"
DATASET_FOLDER = "datasets/BALIS-1"
INPUT_FOLDER = DATASET_FOLDER + "/in"
REFERENCES_FOLDER = DATASET_FOLDER + "/ref"

In [6]:
# checking that testcases have been found
# os.listdir returns entries in arbitrary order, so sort them to keep the
# benchmark order (and the row order of the results CSV) reproducible
testcases = sorted(os.listdir(INPUT_FOLDER))
print(testcases[:10])

['BB11004', 'BB11005', 'BB11008', 'BB11009', 'BB11011', 'BB11014', 'BB11015', 'BB11016', 'BB11018', 'BB11019']


#### QScore

In [7]:
# location of the qscore.exe binary that is handed to WrappedScorer
SCORER_PATH = "scorers/QScore/qscore.exe"

In [8]:
# scorer built from SCORER_PATH; its score_testcase(test_path, reference_path)
# method is what scores each produced alignment against its reference
scorer = WrappedScorer(SCORER_PATH)

#### Performing Alignment

In [9]:
def perform_alignment_and_record_time(filename):
    """Run the aligner on one testcase and time the run.

    Args:
        filename: name of the testcase inside INPUT_FOLDER; the alignment is
            written under the same name inside OUTPUT_FOLDER.

    Returns:
        Wall-clock duration of the aligner process in milliseconds, rounded
        to the nearest integer.
    """
    input_path = f"{INPUT_FOLDER}/{filename}"
    output_path = f"{OUTPUT_FOLDER}/{filename}"

    # Pass the arguments as a list instead of one space-joined string: paths
    # containing spaces stay intact and the call does not rely on
    # platform-specific command-line splitting.
    command = [ALIGNER_PATH, "-align", input_path, "-output", output_path]

    start_time = time.perf_counter()
    # NOTE: the aligner's exit status is not checked, so a failed run still
    # yields a timing value.
    subprocess.run(command)
    elapsed_seconds = time.perf_counter() - start_time

    # round() without ndigits already returns an int
    return round(elapsed_seconds * 1000)

In [10]:
def score_quality_of_produced_alignment(filename):
    """Score the alignment produced for `filename` against its reference.

    The produced alignment is read from OUTPUT_FOLDER and the reference from
    REFERENCES_FOLDER, both under the same filename. Returns whatever
    `scorer.score_testcase` returns for that pair.
    """
    produced_alignment = f"{OUTPUT_FOLDER}/{filename}"
    reference_alignment = f"{REFERENCES_FOLDER}/{filename}"
    return scorer.score_testcase(produced_alignment, reference_alignment)

In [11]:
def record_performance_on_testcase(filename):
    """Align and score one testcase, returning its CSV record line.

    Args:
        filename: name of the testcase to benchmark.

    Returns:
        A comma-separated string with the fields
        aligner, dataset, testcase, score, time taken in milliseconds.
    """
    # Use the `filename` argument rather than a module-level loop variable,
    # so the record always describes the testcase that was actually passed in.
    time_taken = perform_alignment_and_record_time(filename)
    score = score_quality_of_produced_alignment(filename)

    return f"{ALIGNER_NAME},{DATASET_NAME},{filename},{score},{time_taken}"
    

In [12]:
# CSV column names; RECORDS starts out holding only this header line and
# receives one line per benchmarked testcase afterwards
HEADER = "aligner,dataset,testcase,Q_score,time_elapsed_ms"
RECORDS = [HEADER]

In [13]:
# benchmark every testcase in listing order, appending one CSV record line each
for testcase in testcases:
    record = record_performance_on_testcase(testcase)
    RECORDS.append(record)

In [14]:
# sanity check: show the header line and the most recently added record
print(RECORDS[0], RECORDS[-1], sep="\n")

aligner,dataset,testcase,Q_score,time_elapsed_ms
MUSCLE,BALIS-1,BB50014,0.971,258


#### Writing Records to CSV

In [15]:
# results file is named after the aligner/dataset pair and lives in results/
RESULTS_FILENAME = "sbench_{}_on_{}.csv".format(ALIGNER_NAME, DATASET_NAME)
RESULTS_FILEPATH = "results/{}".format(RESULTS_FILENAME)

In [16]:
# make sure the destination folder exists so the finished benchmark run is
# not lost to a FileNotFoundError at the very last step
results_folder = os.path.dirname(RESULTS_FILEPATH)
if results_folder:
    os.makedirs(results_folder, exist_ok=True)

# one record per line, header first
with open(RESULTS_FILEPATH, "w") as file:
    file.writelines(f"{record}\n" for record in RECORDS)

In [17]:
# tell the user where the CSV ended up
print("Results written to:", RESULTS_FILEPATH)

Results written to: results/sbench_MUSCLE_on_BALIS-1.csv


#### Previewing Results

In [18]:
# reload the CSV that was just written and display its first rows
# (the trailing expression is rendered by the notebook)
df = pd.read_csv(RESULTS_FILEPATH)
df.head()

Unnamed: 0,aligner,dataset,testcase,Q_score,time_elapsed_ms
0,MUSCLE,BALIS-1,BB11004,0.674,143
1,MUSCLE,BALIS-1,BB11005,0.554,760
2,MUSCLE,BALIS-1,BB11008,0.779,110
3,MUSCLE,BALIS-1,BB11009,0.758,63
4,MUSCLE,BALIS-1,BB11011,0.648,67
