In [1]:
import tests.data_generation.globals as test_glob
from tests.data_generation.globals import SAVELOC
from tests.data_generation.run_generator import FakeRun
from tests.data_generation.stress_tests import test_aa

import os
import pickle
import random
import shutil
import numpy as np
import pandas as pd
from glob import glob

This notebook stress-tests the evSeq code by feeding it randomly generated inputs whose expected outputs are known, and then checking that the evSeq outputs match those expected outputs.

In [2]:
def run_evseq_stress_test(detailed, include_nnn, max_runs = None,
                          stop_on_failure = False, start_seed = 0):
    """
    Repeatedly build a fake run, process it with evSeq, and compare the
    decoupled amino-acid counts written by evSeq against the expected counts.

    Every iteration uses its own integer seed (start_seed, start_seed + 1, ...)
    which is written into the RNGs held by `tests.data_generation.globals`, so
    a failing iteration can be reproduced from its seed alone. When an
    iteration passes, its evSeq output folder is deleted. When it fails, the
    output folder is kept and `[bad_platewells, reports]` is pickled to
    `<SAVELOC>/ErrorReports/<seed>.pkl`.

    Parameters
    ----------
    detailed:
        Forwarded unchanged to `FakeRun(detailed = ...)`.
    include_nnn:
        Forwarded unchanged to `FakeRun.build_refseq(...)`.
    max_runs: int or None, default None
        Maximum number of iterations to perform. With None (the default) the
        loop is unbounded, which matches the original behavior.
    stop_on_failure: bool, default False
        If True, stop after the first iteration whose outputs disagree.
        With False (the default) failures are recorded and testing continues.
    start_seed: int, default 0
        Seed used for the first iteration.

    Returns
    -------
    failed_seeds: list of int
        Seeds whose evSeq output did not match the expected output. With the
        default arguments the loop never ends on its own, so this is only
        returned when `max_runs` and/or `stop_on_failure` terminates it.

    Raises
    ------
    FileNotFoundError
        If no run folder is present under `<SAVELOC>/evSeqOutput` after evSeq
        has been run.
    """
    failed_seeds = []

    # Runs forever by default; `max_runs` and `stop_on_failure` bound the loop.
    counter = start_seed - 1
    n_runs = 0
    while max_runs is None or n_runs < max_runs:

        # Update the counters
        counter += 1
        n_runs += 1

        # Update the global RNG to match the counter (for reproducibility)
        test_glob.RANDOM_SEED = counter
        test_glob.NP_RNG = np.random.default_rng(counter)
        test_glob.RANDOM_RNG = random.Random(counter)

        # Build a test run and the associated output files
        test_run = FakeRun(detailed = detailed)
        test_run.build_fastq()
        test_run.build_refseq(include_nnn)

        # Run evSeq on the generated data
        test_run.run_evseq()

        # Get the expected outputs
        expected_out = test_run.build_expected_aa()

        # Locate the run folder that sorts last by name, which is taken to be
        # the one just written. Fail with a clear message if there is none.
        output_pattern = os.path.join(SAVELOC, "evSeqOutput", "*")
        run_paths = sorted(glob(output_pattern))
        if not run_paths:
            raise FileNotFoundError(
                f"No evSeq output found matching {output_pattern} "
                f"for seed {counter}."
            )
        most_recent_run_path = run_paths[-1]

        # Get the true outputs. Sort the true output in the same
        # way the expected was sorted.
        true_out = pd.read_csv(
            os.path.join(most_recent_run_path, "OutputCounts",
                         "AminoAcids_Decoupled_All.csv")
        ).sort_values(by = ["IndexPlate", "Well", "AaPosition", "Aa"])

        # Test the two dataframes to make sure they agree
        test_passed, bad_platewells, reports = test_aa(expected_out, true_out)

        # Note success if all tests passed and delete output
        if test_passed:
            print(f"All tests passed for seed: {counter}")
            shutil.rmtree(most_recent_run_path)
            continue

        # Otherwise record the failure and report errors
        failed_seeds.append(counter)
        for plate, well in bad_platewells:
            print(f"Errors found for {plate}-{well} with seed {counter}.")

        # Save the messed up components. `makedirs` with `exist_ok` is safe
        # to call repeatedly and also creates missing parent folders.
        error_loc = os.path.join(SAVELOC, "ErrorReports")
        os.makedirs(error_loc, exist_ok = True)
        with open(os.path.join(error_loc, f"{counter}.pkl"), "wb") as f:
            pickle.dump([bad_platewells, reports], f)

        if stop_on_failure:
            break

    return failed_seeds

In [4]:
# Launch the stress test with both generator options switched off.
test = run_evseq_stress_test(detailed = False, include_nnn = False)

To do:
1. Reimplement pytests
2. Build code to test combinations of amino acids coming out
3. Build code to test the "max" files coming out