In [1]:
import tests.data_generation.globals as test_glob
from tests.data_generation.globals import SAVELOC
from tests.data_generation.run_generator import FakeRun
from tests.data_generation.stress_tests import run_aa_stress_test, check_well_is_parent, calculate_parent_counts

import os
import pickle
import random
import shutil
import numpy as np
import pandas as pd
from glob import glob

This notebook stress-tests the evSeq code by passing in randomly generated inputs whose expected outputs are known. We then check that the evSeq outputs match the expected outputs.

In [2]:
def build_new_dir(new_dir):
    """Create the directory `new_dir` if it does not already exist.

    Missing parent directories are created as well, and calling this on a
    directory that already exists is a no-op.

    Parameters
    ----------
    new_dir: str or path-like
        Path of the directory to create.

    Raises
    ------
    FileExistsError
        If `new_dir` exists but is not a directory.
    """
    # `exist_ok = True` avoids the check-then-create race of testing
    # `os.path.isdir` first and only then calling `os.mkdir`.
    os.makedirs(new_dir, exist_ok = True)

def compare_datasets(expected_out, true_out, flavor, counter):
    """Compare expected and true evSeq output and record any mismatches.

    The two inputs are handed to `run_aa_stress_test`. On success a one-line
    confirmation is printed. On failure, every offending plate/well pair is
    printed and the failure details are pickled to
    `<SAVELOC>/ErrorReports/<flavor>/<counter>.pkl`.

    Parameters
    ----------
    expected_out:
        The expected output, passed as the first argument to
        `run_aa_stress_test`.
    true_out:
        The output actually produced, passed as the second argument to
        `run_aa_stress_test`.
    flavor: str
        Label used in the printed messages and as the name of the
        error-report subdirectory.
    counter: int
        Seed of the current run. Used in the printed messages and as the
        name of the pickle file.

    Returns
    -------
    The pass/fail flag returned by `run_aa_stress_test`.
    """
    passed, failing_wells, error_reports = run_aa_stress_test(expected_out, true_out)

    # Nothing to save when everything agrees
    if passed:
        print(f"All {flavor} tests passed for seed: {counter}")
        return passed

    # Announce each plate/well that disagreed
    for plate, well in failing_wells:
        print(f"Errors found for {flavor} {plate}-{well} with seed {counter}.")

    # Make sure <SAVELOC>/ErrorReports/<flavor> exists, one level at a time
    reports_root = os.path.join(SAVELOC, "ErrorReports")
    build_new_dir(reports_root)
    flavor_dir = os.path.join(reports_root, flavor)
    build_new_dir(flavor_dir)

    # Persist the failing wells and their reports for later inspection
    report_path = os.path.join(flavor_dir, f"{counter}.pkl")
    with open(report_path, "wb") as f:
        pickle.dump([failing_wells, error_reports], f)

    return passed

def run_evseq_stress_test(detailed, include_nnn, 
                          keep_output = False, seed = 0,
                          n_seeds = None):
    """Repeatedly generate fake runs, process them with evSeq, and check the output.

    Each iteration reseeds the RNGs held in `test_glob` with the current
    seed, builds a `FakeRun`, runs evSeq on it, and compares the decoupled
    and coupled amino acid count tables against the expected ones using
    `compare_datasets`. The seed is incremented by one after every iteration.

    Parameters
    ----------
    detailed: 
        Forwarded to `FakeRun` as its `detailed` argument.
    include_nnn:
        Forwarded to `FakeRun.build_refseq`.
    keep_output: bool, default = False
        If False, the evSeq output directory of an iteration is deleted when
        both the uncoupled and coupled comparisons pass. Output of failing 
        iterations is always kept.
    seed: int, default = 0
        Seed used for the first iteration.
    n_seeds: int or None, default = None
        Number of consecutive seeds to test. If None, the loop never
        terminates on its own and must be interrupted (e.g., by a
        KeyboardInterrupt). A value of 0 or less runs no iterations.

    Returns
    -------
    failed_seeds: list of int
        Seeds for which at least one comparison failed. Only reachable when
        `n_seeds` is not None.
    """
    # Seeds whose uncoupled and/or coupled comparison failed
    failed_seeds = []
    
    # Run over consecutive seeds; unbounded when `n_seeds` is None
    counter = seed
    while n_seeds is None or counter < seed + n_seeds:
                
        # Update the global RNG to match the counter (for reproducibility)
        test_glob.RANDOM_SEED = counter
        test_glob.NP_RNG = np.random.default_rng(counter)
        test_glob.RANDOM_RNG = random.Random(counter)
    
        # Build a test run and the associated output files
        test_run = FakeRun(detailed = detailed)
        test_run.build_fastq()
        test_run.build_refseq(include_nnn)

        # Run evSeq on the generated data
        test_run.run_evseq()

        # Get the expected outputs
        expected_decoupled, expected_coupled = test_run.build_expected_aa()

        # Get the true outputs. The last entry of the sorted listing is taken
        # to be the run just produced.
        # NOTE(review): assumes output directory names sort chronologically 
        # and that at least one exists -- confirm.
        most_recent_run_path = sorted(glob(os.path.join(SAVELOC, "evSeqOutput", "*")))[-1]
        counts_dir = os.path.join(most_recent_run_path, "OutputCounts")
        true_decoupled = pd.read_csv(os.path.join(counts_dir,
                                                  "AminoAcids_Decoupled_All.csv"))
        true_coupled = pd.read_csv(os.path.join(counts_dir,
                                                "AminoAcids_Coupled_All.csv"))
        
        # Sort the true output in the same way the expected was sorted.
        true_decoupled = true_decoupled.sort_values(
            by = ["IndexPlate", "Well", "AaPosition", "Aa"]
        )
        true_coupled = true_coupled.sort_values(
            by = ["IndexPlate", "Well", "AlignmentFrequency", "SimpleCombo"]
        )
        
        # Test the two dataframes from each set to make sure they agree
        uncoupled_passed = compare_datasets(expected_decoupled,
                                            true_decoupled,
                                            "Uncoupled",
                                            counter)
        coupled_passed = compare_datasets(expected_coupled,
                                          true_coupled,
                                          "Coupled",
                                          counter)
        
        # If both tests passed, delete output (unless asked to keep it).
        # Otherwise remember the seed so the caller can reproduce the failure.
        if uncoupled_passed and coupled_passed:
            if not keep_output:
                shutil.rmtree(most_recent_run_path)
        else:
            failed_seeds.append(counter)
            
        # Update the counter
        counter += 1
        
    return failed_seeds

In [3]:
# Start the stress test at seed 3 and keep every evSeq output directory.
# This call keeps looping over increasing seeds until the kernel is interrupted.
run_evseq_stress_test(
    detailed = True,
    include_nnn = True,
    keep_output = True,
    seed = 3,
)

All Uncoupled tests passed for seed: 3
Errors found for Coupled DI06-G08 with seed 3.
All Uncoupled tests passed for seed: 4
Errors found for Coupled DI06-G10 with seed 4.
All Uncoupled tests passed for seed: 5
All Coupled tests passed for seed: 5
All Uncoupled tests passed for seed: 6
All Coupled tests passed for seed: 6
All Uncoupled tests passed for seed: 7
All Coupled tests passed for seed: 7
All Uncoupled tests passed for seed: 8
All Coupled tests passed for seed: 8
All Uncoupled tests passed for seed: 9
All Coupled tests passed for seed: 9
All Uncoupled tests passed for seed: 10
All Coupled tests passed for seed: 10
All Uncoupled tests passed for seed: 11
All Coupled tests passed for seed: 11
All Uncoupled tests passed for seed: 12
All Coupled tests passed for seed: 12
All Uncoupled tests passed for seed: 13
Errors found for Coupled DI05-B09 with seed 13.
All Uncoupled tests passed for seed: 14
All Coupled tests passed for seed: 14
All Uncoupled tests passed for seed: 15
All Coup

KeyboardInterrupt: 

To do:
1. Build code to test the combinations of amino acids coming out.
2. Build code to test the "max" files coming out.
3. Add code to test for DEAD wells.