In [4]:
import os
import random
import shutil
from glob import glob

import numpy as np
import pandas as pd

import tests.data_generation.globals as test_glob
from tests.data_generation.globals import SAVELOC
from tests.data_generation.run_generator import FakeRun
from tests.data_generation.stress_tests import test_decoupled_aa

This notebook stress-tests the evSeq code by passing in randomly generated inputs with known expected outputs. We then check that the evSeq outputs match the expected ones.

In [5]:
def run_evseq_stress_test(detailed, include_nnn):
    """Generate seeded fake runs until one disagrees with the evSeq output.

    Each iteration reseeds the shared RNGs in ``test_glob`` with the
    iteration counter, builds a ``FakeRun`` (fastq files plus reference
    sequence file), loads ``AminoAcids_Decoupled_All.csv`` from the most
    recent directory under ``SAVELOC/evSeqOutput`` and compares it with the
    expected amino-acid table via ``test_decoupled_aa``.

    Parameters
    ----------
    detailed
        Forwarded to ``FakeRun(detailed=...)``.
    include_nnn
        Forwarded to ``FakeRun.build_refseq``.

    Returns
    -------
    tuple
        ``(test_run, *test_output, counter)`` for the first iteration whose
        ``test_output[0]`` is falsy, where ``test_output`` is whatever
        ``test_decoupled_aa`` returned and ``counter`` is the seed used.
        The function only returns on a failure; it loops forever otherwise.

    Raises
    ------
    FileNotFoundError
        If no run directory exists under ``SAVELOC/evSeqOutput``.
    """
    # Run until we break something
    counter = -1
    while True:

        # Update the counter
        counter += 1

        # Update the global RNG to match the counter (for reproducibility)
        test_glob.RANDOM_SEED = counter
        test_glob.NP_RNG = np.random.default_rng(counter)
        test_glob.RANDOM_RNG = random.Random(counter)

        # Build a test run and the associated output files
        test_run = FakeRun(detailed = detailed)
        test_run.build_fastq()
        test_run.build_refseq(include_nnn)

        # Run evSeq on the generated data
        # NOTE(review): the evSeq invocation below is disabled, so the
        # comparison uses whatever run directory already exists under
        # SAVELOC/evSeqOutput. Re-enable it for a genuine stress test.
        # test_run.run_evseq()

        # Get the expected outputs
        expected_out = test_run.build_expected_aa()

        # Locate the most recent evSeq run directory (last in sorted order).
        run_dirs = sorted(glob(os.path.join(SAVELOC, "evSeqOutput", "*")))
        if not run_dirs:
            raise FileNotFoundError(
                "No evSeq run directories found in "
                + os.path.join(SAVELOC, "evSeqOutput")
            )
        most_recent_run_path = run_dirs[-1]

        # Get the true outputs. Sort the true output in the same
        # way the expected was sorted.
        true_out = pd.read_csv(os.path.join(most_recent_run_path, "OutputCounts",
                                            "AminoAcids_Decoupled_All.csv"))
        true_out = true_out.sort_values(
            by = ["IndexPlate", "Well", "AaPosition", "Aa"]
        )

        # Test the two dataframes to make sure they agree
        test_output = test_decoupled_aa(expected_out, true_out)

        # Break the loop if we don't pass. Return the test run
        if not test_output[0]:
            return (test_run, *test_output, counter)

        # If we pass, delete all constructed data (saves on disk space).
        # `shutil` must be imported at the top of the notebook.
        shutil.rmtree(most_recent_run_path)

In [6]:
# Run the stress test until the first failing seed and keep everything it returns.
broken_run = run_evseq_stress_test(detailed=False, include_nnn=False)

FAILURE FOUND


To do:
1. Write code that allows the stress test to continue past a failure: record the problematic well and pull all of its alignments, counts, etc.

In [6]:
# broken_run[0] is the failing test run; show the length cutoff in its config.
broken_run[0].config.length_cutoff

0.20973973040727345

In [7]:
# Show the average-quality cutoff in the failing run's config.
broken_run[0].config.average_q_cutoff

20

In [8]:
# broken_run is (test_run, *test_output, counter), so [-3] is the
# second-to-last item returned by test_decoupled_aa.
# NOTE(review): presumably the expected row - confirm against test_decoupled_aa.
broken_run[-3]

{'IndexPlate': 'DI02',
 'Plate': 'TestPlate02',
 'Well': 'E04',
 'AaPosition': '1383',
 'Aa': 'A',
 'AlignmentFrequency': 0.3050847457627119,
 'WellSeqDepth': 59,
 'Flags': 'Unexpected Variation'}

In [9]:
# [-2] is the last item returned by test_decoupled_aa (counter is [-1]).
# NOTE(review): presumably the row evSeq actually produced - confirm.
broken_run[-2]

{'IndexPlate': 'DI02',
 'Plate': 'TestPlate02',
 'Well': 'E04',
 'AaPosition': '1383',
 'Aa': 'A',
 'AlignmentFrequency': 0.4186046511627907,
 'WellSeqDepth': 43,
 'Flags': 'Unexpected Variation'}

In [12]:
# Mean of the per-character scores of a quality string, decoding each
# character as ord(char) - 33.
np.mean([ord(char) - 33 for char in  """H7;<8>;<E@B<H@CHH5HGA;97<@HH:88@>GH?G5BEIC8A?8CA5FAE585<D>88D:95<AE=9?=:C778@=H>:><E<HCG@C88H>F665;D9>EI>7CCF>D<D=?8=C<@65AI77E>7=<<C7I;I@GC@BG597;:I5G>=55>8@678E=58<6@E>E8<AFIE?86;=G<>E:8@B:7>@"""])

29.298969072164947

In [13]:
# Length of a single sequence, in characters.
len("GTTGGAGCACCCAAGACCACTCTCCGGATACTGGCCGCTGCGGCCGTATAAAAGGGATAATTGACATAGGGAGGATCTTTGTGCATGTGTGTGACCGTTCGACACAAAATACGGCGCGCGCCCAGGGGCTATTATCTTGTAATTATGGATCCTAAATCTACGTTGGACTCGAACGCGAATACGACCCAGCCTAA")

194

In [16]:
# 90% of the failing run's configured read length.
# NOTE(review): presumably compared by eye with the sequence length measured earlier - confirm.
broken_run[0].config.readlength * 0.9

183.6

In [8]:
# Second-to-last item returned by test_decoupled_aa for the current broken_run.
# NOTE(review): presumably the expected row - confirm against test_decoupled_aa.
broken_run[-3]

{'IndexPlate': 'DI01',
 'Plate': 'TestPlate01',
 'Well': 'C07',
 'AaPosition': '7961',
 'Aa': 'I',
 'AlignmentFrequency': 0.8181818181818182,
 'WellSeqDepth': 154,
 'Flags': 'Unexpected Variation'}

In [9]:
# Last item returned by test_decoupled_aa for the current broken_run.
# NOTE(review): presumably the row evSeq actually produced - confirm.
broken_run[-2]

{'IndexPlate': 'DI01',
 'Plate': 'TestPlate01',
 'Well': 'C07',
 'AaPosition': '7961',
 'Aa': 'I',
 'AlignmentFrequency': 0.8181818181818182,
 'WellSeqDepth': 77,
 'Flags': 'No usable forward alignments. -- Unexpected Variation'}

In [13]:
# f_quals of the first variant of the well at index 46 in the failing run.
# NOTE(review): presumably forward-read quality scores - confirm.
broken_run[0].wells[46].variants[0].f_quals

array([[33, 32, 30, ..., 25, 33, 35],
       [33, 32, 30, ..., 25, 33, 35],
       [33, 32, 30, ..., 25, 33, 35],
       ...,
       [33, 32, 30, ..., 25, 33, 35],
       [33, 32, 30, ..., 25, 33, 35],
       [33, 32, 30, ..., 25, 33, 35]])

In [14]:
# total_counts of the second variant of the well at index 44 in the failing run.
broken_run[0].wells[44].variants[1].total_counts

19

In [None]:
# Display the whole tuple returned by run_evseq_stress_test.
broken_run