In [None]:
import pandas as pd

# Integration tests

## Todo: 
* Make a test for the single-end approach, using this BAM file: /tscc/projects/ps-yeolab3/ekofman/sailor2/data/Hugo-A1Aligned.sortedByCoord.out.md.subset.bam
    * All of the reverse reads map to positive strand genes
    * All of the forward reads map to negative strand genes

### Run tests

In [None]:
%%bash

pwd
echo "Running"

cd ..
pwd

tests_folder="tests/strandedness_tests/"
for t in "F1R2_pair_test" "F2R1_end_second_in_pair_test" "same_pos_dif_reads_test" "tax1bp3_chr17_3665556_read_test"

do
    echo $t
    echo "Removing old files..."
    rm $tests_folder$t/* -r

    echo "Running tests..."
    bash tests/strandedness_tests/scripts/$t.sh 
   
done



In [None]:
%%bash

pwd
echo "Running"

cd ..
pwd

tests_folder="tests/singlecell_tests/"
#for t in "only_5_cells_test"
for t in "long_read_sc_test"

do
    echo $t
    echo "Removing old files..."
    rm $tests_folder$t/* -r

    echo "Running old tests..."
    bash tests/singlecell_tests/scripts/$t.sh 
   
done


# Integration test automatic checks

In [None]:
# Expected results for each integration test, keyed by test name.
#
# Each entry holds:
#   "folder"       - sub-folder that contains the test's output directory
#   "expectations" - list of expected rows; every item identifies a site by
#                    "contig" and "position" (plus "barcode" for single-cell
#                    tests) and lists the values that site should carry.
#
# "num_rows" is optional; the checking cell in this notebook falls back to 1
# when it is absent. "strand_conversion" is recorded here but is not among the
# attributes that checking cell compares.
test_name_to_expectations = {
    # ---- strandedness tests ------------------------------------------------
    "F1R2_pair_test": {
        "folder": "strandedness_tests",
        "expectations": [
            {
                "contig": "chr17",
                "position": 43044352,
                "count": 1,
                "coverage": 1,
                "conversion": "G>A",
                "num_rows": 1,
                "strand_conversion": "C>T",
                "strand": "-",
                "feature_name": "BRCA1",
            },
        ],
    },
    "F2R1_end_second_in_pair_test": {
        "folder": "strandedness_tests",
        "expectations": [
            {
                "contig": "chr17",
                "position": 43001716,
                "count": 1,
                "coverage": 1,
                "conversion": "G>A",
                "strand_conversion": "G>A",
                "strand": "+",
                "feature_name": "RPL27",
            },
        ],
    },
    "same_pos_dif_reads_test": {
        "folder": "strandedness_tests",
        "expectations": [
            {
                "contig": "chr17",
                "position": 83199872,
                "count": 9,
                "coverage": 9,
                "conversion": "C>G",
                "strand_conversion": "C>G",
                "strand": "+",
                "feature_name": "AC139099.2",
            },
        ],
    },
    "tax1bp3_chr17_3665556_read_test": {
        "folder": "strandedness_tests",
        "expectations": [
            {
                "contig": "chr17",
                "position": 3665556,
                "num_rows": 1,
                "count": 1,
                "coverage": 1,
                "conversion": "G>A",
                "strand_conversion": "G>A",
                "strand": "+",
                # NOTE(review): feature_name check is disabled for this test; the
                # value below matches same_pos_dif_reads_test and may have been
                # copied from it -- confirm the expected gene before enabling.
                # "feature_name": "AC139099.2"
            },
        ],
    },
    # ---- single-cell tests -------------------------------------------------
    "only_5_cells_test": {
        "folder": "singlecell_tests",
        "expectations": [
            {
                "contig": "9",
                "barcode": "GGGACCTTCGAGCCAC-1",
                "position": 3000524,
                "num_rows": 1,
                "count": 1,
                "coverage": 12,
                "conversion": "C>A",
                "strand_conversion": "G>T",
                "strand": "-",
            },
            {
                "contig": "9",
                "barcode": "GGGACCTTCGAGCCAC-1",
                "position": 3000525,
                "num_rows": 1,
                "count": 1,
                "coverage": 12,
                "conversion": "C>T",
                "strand_conversion": "G>A",
                "strand": "-",
            },
            {
                "contig": "9",
                "barcode": "GATCCCTCAGTAACGG-1",
                "position": 3000525,
                "num_rows": 1,
                "count": 1,
                "coverage": 4,
                "conversion": "C>G",
                "strand_conversion": "G>C",
                "strand": "-",
            },
        ],
    },
    "long_read_sc_test": {
        "folder": "singlecell_tests",
        "expectations": [
            {
                "contig": "6",
                "barcode": "ENSMUST00000203816-AACGTGTTGGAGAGGG-16-G",
                "position": 115807969,
                "num_rows": 1,
                "count": 1,
                "coverage": 1,
                "conversion": "A>C",
                "strand_conversion": "T>G",
                "strand": "-",
            },
            {
                "contig": "6",
                "barcode": "ENSMUST00000081840-AAGTCGTACCAGGCTC-40-C",
                "position": 115805653,
                "num_rows": 1,
                "count": 1,
                "coverage": 1,
                "conversion": "G>A",
                "strand_conversion": "C>T",
                "strand": "-",
            },
            {
                "contig": "6",
                "barcode": "ENSMUST00000081840-AACGTGTTGGAGAGGG-40-G",
                "position": 115807015,
                "num_rows": 1,
                "count": 1,
                "coverage": 8,
                "conversion": "C>T",
                "strand_conversion": "G>A",
                "strand": "-",
            },
        ],
    },
}

In [None]:
# Validate each integration test's output table against its expectations.
#
# For every test in test_name_to_expectations this loads
# <folder>/<test_name>/final_filtered_site_info_annotated.tsv (tab-separated,
# first column used as the index), selects the row(s) matching each expected
# contig/position (and barcode, when one is given), then compares the number
# of matching rows and the attributes listed in checked_attributes against the
# first matching row. A pass/fail banner is printed per expectation. Problems
# are reported via print rather than raised, so one failing test does not
# prevent the remaining tests from being checked.

# Attributes of an expectation that are compared with the output table.
# NOTE(review): "strand_conversion" appears in the expectations but is not
# compared here -- confirm the output table has that column before adding it.
checked_attributes = ['count', 'coverage', 'conversion', 'strand', 'feature_name']

for test_name, info in test_name_to_expectations.items():
    print("\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\nChecking results for {}".format(test_name))

    folder = info.get("folder")

    # The results file depends only on the test, so read it once per test
    # instead of once per expectation.
    final_filtered_site_info_annotated = "{}/{}/final_filtered_site_info_annotated.tsv".format(folder, test_name)
    try:
        final_filtered_site_info_annotated_df = pd.read_csv(final_filtered_site_info_annotated, sep='\t', index_col=0)
    except FileNotFoundError:
        # A test that was not run (or produced no output) is a failure of that
        # test only; keep checking the others.
        print("\tResults file not found: {}".format(final_filtered_site_info_annotated))
        print("\n\t ~~~ {} FAILED! ~~~\n".format(test_name))
        continue

    for expectations in info.get("expectations"):
        print("\tExpecting: {}".format(expectations))

        contig = expectations.get("contig")
        barcode = expectations.get("barcode", None)
        position = expectations.get("position")

        # contig is compared as a string because expectations store it as text
        # (e.g. "9") while the parsed column may be numeric.
        row_of_interest = final_filtered_site_info_annotated_df[
            (final_filtered_site_info_annotated_df['position'] == position)
            & (final_filtered_site_info_annotated_df['contig'].astype(str) == contig)
        ]

        if barcode:
            row_of_interest = row_of_interest[row_of_interest.barcode == barcode]

        failure = False

        expected_num_rows = expectations.get("num_rows", 1)
        if len(row_of_interest) != expected_num_rows:
            print("Num rows expected: {}, was {}".format(expected_num_rows, len(row_of_interest)))
            failure = True

        for attribute, attribute_expectation in expectations.items():
            if attribute not in checked_attributes:
                continue

            if attribute not in row_of_interest.columns:
                # Previously this surfaced as an uncaught KeyError raised from
                # inside the error-reporting code itself.
                print("Exception: {} is not a column of the results table".format(attribute))
                failure = True
            elif row_of_interest.empty:
                # Previously .iloc[0] on an empty selection raised an uncaught
                # IndexError and aborted the whole cell.
                print("Exception: {} could not be checked (no matching row)".format(attribute))
                failure = True
            else:
                observed_value = row_of_interest[attribute].iloc[0]
                if observed_value != attribute_expectation:
                    print("Exception: {} was {}".format(attribute, observed_value))
                    failure = True

        if not failure:
            print("\n\t >>> {} passed! <<<\n".format(test_name))
        else:
            print("\n\t ~~~ {} FAILED! ~~~\n".format(test_name))