# Test computation of escape scores
Test computation of "escape scores" by `CodonVariantTable.escape_scores`.

First create a miniature test variant table:

In [1]:
import tempfile

import pandas as pd

import dms_variants.codonvarianttable

# Two-codon gene (ATG GGC) that the codon substitutions below refer to.
geneseq = "ATGGGC"

# One row per (library, barcode, sample) with its count. The `pre` and `post`
# samples cover both libraries; `post2` only has rows for `lib1`.
variant_counts = pd.DataFrame.from_records(
    [
        # `pre` sample
        ("lib1", "AA", "", "pre", 10),
        ("lib1", "AG", "ATG1CAA", "pre", 20),
        ("lib2", "TT", "ATG1ATC", "pre", 30),
        ("lib2", "TA", "ATG1ATC GGC2GGG", "pre", 40),
        # `post` sample
        ("lib1", "AA", "", "post", 1),
        ("lib1", "AG", "ATG1CAA", "post", 30),
        ("lib2", "TT", "ATG1ATC", "post", 15),
        ("lib2", "TA", "ATG1ATC GGC2GGG", "post", 5),
        # `post2` sample (lib1 only)
        ("lib1", "AA", "", "post2", 20),
        ("lib1", "AG", "ATG1CAA", "post2", 40),
    ],
    columns=["library", "barcode", "codon_substitutions", "sample", "count"],
).assign(variant_call_support=1)  # same support value for every variant

# The table is built from a CSV path, so round-trip the counts through a
# temporary file; it is deleted automatically when the `with` block exits.
with tempfile.NamedTemporaryFile(mode="w") as csv_file:
    variant_counts.to_csv(csv_file, index=False)
    # Flush so all rows are on disk before the file is re-read by name.
    csv_file.flush()
    variants = dms_variants.codonvarianttable.CodonVariantTable.from_variant_count_df(
        variant_count_df_file=csv_file.name,
        geneseq=geneseq,
    )

# Display the parsed per-variant counts as the cell output.
variants.variant_count_df

Unnamed: 0,library,sample,barcode,count,variant_call_support,codon_substitutions,aa_substitutions,n_codon_substitutions,n_aa_substitutions
0,lib1,pre,AG,20,1,ATG1CAA,M1Q,1,1
1,lib1,pre,AA,10,1,,,0,0
2,lib1,post,AG,30,1,ATG1CAA,M1Q,1,1
3,lib1,post,AA,1,1,,,0,0
4,lib1,post2,AG,40,1,ATG1CAA,M1Q,1,1
5,lib1,post2,AA,20,1,,,0,0
6,lib2,pre,TA,40,1,ATG1ATC GGC2GGG,M1I,2,1
7,lib2,pre,TT,30,1,ATG1ATC,M1I,1,1
8,lib2,post,TT,15,1,ATG1ATC,M1I,1,1
9,lib2,post,TA,5,1,ATG1ATC GGC2GGG,M1I,2,1


Now compute escape scores:

In [2]:
# Each row names one pre/post sample comparison and its `frac_escape` value.
sample_df = pd.DataFrame.from_records(
    [
        ("name1", "pre", "post", 0.59),
        ("name2", "pre", "post2", 0.2),
    ],
    columns=["name", "pre_sample", "post_sample", "frac_escape"],
)

# Score every comparison in `sample_df`, restricted to the individual
# libraries listed in `variants.libraries`; all other options are left unset.
variants.escape_scores(
    sample_df,
    libraries=variants.libraries,
)

Unnamed: 0,name,library,pre_sample,post_sample,barcode,score,score_var,score_at_ceil,bind_frac,pre_count,post_count,codon_substitutions,n_codon_substitutions,aa_substitutions,n_aa_substitutions
0,name1,lib1,pre,post,AG,2.740562,1.68919,False,0.149627,20,30,ATG1CAA,1,M1Q,1
1,name1,lib1,pre,post,AA,0.122887,0.005731,False,0.918348,10,1,,0,,0
2,name1,lib2,pre,post,TA,0.455799,0.009381,False,0.729106,40,5,ATG1ATC GGC2GGG,2,M1I,1
3,name1,lib2,pre,post,TT,6.643856,4.640857,True,0.01,30,15,ATG1ATC,1,M1I,1
4,name2,lib1,pre,post2,AG,0.323371,0.000916,False,0.7992,20,40,ATG1CAA,1,M1Q,1
5,name2,lib1,pre,post2,AA,0.319115,0.001611,False,0.801561,10,20,,0,,0


With a different floor:

In [3]:
# Same per-library calculation, but with `floor_B` set explicitly to 0.001.
variants.escape_scores(
    sample_df,
    libraries=variants.libraries,
    floor_B=0.001,
)

Unnamed: 0,name,library,pre_sample,post_sample,barcode,score,score_var,score_at_ceil,bind_frac,pre_count,post_count,codon_substitutions,n_codon_substitutions,aa_substitutions,n_aa_substitutions
0,name1,lib1,pre,post,AG,2.740562,1.68919,False,0.149627,20,30,ATG1CAA,1,M1Q,1
1,name1,lib1,pre,post,AA,0.122887,0.005731,False,0.918348,10,1,,0,,0
2,name1,lib2,pre,post,TA,0.455799,0.009381,False,0.729106,40,5,ATG1ATC GGC2GGG,2,M1I,1
3,name1,lib2,pre,post,TT,9.965784,225.601515,True,0.001,30,15,ATG1ATC,1,M1I,1
4,name2,lib1,pre,post2,AG,0.323371,0.000916,False,0.7992,20,40,ATG1CAA,1,M1Q,1
5,name2,lib1,pre,post2,AA,0.319115,0.001611,False,0.801561,10,20,,0,,0


With a different pseudocount:

In [4]:
# Same per-library calculation, but with `pseudocount` set explicitly to 2.
variants.escape_scores(
    sample_df,
    libraries=variants.libraries,
    pseudocount=2,
)

Unnamed: 0,name,library,pre_sample,post_sample,barcode,score,score_var,score_at_ceil,bind_frac,pre_count,post_count,codon_substitutions,n_codon_substitutions,aa_substitutions,n_aa_substitutions
0,name1,lib1,pre,post,AG,2.587813,1.019757,False,0.166338,20,30,ATG1CAA,1,M1Q,1
1,name1,lib1,pre,post,AA,0.223114,0.004452,False,0.856714,10,1,,0,,0
2,name1,lib2,pre,post,TA,0.521172,0.00625,False,0.696806,40,5,ATG1ATC GGC2GGG,2,M1I,1
3,name1,lib2,pre,post,TT,4.896782,8.630904,False,0.033568,30,15,ATG1ATC,1,M1I,1
4,name2,lib1,pre,post2,AG,0.32706,0.000722,False,0.797159,20,40,ATG1CAA,1,M1Q,1
5,name2,lib1,pre,post2,AA,0.312566,0.001084,False,0.805208,10,20,,0,,0


By amino-acid substitution:

In [5]:
# Compute scores with `by='aa_substitutions'` instead of the default grouping,
# keeping `floor_B=0.001` and the individual libraries.
variants.escape_scores(
    sample_df,
    libraries=variants.libraries,
    floor_B=0.001,
    by="aa_substitutions",
)

Unnamed: 0,name,library,pre_sample,post_sample,aa_substitutions,score,score_var,score_at_ceil,bind_frac,pre_count,post_count,n_aa_substitutions
0,name1,lib1,pre,post,M1Q,2.740562,1.68919,False,0.149627,20,30,1
1,name1,lib1,pre,post,,0.122887,0.005731,False,0.918348,10,1,0
2,name1,lib2,pre,post,M1I,1.286304,0.0,False,0.41,70,20,1
3,name2,lib1,pre,post2,M1Q,0.323371,0.000916,False,0.7992,20,40,1
4,name2,lib1,pre,post2,,0.319115,0.001611,False,0.801561,10,20,0


Use the default `libraries` setting, which adds pooled "all libraries" rows alongside the per-library rows:

In [6]:
# `libraries` is deliberately omitted here so the method's default is used.
variants.escape_scores(
    sample_df,
    floor_B=0.001,
)

Unnamed: 0,name,library,pre_sample,post_sample,barcode,score,score_var,score_at_ceil,bind_frac,pre_count,post_count,codon_substitutions,n_codon_substitutions,aa_substitutions,n_aa_substitutions
0,name1,lib1,pre,post,AG,2.740562,1.68919,False,0.149627,20,30,ATG1CAA,1,M1Q,1
1,name1,lib1,pre,post,AA,0.122887,0.005731,False,0.918348,10,1,,0,,0
2,name1,lib2,pre,post,TA,0.455799,0.009381,False,0.729106,40,5,ATG1ATC GGC2GGG,2,M1I,1
3,name1,lib2,pre,post,TT,9.965784,225.601515,True,0.001,30,15,ATG1ATC,1,M1I,1
4,name1,all libraries,pre,post,lib1-AG,9.965784,0.0,True,0.001,20,30,ATG1CAA,1,M1Q,1
5,name1,all libraries,pre,post,lib1-AA,0.25534,0.028379,False,0.83779,10,1,,0,,0
6,name1,all libraries,pre,post,lib2-TA,0.241611,0.004002,False,0.8458,40,5,ATG1ATC GGC2GGG,2,M1I,1
7,name1,all libraries,pre,post,lib2-TT,1.241417,0.010489,False,0.422957,30,15,ATG1ATC,1,M1I,1
8,name2,lib1,pre,post2,AG,0.323371,0.000916,False,0.7992,20,40,ATG1CAA,1,M1Q,1
9,name2,lib1,pre,post2,AA,0.319115,0.001611,False,0.801561,10,20,,0,,0


Raise an error when we hit the floor on binding fraction:

In [7]:
# NBVAL_RAISES_EXCEPTION

# With `handle_small_B='error'` this call is expected to raise (hence the
# nbval marker above) rather than return a table of scores.
variants.escape_scores(
    sample_df,
    handle_small_B="error",
)

ValueError: some B_v <= 0; see `handle_small_B`