# Test computation of escape scores
Test computation of "escape scores" by `CodonVariantTable.escape_scores`.

First, create a miniature test variant table:

In [16]:
import tempfile

import pandas as pd

import dms_variants.codonvarianttable

# Wild-type gene sequence: two codons (ATG, GGC).
geneseq = 'ATGGGC'

# Column layout of the raw per-barcode count records below.
count_columns = ['library', 'barcode', 'codon_substitutions', 'sample', 'count']

# One record per (library, barcode, sample); an empty substitution string
# means the barcode carries no codon substitutions.
count_records = [
    ('lib1', 'AA', '', 'pre', 10),
    ('lib1', 'AG', 'ATG1CAA', 'pre', 20),
    ('lib2', 'TT', 'ATG1ATC', 'pre', 30),
    ('lib2', 'TA', 'ATG1ATC GGC2GGG', 'pre', 40),
    ('lib1', 'AA', '', 'post', 1),
    ('lib1', 'AG', 'ATG1CAA', 'post', 30),
    ('lib2', 'TT', 'ATG1ATC', 'post', 15),
    ('lib2', 'TA', 'ATG1ATC GGC2GGG', 'post', 5),
    ('lib1', 'AA', '', 'post2', 20),
    ('lib1', 'AG', 'ATG1CAA', 'post2', 40),
]

# Every variant gets the same constant `variant_call_support` of 1.
variant_counts = pd.DataFrame(count_records, columns=count_columns).assign(
    variant_call_support=1
)

# Build the table from a CSV file on disk: `from_variant_count_df` is given
# a file path here, so the counts are written to a temporary file first.
CodonVariantTable = dms_variants.codonvarianttable.CodonVariantTable

with tempfile.NamedTemporaryFile(mode='w') as f:
    variant_counts.to_csv(f, index=False)
    # Flush buffered rows before the file's path is handed to the constructor.
    f.flush()
    variants = CodonVariantTable.from_variant_count_df(
        variant_count_df_file=f.name,
        geneseq=geneseq,
    )

# Display the parsed counts as the cell output.
variants.variant_count_df

Unnamed: 0,library,sample,barcode,count,variant_call_support,codon_substitutions,aa_substitutions,n_codon_substitutions,n_aa_substitutions
0,lib1,pre,AG,20,1,ATG1CAA,M1Q,1,1
1,lib1,pre,AA,10,1,,,0,0
2,lib1,post,AG,30,1,ATG1CAA,M1Q,1,1
3,lib1,post,AA,1,1,,,0,0
4,lib1,post2,AG,40,1,ATG1CAA,M1Q,1,1
5,lib1,post2,AA,20,1,,,0,0
6,lib2,pre,TA,40,1,ATG1ATC GGC2GGG,M1I,2,1
7,lib2,pre,TT,30,1,ATG1ATC,M1I,1,1
8,lib2,post,TT,15,1,ATG1ATC,M1I,1,1
9,lib2,post,TA,5,1,ATG1ATC GGC2GGG,M1I,2,1


Now compute escape scores:

In [17]:
# One row per escape-score comparison: a name, the pre- and post-selection
# samples to compare, and the `frac_escape` value for that selection.
sample_df = pd.DataFrame(
    {
        'name': ['name1', 'name2'],
        'pre_sample': ['pre', 'pre'],
        'post_sample': ['post', 'post2'],
        'frac_escape': [0.67, 0.7],
    }
)

# Keyword options for the escape-score computation, gathered in one place.
escape_score_options = {
    'libraries': variants.libraries,
    'handle_small_B': 'floor',
    'floor_B': 0.01,
    'pseudocount': 0.5,
}

# Display the resulting per-variant score table as the cell output.
variants.escape_scores(sample_df, **escape_score_options)

Unnamed: 0,name,library,pre_sample,post_sample,barcode,score,score_var,pre_count,post_count,codon_substitutions,n_codon_substitutions,aa_substitutions,n_aa_substitutions
0,name1,lib1,pre,post,AG,4.864737,134.398469,20,30,ATG1CAA,1,M1Q,1
1,name1,lib1,pre,post,AA,0.140385,0.016563,10,1,,0,,0
2,name1,lib2,pre,post,TA,0.530374,0.08485,40,5,ATG1ATC GGC2GGG,2,M1I,1
3,name1,lib2,pre,post,TT,6.643856,2683.902922,30,15,ATG1ATC,1,M1I,1
4,name2,lib1,pre,post2,AG,1.750489,0.855128,20,40,ATG1CAA,1,M1Q,1
5,name2,lib1,pre,post2,AA,1.710923,1.549655,10,20,,0,,0


In [18]:
import math

# Sanity check: log2(100) = -log2(0.01), where 0.01 is the `floor_B` passed
# to `escape_scores` above. To the displayed precision the result equals the
# score reported for barcode TT in the `name1` comparison (6.643856).
# NOTE(review): presumably that score is the one that hit the floor; confirm
# against the `escape_scores` documentation.
math.log(100, 2)

6.643856189774725

Create some test data:

## Old stuff checking derivatives

In [3]:
def new_meth():
    """Return ``sigma`` from the compact ("new") closed-form expression.

    Reads the module-level values ``n_pre``, ``n_post``, ``N_pre``,
    ``N_post`` and ``F`` and evaluates::

        B     = 1 - F * (n_post / N_post) / (n_pre / N_pre)
        sigma = (F * N_pre / (N_post * B))**2
                * (n_post**2 / n_pre**3 + n_post / n_pre**2)

    Returns:
        The value of ``sigma`` as a float.
    """
    post_frac = n_post / N_post
    pre_frac = n_pre / N_pre
    # Same left-to-right evaluation as the one-line form: (F * post) / pre.
    b_term = 1 - F * post_frac / pre_frac

    scale_sq = (F * N_pre / (N_post * b_term))**2
    count_term = n_post**2 / n_pre**3 + n_post / n_pre**2
    return scale_sq * count_term

def old_meth():
    """Return ``sigma`` from the longer ("old") two-term expression.

    Reads the module-level values ``n_pre``, ``n_post``, ``N_pre``,
    ``N_post`` and ``F``. With ``D = n_pre * N_post - F * n_post * N_pre``
    the result is::

        sigma = (N_post / D - 1 / n_pre)**2 * n_pre
                + (F * N_pre / D)**2 * n_post

    Returns:
        The value of ``sigma`` as a float.
    """
    # Denominator shared by both terms.
    denom = n_pre * N_post - F * n_post * N_pre

    pre_term = (N_post / denom - 1 / n_pre)**2 * n_pre
    post_term = (F * N_pre / denom)**2 * n_post
    return pre_term + post_term
    
# Example inputs; `new_meth` and `old_meth` read these names as globals.
n_pre, n_post = 10, 1
N_pre, N_post = 100, 50
F = 0.3

# Print both results side by side so the two expressions can be compared.
print(new_meth(), old_meth())

0.004481665912177456 0.004481665912177454
