# analysis

In [None]:
#| default_exp analysis

In [None]:
#| export
from dgrec.utils import parse_genotypes, str_to_mut
import os
import numpy as np
from Bio import SeqIO
import matplotlib.pyplot as plt

In [None]:
from dgrec.example_data import get_example_data_dir

In [None]:
#| export
bases=list("ATGC")
def mut_rate(gen_list, #a genotype list with the number of molecules detected. The count of the first entry is used as the denominator.
             ran, #the position range in which to compute the mutation rate. If None the rate is computed for the full sequence.
             ref_seq, #reference sequence
             base_restriction = ("A","T","G","C"), #computes the mutation rate only at the base specified
             ):
    """Computes the mutation rate per base within the specified range. The rate can be computed for specific bases using the base_restriction argument.

    The returned value is `(nmut/nWT)/nbases` where:
    - `nmut` is the summed molecule count of every genotype carrying at least one
      mutation located in `ran` at a reference base listed in `base_restriction`,
    - `nWT` is the molecule count of the first entry of `gen_list`
      (presumably the wild-type genotype, as the name suggests -- confirm with `parse_genotypes`),
    - `nbases` is the number of reference bases between `ran.start` and `ran.stop`
      that belong to `base_restriction`.

    Positions are used directly as indices into `ref_seq`.
    An AssertionError is raised when no base of `base_restriction` is found in the range.
    """
    # Honour the documented contract: no range means the whole reference sequence.
    if ran is None:
        ran=range(len(ref_seq))

    nWT=gen_list[0][1]

    # Slice once; slicing clamps to the sequence bounds, so a range extending past
    # the end of ref_seq only counts the bases that actually exist.
    window=ref_seq[ran.start:ran.stop]
    base_counts={b:window.count(b) for b in bases}
    nbases=sum(base_counts[b] for b in base_restriction)
    assert nbases>0, "no base from base_restriction found in the requested range"

    nmut=0
    for g,n in gen_list:
        # Element 1 of each parsed mutation is its position in ref_seq.
        # The list is built in full (no short-circuit) so that a position outside
        # ref_seq still raises IndexError instead of being silently skipped.
        hits=[mut[1] in ran for mut in str_to_mut(g) if ref_seq[mut[1]] in base_restriction]
        if any(hits):
            nmut+=n

    return (nmut/nWT)/nbases

        

In [None]:
# Locate the example data directory and parse the sacB genotype table from it.
data_path=get_example_data_dir()
genotypes_csv=os.path.join(data_path,"sacB_genotypes.csv")
gen_list=parse_genotypes(genotypes_csv)

# Take the first record of the reference FASTA file and keep its sequence as a plain string.
read_ref_file="sacB_ref.fasta"
ref_path=os.path.join(data_path,read_ref_file)
ref=next(SeqIO.parse(ref_path,"fasta"))
ref_seq=str(ref.seq)

#showing a few example lines: every 20th entry from index 1 up to (not including) 200
preview=gen_list[1:200:20]
for g,n in preview:
    print(n,"\t",g)

279 	 A91G
28 	 A68C
15 	 A72G,A79T,A91T
10 	 A61G,A72G
6 	 A61G,A68G
6 	 A68G,A76G,A91G
5 	 A61T,A79G
4 	 A86T
4 	 A72G,A76G,A86G,A91T
3 	 A61T,A76G,A91G


In [None]:
# Position ranges compared below: 50..118 and everything before position 50.
TR_range=range(50,119)
before_TR_range=range(50)

# Compute each rate first, then report them together in scientific notation.
full_rate=mut_rate(gen_list,range(140),ref_seq)
vr_rate=mut_rate(gen_list,TR_range,ref_seq)
outside_rate=mut_rate(gen_list,before_TR_range,ref_seq)
print(f"Mutation rate on full sequence: {full_rate:.1e}\n"
      f"Mutation rate on the VR: {vr_rate:.1e}\n"
      f"Mutation rate outside the VR: {outside_rate:.1e}")

Mutation rate on full sequence: 7.8e-04
Mutation rate on the VR: 1.5e-03
Mutation rate outside the VR: 7.5e-05


In [None]:
#| hide
# Run nbdev's export step for this notebook project.
import nbdev
nbdev.nbdev_export()