In [81]:
import re
import numpy as np
import pprint
from Bio import Seq, SeqUtils, SeqIO

In [82]:
seqs = []
for seq_record in SeqIO.parse("measles.txt", "fasta"):
    seqs.append(seq_record.seq)
#seqs

In [83]:
# DNA codon table
protein = {"TTT" : "F", "CTT" : "L", "ATT" : "I", "GTT" : "V",
           "TTC" : "F", "CTC" : "L", "ATC" : "I", "GTC" : "V",
           "TTA" : "L", "CTA" : "L", "ATA" : "I", "GTA" : "V",
           "TTG" : "L", "CTG" : "L", "ATG" : "M", "GTG" : "V",
           "TCT" : "S", "CCT" : "P", "ACT" : "T", "GCT" : "A",
           "TCC" : "S", "CCC" : "P", "ACC" : "T", "GCC" : "A",
           "TCA" : "S", "CCA" : "P", "ACA" : "T", "GCA" : "A",
           "TCG" : "S", "CCG" : "P", "ACG" : "T", "GCG" : "A",
           "TAT" : "Y", "CAT" : "H", "AAT" : "N", "GAT" : "D",
           "TAC" : "Y", "CAC" : "H", "AAC" : "N", "GAC" : "D",
           "TAA" : "STOP", "CAA" : "Q", "AAA" : "K", "GAA" : "E",
           "TAG" : "STOP", "CAG" : "Q", "AAG" : "K", "GAG" : "E",
           "TGT" : "C", "CGT" : "R", "AGT" : "S", "GGT" : "G",
           "TGC" : "C", "CGC" : "R", "AGC" : "S", "GGC" : "G",
           "TGA" : "STOP", "CGA" : "R", "AGA" : "R", "GGA" : "G",
           "TGG" : "W", "CGG" : "R", "AGG" : "R", "GGG" : "G" 
           }


In [84]:
def count_codons(seq, dict_codons):
    seq = str(seq)
    for i in range(0, len(seq)-3, 3):
        if seq[i:i+3] not in dict_codons.keys():
            dict_codons[seq[i:i+3]] = 1
        else:
            dict_codons[seq[i:i+3]] += 1
    return dict_codons

In [85]:
# with zero shift
cdns_frq = {}
for id_sq, seq in enumerate(seqs):
    cdns_frq[id_sq] = {}
    cdns_frq[id_sq] = count_codons(seq, cdns_frq[id_sq])
cdns_frq

{0: {'ACC': 106,
  'AAA': 101,
  'CAA': 130,
  'AGT': 65,
  'TGG': 86,
  'GTA': 48,
  'AGG': 103,
  'ATA': 98,
  'GAT': 87,
  'TCA': 156,
  'ATG': 125,
  'ATC': 117,
  'TTC': 59,
  'TAG': 92,
  'TAC': 48,
  'ACT': 75,
  'AGA': 123,
  'TCC': 110,
  'TAT': 69,
  'CAG': 139,
  'GGA': 120,
  'GAG': 96,
  'CCG': 53,
  'CCA': 124,
  'CAC': 79,
  'TTT': 58,
  'TGA': 101,
  'GCT': 52,
  'CAT': 76,
  'TGT': 74,
  'GAA': 89,
  'ACA': 143,
  'AAC': 87,
  'TTA': 96,
  'GTG': 69,
  'CTG': 100,
  'ATT': 83,
  'CCT': 60,
  'CTC': 92,
  'GAC': 67,
  'GGT': 82,
  'TAA': 91,
  'TTG': 126,
  'CGG': 49,
  'GCG': 27,
  'GGC': 72,
  'GGG': 100,
  'GTC': 48,
  'AAT': 83,
  'ACG': 46,
  'GCA': 90,
  'GCC': 77,
  'TCG': 50,
  'TCT': 73,
  'AAG': 126,
  'TGC': 65,
  'CCC': 106,
  'AGC': 76,
  'CTT': 61,
  'CTA': 73,
  'GTT': 52,
  'CGA': 33,
  'CGC': 22,
  'CGT': 13},
 1: {'ACC': 118,
  'AAA': 110,
  'CAA': 113,
  'AGT': 71,
  'TGG': 108,
  'GTA': 33,
  'AGG': 130,
  'ATA': 103,
  'GAT': 72,
  'CTT': 50,
  'ACT

In [86]:
# with one shift
cdns_frq1 = {}
for id_sq, seq in enumerate(seqs):
    cdns_frq1[id_sq] = {}
    cdns_frq1[id_sq] = count_codons(seq[1:], cdns_frq1[id_sq])
#cdns_frq1

In [87]:
# with two shift
cdns_frq2 = {}
for id_sq, seq in enumerate(seqs):
    cdns_frq2[id_sq] = {}
    cdns_frq2[id_sq] = count_codons(seq[2:], cdns_frq2[id_sq])
#cdns_frq2

In [88]:
# Counting aminas task 
# translating the seq
def translate(seq, protein):
    trans_seq = ''
    for i in range(0, len(seq)-3, 3):
        trans_seq += protein[seq[i: i+3]]  
    return trans_seq

In [89]:
def count_aminas(seq, list_aminas, aminas_dict):

    for amina in list_aminas:
        aminas_dict[amina] = seq.count(amina)
    return aminas_dict

In [90]:
aminas, aminas1, aminas2 = {}, {}, {}
list_aminas = np.unique(list(protein.values()))

# with zero shift
for id_sq, seq in enumerate(seqs):
    aminas[id_sq] = {}
    trans_seq = translate(seq, protein)
    aminas[id_sq] = count_aminas(trans_seq, list_aminas, aminas[id_sq])

    # with one shift
    aminas1[id_sq] = {}
    trans_seq = translate(seq[1:], protein)
    aminas1[id_sq] = count_aminas(trans_seq, list_aminas, aminas1[id_sq])
    
     # with one shift
    aminas2[id_sq] = {}
    trans_seq = translate(seq[2:], protein)
    aminas2[id_sq] = count_aminas(trans_seq, list_aminas, aminas2[id_sq])

In [91]:
pprint.pprint(aminas)

{0: {'A': 246,
     'C': 139,
     'D': 154,
     'E': 185,
     'F': 117,
     'G': 374,
     'H': 155,
     'I': 298,
     'K': 227,
     'L': 548,
     'M': 125,
     'N': 170,
     'P': 627,
     'Q': 269,
     'R': 343,
     'S': 814,
     'STOP': 284,
     'T': 654,
     'V': 217,
     'W': 86,
     'Y': 117},
 1: {'A': 238,
     'C': 116,
     'D': 137,
     'E': 187,
     'F': 104,
     'G': 308,
     'H': 157,
     'I': 285,
     'K': 231,
     'L': 508,
     'M': 124,
     'N': 123,
     'P': 718,
     'Q': 264,
     'R': 413,
     'S': 912,
     'STOP': 336,
     'T': 727,
     'V': 201,
     'W': 108,
     'Y': 126},
 2: {'A': 166,
     'C': 140,
     'D': 154,
     'E': 168,
     'F': 155,
     'G': 263,
     'H': 146,
     'I': 365,
     'K': 288,
     'L': 652,
     'M': 135,
     'N': 195,
     'P': 599,
     'Q': 275,
     'R': 265,
     'S': 858,
     'STOP': 381,
     'T': 688,
     'V': 245,
     'W': 86,
     'Y': 150},
 3: {'A': 121,
     'C': 154,
     'D': 150,
