In [25]:
from itertools import product

def get_codon_table(table_number):
    """Parse one genetic-code table out of ``translation_tables.txt``.

    The file is read from the current working directory. The section used is
    the text between the header ``==== Table <table_number> ====`` and the
    next ``=================`` separator. Inside that section every
    whitespace-separated token that is three characters long and upper-case
    is treated as a codon, and the two tokens that follow it are stored as
    its value. If the token after those two is the literal ``'i'`` it is
    stored as a third element.

    Args:
        table_number: Identifier interpolated into the section header.

    Returns:
        dict mapping codon -> ``(field1, field2)`` or ``(field1, field2, 'i')``.
        The dict is empty when the header is not found in the file.
    """
    with open('translation_tables.txt', 'r') as f:
        text = f.read()

    header = '==== Table ' + str(table_number) + ' ===='
    section = text.partition(header)[2].partition('=================')[0]

    codon_table = {}
    for line in section.split('\n'):
        tokens = line.split()
        for pos, token in enumerate(tokens):
            if not (token.isupper() and len(token) == 3):
                continue
            # A codon must be followed by two fields. A stray three-letter
            # upper-case word at the end of a line is not a codon entry and
            # must not index past the end of the token list.
            if pos + 2 >= len(tokens):
                continue
            entry = (tokens[pos + 1], tokens[pos + 2])
            if pos + 3 < len(tokens) and tokens[pos + 3] == 'i':
                entry += (tokens[pos + 3],)
            codon_table[token] = entry
    return codon_table

def translate(seq, codon_table):
    """Translate a nucleotide string into a string of amino-acid symbols.

    Every ``'U'`` is replaced by ``'T'`` before lookup. The sequence is cut
    into consecutive triplets and each triplet is mapped to element ``0`` of
    its ``codon_table`` entry.

    Args:
        seq: Nucleotide string using the same letter case as the table keys.
        codon_table: Mapping codon -> sequence whose first item is the symbol
            appended to the result.

    Returns:
        The concatenated symbols, or ``''`` (after printing a message) when
        the length of ``seq`` is not a multiple of three.

    Raises:
        KeyError: If a triplet is missing from ``codon_table``.
    """
    seq = seq.replace('U', 'T')

    # Guard clause: incomplete reading frames are reported, not translated.
    if len(seq) % 3 != 0:
        print('Number of nucleotides does not divide into three')
        return ''

    symbols = [codon_table[seq[start:start + 3]][0]
               for start in range(0, len(seq), 3)]
    return ''.join(symbols)

def get_codon_variations(codon, codon_table):
    """Count amino acids by their smallest nucleotide distance from ``codon``.

    All 64 triplets over ``ACGT`` are ranked by the number of positions in
    which they differ from ``codon`` (after ``U`` -> ``T``). Walking that
    ranking, each translated symbol other than ``'*'`` is recorded once, with
    the distance at which it first appears.

    Args:
        codon: The reference triplet.
        codon_table: Mapping passed through to ``translate``.

    Returns:
        dict ``{0: n0, 1: n1, 2: n2, 3: n3}`` where ``n_d`` is the number of
        distinct symbols first reached at distance ``d``.
    """
    codon = codon.replace('U', 'T')
    # The result is not used; the call is kept so that the reference codon
    # goes through the same lookup (and message) as any translated sequence.
    translate(codon, codon_table)

    def mismatches(candidate):
        # Number of positions of `codon` at which `candidate` differs.
        return sum(1 for pos in range(len(codon)) if codon[pos] != candidate[pos])

    ranked = [(''.join(letters), mismatches(''.join(letters)))
              for letters in product('ACGT', repeat=3)]
    # list.sort is stable, so ties keep the order produced by product().
    ranked.sort(key=lambda pair: pair[1])

    first_seen_at = {}
    for candidate, distance in ranked:
        symbol = translate(candidate, codon_table)
        if symbol != '*' and symbol not in first_seen_at:
            first_seen_at[symbol] = distance

    distances = list(first_seen_at.values())
    return {d: distances.count(d) for d in range(4)}

def get_sequence_number(sequence, mutate_first_codon, max_nmut, codon_table):
    """Print variant counts per total number of nucleotide substitutions.

    For each codon, ``get_codon_variations`` gives the number of amino acids
    first reachable at 0, 1, 2 and 3 substitutions. This function combines
    those per-codon counts with a dynamic programme: the entry for
    ``(nmut, k)`` is the number of ways to choose one counted amino acid for
    each of the first ``k`` codons so that the per-codon distances add up to
    ``nmut``.

    One line is printed per ``nmut`` from 0 up to ``max_nmut`` (or the largest
    possible total, if smaller), in the form ``"<nmut> <count> <cumulative>"``.
    Nothing is returned.

    Args:
        sequence: Nucleotide string; processed in consecutive triplets.
            Trailing nucleotides that do not form a whole triplet are ignored
            (``translate`` prints its message for such input).
        mutate_first_codon: When falsy, the first codon contributes only its
            distance-0 count.
        max_nmut: Largest number of substitutions to report.
        codon_table: Mapping passed through to ``translate`` and
            ``get_codon_variations``.
    """
    # Called only for its effects: it prints the message for a length that is
    # not a multiple of three and raises KeyError early for unknown codons.
    translate(sequence, codon_table)

    implement = {(0, 0): 1}   # (substitutions so far, codons consumed) -> count
    variations_by_codon = {}  # identical codons are analysed only once
    mutations = 0             # largest total reachable with the codons seen so far
    n_codons = 0

    for start in range(0, len(sequence) - 2, 3):
        n_codons += 1
        codon = sequence[start:start + 3]
        max_mutations = 0 if n_codons == 1 and not mutate_first_codon else 3
        prev_mutations = mutations
        mutations += max_mutations

        if codon not in variations_by_codon:
            variations_by_codon[codon] = get_codon_variations(codon, codon_table)
        current_codon_variations = variations_by_codon[codon]

        for nmut in range(mutations + 1):
            # Rows above max_nmut are never printed, and lower rows do not
            # depend on them, so they are not computed.
            if nmut > max_nmut:
                break
            total = 0
            for extra_m in range(max_mutations + 1):
                prev_nmut = nmut - extra_m
                if 0 <= prev_nmut <= prev_mutations:
                    total += (implement[(prev_nmut, n_codons - 1)]
                              * current_codon_variations[extra_m])
            implement[(nmut, n_codons)] = total

    # The last row is indexed by the number of codons actually processed,
    # so it always exists even when the sequence has leftover nucleotides.
    integral = 0
    for nmut in range(mutations + 1):
        if nmut > max_nmut:
            break
        count = implement[(nmut, n_codons)]
        integral += count
        print('{0} {1} {2}'.format(nmut, count, integral))

        
# Driver: print the variant-count table for the sequence stored in `gfp`
# using translation table 1.
mutate_first_codon = True  # the first codon may be substituted like any other
gfp = 'ATGGTGAGCAAGGGCGAGGAGCTGTTCACCGGGGTGGTGCCCATCCTGGTCGAGCTGGACGGCGACGTAAACGGCCACAAGTTCAGCGTGTCCGGCGAGGGCGAGGGCGATGCCACCTACGGCAAGCTGACCCTGAAGTTCATCTGCACCACCGGCAAGCTGCCCGTGCCCTGGCCCACCCTCGTGACCACCCTGACCTACGGCGTGCAGTGCTTCAGCCGCTACCCCGACCACATGAAGCAGCACGACTTCTTCAAGTCCGCCATGCCCGAAGGCTACGTCCAGGAGCGCACCATCTTCTTCAAGGACGACGGCAACTACAAGACCCGCGCCGAGGTGAAGTTCGAGGGCGACACCCTGGTGAACCGCATCGAGCTGAAGGGCATCGACTTCAAGGAGGACGGCAACATCCTGGGGCACAAGCTGGAGTACAACTACAACAGCCACAACGTCTATATCATGGCCGACAAGCAGAAGAACGGCATCAAGGTGAACTTCAAGATCCGCCACAACATCGAGGACGGCAGCGTGCAGCTCGCCGACCACTACCAGCAGAACACCCCCATCGGCGACGGCCCCGTGCTGCTGCCCGACAACCACTACCTGAGCACCCAGTCCGCCCTGAGCAAAGACCCCAACGAGAAGCGCGATCACATGGTCCTGCTGGAGTTCGTGACCGCCGCCGGGATCACTCTCGGCATGGACGAGCTGTACAAG'
max_nmut = 30  # report rows for 0..30 substitutions
codon_table = get_codon_table(1)

get_sequence_number(gfp, mutate_first_codon, max_nmut, codon_table)

0 1 1
1 1434 1435
2 1026245 1027680
3 488701165 489728845
4 174211795730 174701524575
5 49588531123039 49763232647614
6 11740378886325141 11790142118972755
7 2378009168896698712 2389799311015671467
8 420658067303126794099 423047866614142465566
9 66018784246395112841157 66441832113009255306723
10 9307270679864365129058044 9373712511977374384364767
11 1190576688488546726009539077 1199950401000524100393903844
12 139339937816878639838380266741 140539888217879163938774170585
13 15024581387872006685646548256638 15165121276089885849585322427223
14 1501460685145305351830484015683199 1516625806421395237680069338110422
15 139775380052485675831683604584377853 141292005858907071069363673922488275
16 12175474099630847477568188477590165283 12316766105489754548637552151512653558
17 996272613079534715616819830509123819903 1008589379185024470165457382660636473461
18 76844326235319571073125918609974815517580 77852915614504595543291375992635451991041
19 5604386082685007724372794805186643211861921 5682238

In [24]:
# Quick checks on a two-codon sequence.
# NOTE: this rebinds the notebook-level name `gfp` to the short test sequence.
gfp = 'AUGUCG'

# Parse the table once and reuse it; each get_codon_table call re-reads the file.
codon_table = get_codon_table(1)
print(get_codon_variations('AUG', codon_table))
print(translate(gfp, codon_table))

{0: 1, 1: 6, 2: 9, 3: 4}
MS


In [11]:
# Display the parsed table 1: codon -> (first field, second field[, 'i']).
get_codon_table(1)

{'AAA': ('K', 'Lys'),
 'AAC': ('N', 'Asn'),
 'AAG': ('K', 'Lys'),
 'AAT': ('N', 'Asn'),
 'ACA': ('T', 'Thr'),
 'ACC': ('T', 'Thr'),
 'ACG': ('T', 'Thr'),
 'ACT': ('T', 'Thr'),
 'AGA': ('R', 'Arg'),
 'AGC': ('S', 'Ser'),
 'AGG': ('R', 'Arg'),
 'AGT': ('S', 'Ser'),
 'ATA': ('I', 'Ile'),
 'ATC': ('I', 'Ile'),
 'ATG': ('M', 'Met', 'i'),
 'ATT': ('I', 'Ile'),
 'CAA': ('Q', 'Gln'),
 'CAC': ('H', 'His'),
 'CAG': ('Q', 'Gln'),
 'CAT': ('H', 'His'),
 'CCA': ('P', 'Pro'),
 'CCC': ('P', 'Pro'),
 'CCG': ('P', 'Pro'),
 'CCT': ('P', 'Pro'),
 'CGA': ('R', 'Arg'),
 'CGC': ('R', 'Arg'),
 'CGG': ('R', 'Arg'),
 'CGT': ('R', 'Arg'),
 'CTA': ('L', 'Leu'),
 'CTC': ('L', 'Leu'),
 'CTG': ('L', 'Leu', 'i'),
 'CTT': ('L', 'Leu'),
 'GAA': ('E', 'Glu'),
 'GAC': ('D', 'Asp'),
 'GAG': ('E', 'Glu'),
 'GAT': ('D', 'Asp'),
 'GCA': ('A', 'Ala'),
 'GCC': ('A', 'Ala'),
 'GCG': ('A', 'Ala'),
 'GCT': ('A', 'Ala'),
 'GGA': ('G', 'Gly'),
 'GGC': ('G', 'Gly'),
 'GGG': ('G', 'Gly'),
 'GGT': ('G', 'Gly'),
 'GTA': ('V', 'Val'),
