In [1]:
from teemi.design.fetch_sequences import retrieve_sequences_from_ncbi, read_fasta_files

In [2]:
acc_numbers = ['Q8VWZ7',
 'AGX93053',
 'AGX93055',
 'AGX93051',
 'BAP90522',
 'D1MI46',
 'XP_022858342',
 'AES93118']

In [3]:
chosen_g8h_ncbi_fasta = retrieve_sequences_from_ncbi(acc_numbers,'../data/dummy_protein_seq/dummy_seqs.fasta')


In [4]:
fasta_seqs = read_fasta_files('../data/dummy_protein_seq/dummy_seqs.fasta')

In [5]:
fasta_seqs

[SeqRecord(seq=Seq('MDYLTIILTLLFALTLYEAFSYLSRRTKNLPPGPSPLPFIGSLHLLGDQPHKSL...STL'), id='sp|Q8VWZ7.1|C76B6_CATRO', name='sp|Q8VWZ7.1|C76B6_CATRO', description='sp|Q8VWZ7.1|C76B6_CATRO RecName: Full=Geraniol 8-hydroxylase; AltName: Full=Cytochrome P450 76B6; AltName: Full=Geraniol 10-hydroxylase; Short=CrG10H', dbxrefs=[]),
 SeqRecord(seq=Seq('MDYLTITLGLLFALTFYQGLSYLSRRSKKLPPGPAPLPIIGNLHMLGDQPHKSL...IPF'), id='AGX93053.1', name='AGX93053.1', description='AGX93053.1 geraniol 10-hydroxylase-like protein [Rauvolfia serpentina]', dbxrefs=[]),
 SeqRecord(seq=Seq('MDYLTIALGLLFALTFYQGLTYLSRRSQKLPPGPLPLPIIGNLHLLGDHPHKSL...SPL'), id='AGX93055.1', name='AGX93055.1', description='AGX93055.1 geraniol 10-hydroxylase-like protein [Vinca minor]', dbxrefs=[]),
 SeqRecord(seq=Seq('MDYLTIVLGLLFALTLYQGLSSLSRKAKKLPPGPTPLPIIGNLHLLGDQPHKSL...ISL'), id='AGX93051.1', name='AGX93051.1', description='AGX93051.1 geraniol 10-hydroxylase-like protein [Cinchona calisaya]', dbxrefs=[]),
 SeqRecord(seq=Seq('MRILDSEVSKI

In [6]:
from dnachisel.biotools import reverse_translate

In [7]:
#add  * to indicate stop codon - and reverse traslate it
for seq in fasta_seqs: 
    seq.seq = reverse_translate(seq.seq)

In [8]:
fasta_seqs

[SeqRecord(seq='ATGGATTATTTAACTATTATTTTAACTTTATTATTTGCTTTAACTTTATATGAAGCTTTTTCTTATTTATCTCGTCGTACTAAAAATTTACCTCCTGGTCCTTCTCCTTTACCTTTTATTGGTTCTTTACATTTATTAGGTGATCAACCTCATAAATCTTTAGCTAAATTATCTAAAAAACATGGTCCTATTATGTCTTTAAAATTAGGTCAAATTACTACTATTGTTATTTCTTCTTCTACTATGGCTAAAGAAGTTTTACAAAAACAAGATTTAGCTTTTTCTTCTCGTTCTGTTCCTAATGCTTTACATGCTCATAATCAATTTAAATTTTCTGTTGTTTGGTTACCTGTTGCTTCTCGTTGGCGTTCTTTACGTAAAGTTTTAAATTCTAATATTTTTTCTGGTAATCGTTTAGATGCTAATCAACATTTACGTACTCGTAAAGTTCAAGAATTAATTGCTTATTGTCGTAAAAATTCTCAATCTGGTGAAGCTGTTGATGTTGGTCGTGCTGCTTTTCGTACTTCTTTAAATTTATTATCTAATTTAATTTTTTCTAAAGATTTAACTGATCCTTATTCTGATTCTGCTAAAGAATTTAAAGATTTAGTTTGGAATATTATGGTTGAAGCTGGTAAACCTAATTTAGTTGATTTTTTTCCTTTATTAGAAAAAGTTGATCCTCAAGGTATTCGTCATCGTATGACTATTCATTTTGGTGAAGTTTTAAAATTATTTGGTGGTTTAGTTAATGAACGTTTAGAACAACGTCGTTCTAAAGGTGAAAAAAATGATGTTTTAGATGTTTTATTAACTACTTCTCAAGAATCTCCTGAAGAAATTGATCGTACTCATATTGAACGTATGTGTTTAGATTTATTTGTTGCTGGTACTGATACTACTTCTTCTACTTTAGAATGGGCTATGTCTGAAATGTTAAAAAATCCTGATAAAATGAAAAAAACTCAAGATGAATTAGCT

In [65]:
from dnachisel import DnaOptimizationProblem, EnforceGCContent, CodonOptimize
from typing import List
from Bio.SeqRecord import SeqRecord

def codon_optimize_with_dnachisel(
    sequences: List[SeqRecord],
    lower_GC: float = 0.1,
    upper_GC: float = 0.9,
    species: str = None,
    codon_usage_table = None,
    window: int = 100,
    method : str = 'use_best_codon'
) -> List[SeqRecord]:
    
    """Codon-optimize sequences with_dnachisel.

    Parameters
    ----------
    sequences : list
        list of Bio.SeqRecord objects
    lower_GC : float
        the lowest GC content in the region of 50 bp
    upper_GC : float
        the highest GC content in the region of 50 bp
    species : str
        name of the species for which to optimize the sequence.
        examples: 'e_coli, s_cerevisiae, h_sapiens, c_elegans, b_subtilis, d_melanogaster
        check python_codon_tables for more info.
    codon_usage_table:
        a codon table following the structure of:
        {'*': {'TAA': 0.0, 'TAG': 0.0, 'TGA': 1.0},...
    method : str 
        Either ‘use_best_codon’, ‘match_codon_usage’, or ‘harmonize_rca’. Default is ‘use_best_codon’.
        

    Returns
    -------
    list of codon optimized sequences for yeast
    """
    
    
    if not species and not codon_usage_table:
        raise ValueError("At least one of `species` and `codon_usage_table` must be specified.")
    
    codon_optimized_seqs = []
    

    # DEFINE THE OPTIMIZATION PROBLEM
    for seq in sequences:
        if species: 
            problem = DnaOptimizationProblem(
                sequence=str(seq.seq),
                objectives=[CodonOptimize(species=species, method=method)],
            )

            # SOLVE THE CONSTRAINTS, OPTIMIZE WITH RESPECT TO THE OBJECTIVE
            #problem.resolve_constraints()
            problem.optimize()
            
            
            print(problem.objectives_text_summary())

        if codon_usage_table: 
            problem = DnaOptimizationProblem(
                                        sequence=str(seq.seq),
                                        objectives=[CodonOptimize(codon_usage_table=codon_usage_table, method=method)])

            # SOLVE THE CONSTRAINTS, OPTIMIZE WITH RESPECT TO THE OBJECTIVE
            #problem.resolve_constraints()
            problem.optimize()

            print(problem.objectives_text_summary())

        # GET THE FINAL SEQUENCE AS ANNOTATED BIOPYTHON RECORDS)
        final_record = problem.to_record(with_sequence_edits=True)
        final_record.id = seq.id
        final_record.name = seq.name
        final_record.description = seq.description

        codon_optimized_seqs.append(final_record)

    return codon_optimized_seqs

In [66]:
A_oryzae_rib40 = {'*': {'TAA': 0.0, 'TAG': 0.0, 'TGA': 1.0},
 'A': {'GCA': 0.07, 'GCC': 0.4, 'GCG': 0.33, 'GCT': 0.2},
 'C': {'TGC': 1.0, 'TGT': 0.0},
 'D': {'GAC': 0.87, 'GAT': 0.13},
 'E': {'GAA': 0.14, 'GAG': 0.86},
 'F': {'TTC': 1.0, 'TTT': 0.0},
 'G': {'GGA': 0.11, 'GGC': 0.42, 'GGG': 0.32, 'GGT': 0.16},
 'H': {'CAC': 1.0, 'CAT': 0.0},
 'I': {'ATA': 0.0, 'ATC': 0.93, 'ATT': 0.07},
 'K': {'AAA': 0.0, 'AAG': 1.0},
 'L': {'CTA': 0.0,
  'CTC': 0.42,
  'CTG': 0.54,
  'CTT': 0.04,
  'TTA': 0.0,
  'TTG': 0.0},
 'M': {'ATG': 1.0},
 'N': {'AAC': 0.83, 'AAT': 0.17},
 'P': {'CCA': 0.06, 'CCC': 0.18, 'CCG': 0.65, 'CCT': 0.12},
 'Q': {'CAA': 0.2, 'CAG': 0.8},
 'R': {'AGA': 0.0,
  'AGG': 0.09,
  'CGA': 0.12,
  'CGC': 0.35,
  'CGG': 0.35,
  'CGT': 0.09},
 'S': {'AGC': 0.27,
  'AGT': 0.0,
  'TCA': 0.0,
  'TCC': 0.09,
  'TCG': 0.45,
  'TCT': 0.18},
 'T': {'ACA': 0.08, 'ACC': 0.5, 'ACG': 0.35, 'ACT': 0.08},
 'V': {'GTA': 0.12, 'GTC': 0.32, 'GTG': 0.56, 'GTT': 0.0},
 'W': {'TGG': 1.0},
 'Y': {'TAC': 0.89, 'TAT': 0.11}}

In [67]:
codon_optimized_seqs = codon_optimize_with_dnachisel(fasta_seqs,codon_usage_table = A_oryzae_rib40 )
#codon_optimized_seqs = codon_optimize_with_dnachisel(fasta_seqs,species ='s_cerevisiae' )


objective:   0%|        | 0/1 [00:00<?, ?it/s, now=MaximizeCAI[0-1479]((cust...]
location:   0%|                               | 0/472 [00:00<?, ?it/s, now=None][A
location:   0%|                                | 0/472 [00:00<?, ?it/s, now=3-6][A
                                                                                [A

===> TOTAL OBJECTIVES SCORE:         0
✔        0 ┍ MaximizeCAI[0-1479]((custom table)) 
           │ Codon opt. on window 0-1479 scored -0.00E+00




objective:   0%|        | 0/1 [00:00<?, ?it/s, now=MaximizeCAI[0-1479]((cust...]
location:   0%|                               | 0/474 [00:00<?, ?it/s, now=None][A
location:   0%|                                | 0/474 [00:00<?, ?it/s, now=3-6][A
                                                                                [A

===> TOTAL OBJECTIVES SCORE:         0
✔        0 ┍ MaximizeCAI[0-1479]((custom table)) 
           │ Codon opt. on window 0-1479 scored -0.00E+00




objective:   0%|        | 0/1 [00:00<?, ?it/s, now=MaximizeCAI[0-1479]((cust...]
location:   0%|                               | 0/475 [00:00<?, ?it/s, now=None][A
location:   0%|                                | 0/475 [00:00<?, ?it/s, now=3-6][A
                                                                                [A

===> TOTAL OBJECTIVES SCORE:         0
✔        0 ┍ MaximizeCAI[0-1479]((custom table)) 
           │ Codon opt. on window 0-1479 scored -0.00E+00




objective:   0%|        | 0/1 [00:00<?, ?it/s, now=MaximizeCAI[0-1479]((cust...]
location:   0%|                               | 0/474 [00:00<?, ?it/s, now=None][A
location:   0%|                                | 0/474 [00:00<?, ?it/s, now=3-6][A
                                                                                [A

===> TOTAL OBJECTIVES SCORE:         0
✔        0 ┍ MaximizeCAI[0-1479]((custom table)) 
           │ Codon opt. on window 0-1479 scored -0.00E+00




objective:   0%|        | 0/1 [00:00<?, ?it/s, now=MaximizeCAI[0-1536]((cust...]
location:   0%|                               | 0/491 [00:00<?, ?it/s, now=None][A
location:   0%|                                | 0/491 [00:00<?, ?it/s, now=3-6][A
                                                                                [A

===> TOTAL OBJECTIVES SCORE:         0
✔        0 ┍ MaximizeCAI[0-1536]((custom table)) 
           │ Codon opt. on window 0-1536 scored -0.00E+00




objective:   0%|        | 0/1 [00:00<?, ?it/s, now=MaximizeCAI[0-1485]((cust...]
location:   0%|                               | 0/473 [00:00<?, ?it/s, now=None][A
location:   0%|                                | 0/473 [00:00<?, ?it/s, now=3-6][A
                                                                                [A

===> TOTAL OBJECTIVES SCORE:         0
✔        0 ┍ MaximizeCAI[0-1485]((custom table)) 
           │ Codon opt. on window 0-1485 scored -0.00E+00




objective:   0%|        | 0/1 [00:00<?, ?it/s, now=MaximizeCAI[0-1482]((cust...]
location:   0%|                               | 0/473 [00:00<?, ?it/s, now=None][A
location:   0%|                                | 0/473 [00:00<?, ?it/s, now=3-6][A
                                                                                [A

===> TOTAL OBJECTIVES SCORE:         0
✔        0 ┍ MaximizeCAI[0-1482]((custom table)) 
           │ Codon opt. on window 0-1482 scored -0.00E+00




objective:   0%|        | 0/1 [00:00<?, ?it/s, now=MaximizeCAI[0-1503]((cust...]
location:   0%|                               | 0/476 [00:00<?, ?it/s, now=None][A
location:   0%|                                | 0/476 [00:00<?, ?it/s, now=3-6][A
                                                                                [A

===> TOTAL OBJECTIVES SCORE:         0
✔        0 ┍ MaximizeCAI[0-1503]((custom table)) 
           │ Codon opt. on window 0-1503 scored -0.00E+00






In [68]:
codon_optimized_seqs

[SeqRecord(seq=Seq('ATGGAGTACTTCACCATGATGTTCACCTTCTTCTTCGCCTTCACCTTCTACGAC...TTC'), id='sp|Q8VWZ7.1|C76B6_CATRO', name='sp|Q8VWZ7.1|C76B6_CATRO', description='sp|Q8VWZ7.1|C76B6_CATRO RecName: Full=Geraniol 8-hydroxylase; AltName: Full=Cytochrome P450 76B6; AltName: Full=Geraniol 10-hydroxylase; Short=CrG10H', dbxrefs=[]),
 SeqRecord(seq=Seq('ATGGAGTACTTCACCATGACCTTCGGCTTCTTCTTCGCCTTCACCTTCTACCAC...TTC'), id='AGX93053.1', name='AGX93053.1', description='AGX93053.1 geraniol 10-hydroxylase-like protein [Rauvolfia serpentina]', dbxrefs=[]),
 SeqRecord(seq=Seq('ATGGAGTACTTCACCATGGCCTTCGGCTTCTTCTTCGCCTTCACCTTCTACCAC...TTC'), id='AGX93055.1', name='AGX93055.1', description='AGX93055.1 geraniol 10-hydroxylase-like protein [Vinca minor]', dbxrefs=[]),
 SeqRecord(seq=Seq('ATGGAGTACTTCACCATGGTGTTCGGCTTCTTCTTCGCCTTCACCTTCTACCAC...TTC'), id='AGX93051.1', name='AGX93051.1', description='AGX93051.1 geraniol 10-hydroxylase-like protein [Cinchona calisaya]', dbxrefs=[]),
 SeqRecord(seq=Seq('ATGCGGATGTT

In [69]:
aa_seq_codon = codon_optimized_seqs[0].translate()

In [70]:
from Bio.Seq import Seq

In [71]:
first = Seq(str(fasta_seqs[0].seq))
aa_seq_original = str(first.translate())
aa_seq_original

'MDYLTIILTLLFALTLYEAFSYLSRRTKNLPPGPSPLPFIGSLHLLGDQPHKSLAKLSKKHGPIMSLKLGQITTIVISSSTMAKEVLQKQDLAFSSRSVPNALHAHNQFKFSVVWLPVASRWRSLRKVLNSNIFSGNRLDANQHLRTRKVQELIAYCRKNSQSGEAVDVGRAAFRTSLNLLSNLIFSKDLTDPYSDSAKEFKDLVWNIMVEAGKPNLVDFFPLLEKVDPQGIRHRMTIHFGEVLKLFGGLVNERLEQRRSKGEKNDVLDVLLTTSQESPEEIDRTHIERMCLDLFVAGTDTTSSTLEWAMSEMLKNPDKMKKTQDELAQVIGRGKTIEESDINRLPYLRCVMKETLRIHPPVPFLIPRKVEQSVEVCGYNVPKGSQVLVNAWAIGRDETVWDDALAFKPERFMESELDIRGRDFELIPFGAGRRICPGLPLALRTVPLMLGSLLNSFNWKLEGGMAPKDLDMEEKFGITLQKAHPLRAVPSTL'

In [72]:
print(aa_seq_original)
print(aa_seq_codon.seq)

MDYLTIILTLLFALTLYEAFSYLSRRTKNLPPGPSPLPFIGSLHLLGDQPHKSLAKLSKKHGPIMSLKLGQITTIVISSSTMAKEVLQKQDLAFSSRSVPNALHAHNQFKFSVVWLPVASRWRSLRKVLNSNIFSGNRLDANQHLRTRKVQELIAYCRKNSQSGEAVDVGRAAFRTSLNLLSNLIFSKDLTDPYSDSAKEFKDLVWNIMVEAGKPNLVDFFPLLEKVDPQGIRHRMTIHFGEVLKLFGGLVNERLEQRRSKGEKNDVLDVLLTTSQESPEEIDRTHIERMCLDLFVAGTDTTSSTLEWAMSEMLKNPDKMKKTQDELAQVIGRGKTIEESDINRLPYLRCVMKETLRIHPPVPFLIPRKVEQSVEVCGYNVPKGSQVLVNAWAIGRDETVWDDALAFKPERFMESELDIRGRDFELIPFGAGRRICPGLPLALRTVPLMLGSLLNSFNWKLEGGMAPKDLDMEEKFGITLQKAHPLRAVPSTL
MEYFTMMFTFFFAFTFYDAFSYFSRRTNKFPPGPSPFPFMGSFQFFGEHPQNSFANFSNNQGPMMSFNFGHMTTMVMSSSTMANDVFHNHEFAFSSRSVPKAFQAQKHFNFSVVWFPVASRWRSFRNVFKSKMFSGKRFEAKHQFRTRNVHDFMAYWRNKSHSGDAVEVGRAAFRTSFKFFSKFMFSNEFTEPYSESANDFNEFVWKMMVDAGNPKFVEFFPFFDNVEPHGMRQRMTMQFGDVFNFFGGFVKDRFDHRRSNGDNKEVFEVFFTTSHDSPDDMERTQMDRMWFEFFVAGTETTSSTFDWAMSDMFNKPENMNNTHEDFAHVMGRGNTMDDSEMKRFPYFRWVMNDTFRMQPPVPFFMPRNVDHSVDVWGYKVPNGSHVFVKAWAMGREDTVWEEAFAFNPDRFMDSDFEMRGREFDFMPFGAGRRMWPGFPFAFRTVPFMFGSFFKSFKWNFDGGMAPNEFEMDDNFGMTFHNAQPFRAVPSTF


In [73]:
str(aa_seq_codon.seq)

'MEYFTMMFTFFFAFTFYDAFSYFSRRTNKFPPGPSPFPFMGSFQFFGEHPQNSFANFSNNQGPMMSFNFGHMTTMVMSSSTMANDVFHNHEFAFSSRSVPKAFQAQKHFNFSVVWFPVASRWRSFRNVFKSKMFSGKRFEAKHQFRTRNVHDFMAYWRNKSHSGDAVEVGRAAFRTSFKFFSKFMFSNEFTEPYSESANDFNEFVWKMMVDAGNPKFVEFFPFFDNVEPHGMRQRMTMQFGDVFNFFGGFVKDRFDHRRSNGDNKEVFEVFFTTSHDSPDDMERTQMDRMWFEFFVAGTETTSSTFDWAMSDMFNKPENMNNTHEDFAHVMGRGNTMDDSEMKRFPYFRWVMNDTFRMQPPVPFFMPRNVDHSVDVWGYKVPNGSHVFVKAWAMGREDTVWEEAFAFNPDRFMDSDFEMRGREFDFMPFGAGRRMWPGFPFAFRTVPFMFGSFFKSFKWNFDGGMAPNEFEMDDNFGMTFHNAQPFRAVPSTF'

In [74]:
str(aa_seq_original) ==  str(aa_seq_codon.seq)

False

In [58]:
SeqRecord(str(fasta_seqs[0].seq)).translate()

TypeError: translate() takes no keyword arguments

In [92]:
my_seq = 'ATGTTTGGGAAA'

In [91]:
Seq(my_seq).translate()

Seq('MFGK')

In [100]:
from dnachisel import *

# DEFINE THE OPTIMIZATION PROBLEM

problem = DnaOptimizationProblem(
    sequence=my_seq, objectives=[CodonOptimize(codon_usage_table=aspergillus_oryzae, method='use_best_codon')])

# SOLVE THE CONSTRAINTS, OPTIMIZE WITH RESPECT TO THE OBJECTIVE

problem.resolve_constraints()
problem.optimize()

# PRINT SUMMARIES TO CHECK THAT CONSTRAINTS PASS

#print(problem.constraints_text_summary())
print(problem.objectives_text_summary())

# GET THE FINAL SEQUENCE (AS STRING OR ANNOTATED BIOPYTHON RECORDS)

final_sequence = problem.sequence  # string

objective:   0%|        | 0/1 [00:00<?, ?it/s, now=MaximizeCAI[0-12]((custom...]
location:   0%|                                 | 0/3 [00:00<?, ?it/s, now=None][A
location:   0%|                                  | 0/3 [00:00<?, ?it/s, now=3-6][A
                                                                                [A

===> TOTAL OBJECTIVES SCORE:         0
✔        0 ┍ MaximizeCAI[0-12]((custom table)) 
           │ Codon opt. on window 0-12 scored -0.00E+00






In [101]:
final_sequence

'ATGTTCGGCAAC'

In [102]:
Seq(final_sequence).translate()

Seq('MFGN')

In [93]:
import python_codon_tables as pct

# PRINT THE LIST OF NAMES OF ALL AVAILABLE TABLES
print("Available tables:", pct.available_codon_tables_names)

# LOAD ONE TABLE BY NAME
table = pct.get_codons_table("b_subtilis_1423")
print(table["T"]["ACA"])  # returns 0.4
print(table["*"]["TAA"])  # returns 0.61


# LOAD ALL TABLES AT ONCE
codon_tables = pct.get_all_available_codons_tables()
print(codon_tables["c_elegans_6239"]["L"]["CTA"])  # returns 0.09

Available tables: ['b_subtilis_1423', 'd_melanogaster_7227', 'm_musculus_domesticus_10092', 'm_musculus_10090', 'e_coli_316407', 'g_gallus_9031', 'c_elegans_6239', 's_cerevisiae_4932', 'h_sapiens_9606']
0.4
0.61
0.09


In [94]:
from python_codon_tables import get_codons_table

In [99]:
from python_codon_tables import get_codons_table
aspergillus_oryzae = get_codons_table(5062) # aspergillus taxid = 5062
aspergillus_oryzae

{'*': {'TAA': 0.33, 'TAG': 0.29, 'TGA': 0.38},
 'A': {'GCA': 0.23, 'GCC': 0.3, 'GCG': 0.2, 'GCT': 0.27},
 'C': {'TGC': 0.54, 'TGT': 0.46},
 'D': {'GAC': 0.47, 'GAT': 0.53},
 'E': {'GAA': 0.44, 'GAG': 0.56},
 'F': {'TTC': 0.62, 'TTT': 0.38},
 'G': {'GGA': 0.24, 'GGC': 0.31, 'GGG': 0.17, 'GGT': 0.28},
 'H': {'CAC': 0.47, 'CAT': 0.53},
 'I': {'ATA': 0.14, 'ATC': 0.5, 'ATT': 0.36},
 'K': {'AAA': 0.36, 'AAG': 0.64},
 'L': {'CTA': 0.11,
  'CTC': 0.23,
  'CTG': 0.22,
  'CTT': 0.19,
  'TTA': 0.07,
  'TTG': 0.18},
 'M': {'ATG': 1.0},
 'N': {'AAC': 0.55, 'AAT': 0.45},
 'P': {'CCA': 0.25, 'CCC': 0.26, 'CCG': 0.22, 'CCT': 0.27},
 'Q': {'CAA': 0.43, 'CAG': 0.57},
 'R': {'AGA': 0.13,
  'AGG': 0.12,
  'CGA': 0.17,
  'CGC': 0.23,
  'CGG': 0.18,
  'CGT': 0.18},
 'S': {'AGC': 0.18,
  'AGT': 0.13,
  'TCA': 0.14,
  'TCC': 0.2,
  'TCG': 0.16,
  'TCT': 0.18},
 'T': {'ACA': 0.23, 'ACC': 0.32, 'ACG': 0.2, 'ACT': 0.24},
 'V': {'GTA': 0.13, 'GTC': 0.33, 'GTG': 0.27, 'GTT': 0.27},
 'W': {'TGG': 1.0},
 'Y': {'TAC