In [19]:
from Bio.codonalign.codonseq import CodonSeq, cal_dn_ds
import Bio.Data.CodonTable

We going to calculate for each codon how many positions can lead to synonymous mutations and how many to non-synonymous mutations. We will use the genetic code to determine the amino acid encoded by each codon. We will then calculate the number of codons that encode the same amino acid as the original codon and the number of codons that encode a different amino acid. Finally, we will calculate the ratio of synonymous to non-synonymous mutations.

In [20]:
# Import the standard codon table, those are the codons which are used to translate the DNA sequence into amino acids at human cells
print(Bio.Data.CodonTable.standard_dna_table)

Table 1 Standard, SGC0

  |  T      |  C      |  A      |  G      |
--+---------+---------+---------+---------+--
T | TTT F   | TCT S   | TAT Y   | TGT C   | T
T | TTC F   | TCC S   | TAC Y   | TGC C   | C
T | TTA L   | TCA S   | TAA Stop| TGA Stop| A
T | TTG L(s)| TCG S   | TAG Stop| TGG W   | G
--+---------+---------+---------+---------+--
C | CTT L   | CCT P   | CAT H   | CGT R   | T
C | CTC L   | CCC P   | CAC H   | CGC R   | C
C | CTA L   | CCA P   | CAA Q   | CGA R   | A
C | CTG L(s)| CCG P   | CAG Q   | CGG R   | G
--+---------+---------+---------+---------+--
A | ATT I   | ACT T   | AAT N   | AGT S   | T
A | ATC I   | ACC T   | AAC N   | AGC S   | C
A | ATA I   | ACA T   | AAA K   | AGA R   | A
A | ATG M(s)| ACG T   | AAG K   | AGG R   | G
--+---------+---------+---------+---------+--
G | GTT V   | GCT A   | GAT D   | GGT G   | T
G | GTC V   | GCC A   | GAC D   | GGC G   | C
G | GTA V   | GCA A   | GAA E   | GGA G   | A
G | GTG V   | GCG A   | GAG E   | GGG G   | G
--+---------

In [26]:
# For each codon we going to calculate how many positions can lead to synonymous mutations and how many to non-synonymous mutations
# We going to examen all possible mutations for each codon and calculate the number of synonymous and non-synonymous mutations
# For example, for the codon 'TTC' which codes for the amino acid 'F' (Phenylalanine) we have 9 possible mutations
# ATC, CTC, GTC, TAC, TCC, TGC, TTA, TTG which lead to non-synonymous mutations and TTT which leads to a synonymous mutation

for codon in Bio.Data.CodonTable.standard_dna_table.forward_table:
    nucleotides = ['A', 'C', 'G', 'T']
    for i in range(3):
        for nucleotide in nucleotides:
            if codon[i] != nucleotide:
                mutated_codon = codon[:i] + nucleotide + codon[i+1:]
                mutated_amino_acid = Bio.Data.CodonTable.standard_dna_table.forward_table[mutated_codon]
                if mutated_amino_acid != Bio.Data.CodonTable.standard_dna_table.forward_table[codon]:
                    print(f'{codon} -> {mutated_codon} is a non-synonymous mutation')
                else:
                    print(f'{codon} -> {mutated_codon} is a synonymous mutation')
    

TTT -> ATT is a non-synonymous mutation
TTT -> CTT is a non-synonymous mutation
TTT -> GTT is a non-synonymous mutation
TTT -> TAT is a non-synonymous mutation
TTT -> TCT is a non-synonymous mutation
TTT -> TGT is a non-synonymous mutation
TTT -> TTA is a non-synonymous mutation
TTT -> TTC is a synonymous mutation
TTT -> TTG is a non-synonymous mutation
TTC -> ATC is a non-synonymous mutation
TTC -> CTC is a non-synonymous mutation
TTC -> GTC is a non-synonymous mutation
TTC -> TAC is a non-synonymous mutation
TTC -> TCC is a non-synonymous mutation
TTC -> TGC is a non-synonymous mutation
TTC -> TTA is a non-synonymous mutation
TTC -> TTG is a non-synonymous mutation
TTC -> TTT is a synonymous mutation
TTA -> ATA is a non-synonymous mutation
TTA -> CTA is a synonymous mutation
TTA -> GTA is a non-synonymous mutation


KeyError: 'TAA'