In [2]:
# Amino-acid substitution probabilities under single-nucleotide mutation.
#
# For every sense codon, each of its single-base variants is looked up in the
# genetic code. Variants that are stop codons are discarded; every other variant
# is tallied as one (source amino acid -> target amino acid) event, synonymous
# changes included (they land on the diagonal). Each row of `matrix` is the
# tally for one source amino acid divided by that amino acid's total number of
# tallied events.
import numpy as np
import pandas as pd
from collections import defaultdict

# Symbol used in `codon_table` for stop codons.
STOP_SYMBOL = '*'

# Define the standard genetic code
codon_table = {
    'TTT': 'F', 'TTC': 'F', 'TTA': 'L', 'TTG': 'L',
    'TCT': 'S', 'TCC': 'S', 'TCA': 'S', 'TCG': 'S',
    'TAT': 'Y', 'TAC': 'Y', 'TAA': '*', 'TAG': '*',
    'TGT': 'C', 'TGC': 'C', 'TGA': '*', 'TGG': 'W',
    'CTT': 'L', 'CTC': 'L', 'CTA': 'L', 'CTG': 'L',
    'CCT': 'P', 'CCC': 'P', 'CCA': 'P', 'CCG': 'P',
    'CAT': 'H', 'CAC': 'H', 'CAA': 'Q', 'CAG': 'Q',
    'CGT': 'R', 'CGC': 'R', 'CGA': 'R', 'CGG': 'R',
    'ATT': 'I', 'ATC': 'I', 'ATA': 'I', 'ATG': 'M',
    'ACT': 'T', 'ACC': 'T', 'ACA': 'T', 'ACG': 'T',
    'AAT': 'N', 'AAC': 'N', 'AAA': 'K', 'AAG': 'K',
    'AGT': 'S', 'AGC': 'S', 'AGA': 'R', 'AGG': 'R',
    'GTT': 'V', 'GTC': 'V', 'GTA': 'V', 'GTG': 'V',
    'GCT': 'A', 'GCC': 'A', 'GCA': 'A', 'GCG': 'A',
    'GAT': 'D', 'GAC': 'D', 'GAA': 'E', 'GAG': 'E',
    'GGT': 'G', 'GGC': 'G', 'GGA': 'G', 'GGG': 'G'
}

# Row/column labels of the result: every amino acid in the table, sorted,
# with the stop symbol removed.
amino_acids = sorted(set(codon_table.values()) - {STOP_SYMBOL})

# mutation_counts[src][dst] -> number of single-base changes turning a codon
# for `src` into a codon for `dst`; total_mutations[src] -> sum over all `dst`.
mutation_counts = defaultdict(lambda: defaultdict(int))
total_mutations = defaultdict(int)

# Analyze all possible single-nucleotide mutations
bases = ['A', 'C', 'G', 'T']
for codon, aa_from in codon_table.items():
    if aa_from == STOP_SYMBOL:
        continue  # Stop codons are never a mutation source

    for position, original_base in enumerate(codon):
        for new_base in bases:
            if new_base == original_base:
                continue  # Not a mutation

            mutated_codon = codon[:position] + new_base + codon[position + 1:]
            aa_to = codon_table.get(mutated_codon)

            # Skip codons missing from the table and mutations to a stop codon.
            if not aa_to or aa_to == STOP_SYMBOL:
                continue

            # Every (codon, position, replacement base) triple counts once.
            mutation_counts[aa_from][aa_to] += 1
            total_mutations[aa_from] += 1

# Calculate probabilities: lay the tallies out as a dense table, then divide
# each row by its own sum in one vectorised step instead of assigning the
# 20 x 20 cells one at a time.
substitution_counts = pd.DataFrame(
    [[mutation_counts[aa_from].get(aa_to, 0) for aa_to in amino_acids]
     for aa_from in amino_acids],
    index=amino_acids,
    columns=amino_acids,
    dtype=float,
)
row_totals = substitution_counts.sum(axis=1)

# A row with no tallied events would divide by zero; mask its total to NaN and
# report an all-zero row instead.
matrix = substitution_counts.div(row_totals.where(row_totals > 0), axis=0).fillna(0.0)

# Sanity check: every row that has at least one event is a probability distribution.
assert np.allclose(matrix.sum(axis=1)[row_totals > 0], 1.0), "rows must sum to 1"

# Display formatted matrix
print(matrix.round(3))
matrix

       A      C      D      E      F      G      H      I      K      L  \
A  0.333  0.000  0.056  0.056  0.000  0.111  0.000  0.000  0.000  0.000   
C  0.000  0.125  0.000  0.000  0.125  0.125  0.000  0.000  0.000  0.000   
D  0.111  0.000  0.111  0.222  0.000  0.111  0.111  0.000  0.000  0.000   
E  0.125  0.000  0.250  0.125  0.000  0.125  0.000  0.000  0.125  0.000   
F  0.000  0.111  0.000  0.000  0.111  0.000  0.000  0.111  0.000  0.333   
G  0.114  0.057  0.057  0.057  0.000  0.343  0.000  0.000  0.000  0.000   
H  0.000  0.000  0.111  0.000  0.000  0.000  0.111  0.000  0.000  0.111   
I  0.000  0.000  0.000  0.000  0.074  0.000  0.000  0.222  0.037  0.148   
K  0.000  0.000  0.000  0.125  0.000  0.000  0.000  0.062  0.125  0.000   
L  0.000  0.000  0.000  0.000  0.118  0.000  0.039  0.078  0.000  0.353   
M  0.000  0.000  0.000  0.000  0.000  0.000  0.000  0.333  0.111  0.222   
N  0.000  0.000  0.111  0.000  0.000  0.000  0.111  0.111  0.222  0.000   
P  0.111  0.000  0.000  0

Unnamed: 0,A,C,D,E,F,G,H,I,K,L,M,N,P,Q,R,S,T,V,W,Y
A,0.333333,0.0,0.055556,0.055556,0.0,0.111111,0.0,0.0,0.0,0.0,0.0,0.0,0.111111,0.0,0.0,0.111111,0.111111,0.111111,0.0,0.0
C,0.0,0.125,0.0,0.0,0.125,0.125,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.125,0.25,0.0,0.0,0.125,0.125
D,0.111111,0.0,0.111111,0.222222,0.0,0.111111,0.111111,0.0,0.0,0.0,0.0,0.111111,0.0,0.0,0.0,0.0,0.0,0.111111,0.0,0.111111
E,0.125,0.0,0.25,0.125,0.0,0.125,0.0,0.0,0.125,0.0,0.0,0.0,0.0,0.125,0.0,0.0,0.0,0.125,0.0,0.0
F,0.0,0.111111,0.0,0.0,0.111111,0.0,0.0,0.111111,0.0,0.333333,0.0,0.0,0.0,0.0,0.0,0.111111,0.0,0.111111,0.0,0.111111
G,0.114286,0.057143,0.057143,0.057143,0.0,0.342857,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.171429,0.057143,0.0,0.114286,0.028571,0.0
H,0.0,0.0,0.111111,0.0,0.0,0.0,0.111111,0.0,0.0,0.111111,0.0,0.111111,0.111111,0.222222,0.111111,0.0,0.0,0.0,0.0,0.111111
I,0.0,0.0,0.0,0.0,0.074074,0.0,0.0,0.222222,0.037037,0.148148,0.111111,0.074074,0.0,0.0,0.037037,0.074074,0.111111,0.111111,0.0,0.0
K,0.0,0.0,0.0,0.125,0.0,0.0,0.0,0.0625,0.125,0.0,0.0625,0.25,0.0,0.125,0.125,0.0,0.125,0.0,0.0,0.0
L,0.0,0.0,0.0,0.0,0.117647,0.0,0.039216,0.078431,0.0,0.352941,0.039216,0.0,0.078431,0.039216,0.078431,0.039216,0.0,0.117647,0.019608,0.0


In [3]:
# Amino-acid substitution probabilities under single-nucleotide mutation.
#
# For every sense codon, each of its single-base variants is looked up in the
# genetic code. Variants that are stop codons are discarded; every other variant
# is tallied as one (source amino acid -> target amino acid) event, synonymous
# changes included (they land on the diagonal). Each row of `matrix` is the
# tally for one source amino acid divided by that amino acid's total number of
# tallied events.
import numpy as np
import pandas as pd
from collections import defaultdict

# Symbol used in `codon_table` for stop codons.
STOP_SYMBOL = '*'

# Define the standard genetic code
codon_table = {
    'TTT': 'F', 'TTC': 'F', 'TTA': 'L', 'TTG': 'L',
    'TCT': 'S', 'TCC': 'S', 'TCA': 'S', 'TCG': 'S',
    'TAT': 'Y', 'TAC': 'Y', 'TAA': '*', 'TAG': '*',
    'TGT': 'C', 'TGC': 'C', 'TGA': '*', 'TGG': 'W',
    'CTT': 'L', 'CTC': 'L', 'CTA': 'L', 'CTG': 'L',
    'CCT': 'P', 'CCC': 'P', 'CCA': 'P', 'CCG': 'P',
    'CAT': 'H', 'CAC': 'H', 'CAA': 'Q', 'CAG': 'Q',
    'CGT': 'R', 'CGC': 'R', 'CGA': 'R', 'CGG': 'R',
    'ATT': 'I', 'ATC': 'I', 'ATA': 'I', 'ATG': 'M',
    'ACT': 'T', 'ACC': 'T', 'ACA': 'T', 'ACG': 'T',
    'AAT': 'N', 'AAC': 'N', 'AAA': 'K', 'AAG': 'K',
    'AGT': 'S', 'AGC': 'S', 'AGA': 'R', 'AGG': 'R',
    'GTT': 'V', 'GTC': 'V', 'GTA': 'V', 'GTG': 'V',
    'GCT': 'A', 'GCC': 'A', 'GCA': 'A', 'GCG': 'A',
    'GAT': 'D', 'GAC': 'D', 'GAA': 'E', 'GAG': 'E',
    'GGT': 'G', 'GGC': 'G', 'GGA': 'G', 'GGG': 'G'
}

# Row/column labels of the result: every amino acid in the table, sorted,
# with the stop symbol removed.
amino_acids = sorted(set(codon_table.values()) - {STOP_SYMBOL})

# mutation_counts[src][dst] -> number of single-base changes turning a codon
# for `src` into a codon for `dst`; total_mutations[src] -> sum over all `dst`.
mutation_counts = defaultdict(lambda: defaultdict(int))
total_mutations = defaultdict(int)

# Analyze all possible single-nucleotide mutations
bases = ['A', 'C', 'G', 'T']
for codon, aa_from in codon_table.items():
    if aa_from == STOP_SYMBOL:
        continue  # Stop codons are never a mutation source

    for position, original_base in enumerate(codon):
        for new_base in bases:
            if new_base == original_base:
                continue  # Not a mutation

            mutated_codon = codon[:position] + new_base + codon[position + 1:]
            aa_to = codon_table.get(mutated_codon)

            # Skip codons missing from the table and mutations to a stop codon.
            if not aa_to or aa_to == STOP_SYMBOL:
                continue

            # Every (codon, position, replacement base) triple counts once.
            mutation_counts[aa_from][aa_to] += 1
            total_mutations[aa_from] += 1

# Calculate probabilities: lay the tallies out as a dense table, then divide
# each row by its own sum in one vectorised step instead of assigning the
# 20 x 20 cells one at a time.
substitution_counts = pd.DataFrame(
    [[mutation_counts[aa_from].get(aa_to, 0) for aa_to in amino_acids]
     for aa_from in amino_acids],
    index=amino_acids,
    columns=amino_acids,
    dtype=float,
)
row_totals = substitution_counts.sum(axis=1)

# A row with no tallied events would divide by zero; mask its total to NaN and
# report an all-zero row instead.
matrix = substitution_counts.div(row_totals.where(row_totals > 0), axis=0).fillna(0.0)

# Sanity check: every row that has at least one event is a probability distribution.
assert np.allclose(matrix.sum(axis=1)[row_totals > 0], 1.0), "rows must sum to 1"

# Display formatted matrix
print(matrix.round(3))
matrix

       A      C      D      E      F      G      H      I      K      L  \
A  0.333  0.000  0.056  0.056  0.000  0.111  0.000  0.000  0.000  0.000   
C  0.000  0.125  0.000  0.000  0.125  0.125  0.000  0.000  0.000  0.000   
D  0.111  0.000  0.111  0.222  0.000  0.111  0.111  0.000  0.000  0.000   
E  0.125  0.000  0.250  0.125  0.000  0.125  0.000  0.000  0.125  0.000   
F  0.000  0.111  0.000  0.000  0.111  0.000  0.000  0.111  0.000  0.333   
G  0.114  0.057  0.057  0.057  0.000  0.343  0.000  0.000  0.000  0.000   
H  0.000  0.000  0.111  0.000  0.000  0.000  0.111  0.000  0.000  0.111   
I  0.000  0.000  0.000  0.000  0.074  0.000  0.000  0.222  0.037  0.148   
K  0.000  0.000  0.000  0.125  0.000  0.000  0.000  0.062  0.125  0.000   
L  0.000  0.000  0.000  0.000  0.118  0.000  0.039  0.078  0.000  0.353   
M  0.000  0.000  0.000  0.000  0.000  0.000  0.000  0.333  0.111  0.222   
N  0.000  0.000  0.111  0.000  0.000  0.000  0.111  0.111  0.222  0.000   
P  0.111  0.000  0.000  0

Unnamed: 0,A,C,D,E,F,G,H,I,K,L,M,N,P,Q,R,S,T,V,W,Y
A,0.333333,0.0,0.055556,0.055556,0.0,0.111111,0.0,0.0,0.0,0.0,0.0,0.0,0.111111,0.0,0.0,0.111111,0.111111,0.111111,0.0,0.0
C,0.0,0.125,0.0,0.0,0.125,0.125,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.125,0.25,0.0,0.0,0.125,0.125
D,0.111111,0.0,0.111111,0.222222,0.0,0.111111,0.111111,0.0,0.0,0.0,0.0,0.111111,0.0,0.0,0.0,0.0,0.0,0.111111,0.0,0.111111
E,0.125,0.0,0.25,0.125,0.0,0.125,0.0,0.0,0.125,0.0,0.0,0.0,0.0,0.125,0.0,0.0,0.0,0.125,0.0,0.0
F,0.0,0.111111,0.0,0.0,0.111111,0.0,0.0,0.111111,0.0,0.333333,0.0,0.0,0.0,0.0,0.0,0.111111,0.0,0.111111,0.0,0.111111
G,0.114286,0.057143,0.057143,0.057143,0.0,0.342857,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.171429,0.057143,0.0,0.114286,0.028571,0.0
H,0.0,0.0,0.111111,0.0,0.0,0.0,0.111111,0.0,0.0,0.111111,0.0,0.111111,0.111111,0.222222,0.111111,0.0,0.0,0.0,0.0,0.111111
I,0.0,0.0,0.0,0.0,0.074074,0.0,0.0,0.222222,0.037037,0.148148,0.111111,0.074074,0.0,0.0,0.037037,0.074074,0.111111,0.111111,0.0,0.0
K,0.0,0.0,0.0,0.125,0.0,0.0,0.0,0.0625,0.125,0.0,0.0625,0.25,0.0,0.125,0.125,0.0,0.125,0.0,0.0,0.0
L,0.0,0.0,0.0,0.0,0.117647,0.0,0.039216,0.078431,0.0,0.352941,0.039216,0.0,0.078431,0.039216,0.078431,0.039216,0.0,0.117647,0.019608,0.0
