In [8]:
import pandas as pd
from Bio.Seq import Seq
import itertools

In [2]:
# Load the codon table; the preview below shows a `codon` and an `iupac` column.
# NOTE(review): the preview also shows an 'Unnamed: 0' column, which suggests
# the CSV was written with its index — consider index_col=0 after confirming.
df = pd.read_csv('yeast_synonymous_codon_table.csv')
df.head()

Unnamed: 0,codon,iupac
0,TTT,TTY
1,TTC,TTY
2,TTA,TTR
3,TTG,TTR
4,CTT,TTR


In [3]:
# Lookup from a string of unambiguous bases (alphabetically ordered) to the
# single IUPAC nucleotide symbol that stands for exactly that set of bases.
iupac_dict = {
    # one base: the symbol is the base itself
    'A': 'A', 'C': 'C', 'G': 'G', 'T': 'T',
    # two-base ambiguity codes
    'AC': 'M', 'AG': 'R', 'AT': 'W',
    'CG': 'S', 'CT': 'Y', 'GT': 'K',
    # three-base ambiguity codes
    'ACG': 'V', 'ACT': 'H', 'AGT': 'D', 'CGT': 'B',
    # any base
    'ACGT': 'N',
}

# Inverse lookup: IUPAC symbol -> string of the bases it can represent.
# Entries keep the same order as iupac_dict.
rev_iupac_dict = dict(zip(iupac_dict.values(), iupac_dict.keys()))

In [4]:
# Display the inverted mapping (IUPAC symbol -> the bases it stands for).
rev_iupac_dict

{'A': 'A',
 'C': 'C',
 'G': 'G',
 'T': 'T',
 'M': 'AC',
 'R': 'AG',
 'W': 'AT',
 'S': 'CG',
 'Y': 'CT',
 'K': 'GT',
 'V': 'ACG',
 'H': 'ACT',
 'D': 'AGT',
 'B': 'CGT',
 'N': 'ACGT'}

In [9]:
def iupac_to_codons(iupac_codon):
    """Expand an IUPAC-coded codon into the list of concrete codons it covers.

    Every symbol of ``iupac_codon`` is looked up in ``rev_iupac_dict`` to get
    the bases it can stand for. The result is the Cartesian product of those
    per-position choices, joined into strings and returned as a list in
    ``itertools.product`` order. A symbol missing from ``rev_iupac_dict``
    raises ``KeyError``.
    """
    choices_per_position = []
    for symbol in iupac_codon:
        # Each value is a string of bases; product() iterates it per character.
        choices_per_position.append(rev_iupac_dict[symbol])
    return list(map(''.join, itertools.product(*choices_per_position)))

def check_syn_codon(row):
    """Report whether a row's IUPAC codon encodes only the row's own amino acid.

    ``row['codon']`` is translated to obtain the reference amino acid. The
    value in ``row['iupac']`` is expanded with ``iupac_to_codons`` and every
    resulting codon is translated as well.

    Returns True when every expanded codon translates to the reference amino
    acid, and False as soon as one differs.
    """
    reference_aa = str(Seq(row['codon']).translate())
    expanded_codons = iupac_to_codons(row['iupac'])
    # Translate everything up front, as before, so any translation error
    # surfaces regardless of where the first mismatch sits.
    translated = [str(Seq(candidate).translate()) for candidate in expanded_codons]
    return all(aa == reference_aa for aa in translated)

In [11]:
# Run the synonymy check on every row and count how many rows pass
# (summing the boolean results counts the True values).
# NOTE(review): the recorded result is 64; presumably that equals the number
# of rows in the table, i.e. every row passes — confirm with len(df).
df.apply(check_syn_codon, axis=1).sum()

64