# Translating RNA into protein

translate aminoacids from nucleotides using a given RNA codon table

In [1]:
codons = {
    "UUU": "F", "CUU": "L", "AUU": "I", "GUU": "V",
    "UUC": "F", "CUC": "L", "AUC": "I", "GUC": "V",
    "UUA": "L", "CUA": "L", "AUA": "I", "GUA": "V",
    "UUG": "L", "CUG": "L", "AUG": "M", "GUG": "V",
    "UCU": "S", "CCU": "P", "ACU": "T", "GCU": "A",
    "UCC": "S", "CCC": "P", "ACC": "T", "GCC": "A",
    "UCA": "S", "CCA": "P", "ACA": "T", "GCA": "A",
    "UCG": "S", "CCG": "P", "ACG": "T", "GCG": "A",
    "UAU": "Y", "CAU": "H", "AAU": "N", "GAU": "D",
    "UAC": "Y", "CAC": "H", "AAC": "N", "GAC": "D",
    "UAA": None, "CAA": "Q", "AAA": "K", "GAA": "E",
    "UAG": None, "CAG": "Q", "AAG": "K", "GAG": "E",
    "UGU": "C", "CGU": "R", "AGU": "S", "GGU": "G",
    "UGC": "C", "CGC": "R", "AGC": "S", "GGC": "G",
    "UGA": None, "CGA": "R", "AGA": "R", "GGA": "G",
    "UGG": "W", "CGG": "R", "AGG": "R", "GGG": "G"
}

In [2]:
import numpy as np

In [3]:
sample = "AUGGCCAUGGCGCCCAGAACUGAGAUCAAUAGUACCCGUAUUAACGGGUGA"

In [7]:
def translate(s, codons):
    amino_acids = []
    for i in range(0, len(s), 3):
        codon = s[i:i+3]
        amino_acid = codons[codon]
        if amino_acid is None:
            return "".join(amino_acids)
        amino_acids.append(amino_acid)
    return "".join(amino_acids)

In [8]:
translate(sample, codons)

'MAMAPRTEINSTRING'

In [10]:
real = "AUGAUUAACCCUGUACAUCUAACCUCUACGACAUACGGUGAGAAGGGACAAAUCACUCCCUGUACAAAUUUCAAGACACUUGCAUCGGGUCCAUCGUUGUUGGUGAUUGGCCUAGCCGGUUUAUGGGUCUGUGUUAGAUUACUCGGUUACGAGCACUGGUUCACGAGACUGGAAUGCGGACCGGACAACAAAUGGUGUUCCAAAGAUUGCGAAGAUGAGAGAUUUACCCGUAGCGGCCUUGCUCUUCCAGUCAGACACAAUGAAACUGUCGUAGGUAAUACCCCCUGUCGUGUUGCUGAGGAGACAUCGUGGUCGUACGAGCACUUUGUUCCAGCUAGGGUGGAGAUUGAAGUGCCUGGCGGACCUAAGAUCUCUUCGCUAUGUCAGUUGUUUGGGGGGAUGUCUUACAAGCGUGGCAAAGCCUCACCUCGAUCUCUUCACAAACAACUGGCACCGAAUCACAAGUCGGAACGUCAGACCCGACUAUCGCGGUUUCGCAGAUUUAGCUGUUGCUAUGAGCACUUUGUAACCCUACUUCUUCCUCUUUAUUUCGACGUGUGUACACUUCUUCUCAGGGUAGUGAACUCGCCAUCUAGACGCGCAUAUCGCACAACUGUUAACAUGGGGAAGCUCGUUCUAUCGCAGUUCUUGACCGCGGCUUUCUGGUUGAAUGAAUCCUCACGUUUAUUCACCAUUCCAUUCGUGGCGCGUUUAUGGGCAGCCUCUUACCACAUGGAGGUUCAGGGCUCGACUUCGUAUAGAGUAACGUAUCAGUUGAAUCUAUUCCAACUGCUGCGAGCACUUAUCUGCACCACGUCGAAGAUAUUAACAUUGCUGACGGGUAUACGUACCUCCCCAACUUCUUUAGUGAACAGCGUGAAUGGGGAAACGAUGGAGCCCCGUCGAAAAAGAGUUCGCUCCGGCCAAGCCCGAUUGGUACGCAGCUUACUCCAAUACUAUGGUCCGUACCGACGGUAUUUAGAAGGGACUGGUAGAUCUGAUCCCCUCUGCAUCGCCACUCUUGCGAGCGCCAGACGUGGUUCGACAACAUGGCCUUGGUACACGUGGAGUCCUCGCUCUAUGAUACGCACUCUAUACACAGCCUACAUUGCAGGUGCGGUAGUUGCUGGGUUGCAGACGGCCGGCUUUUCAGCUUUAAGUCCGACUUUCGGUAUUUUCGCCAGACAUCAAUUCAAAUGGACGUAUGGCUCAUCCGAGAUUUGCCAAGAAGUAGAUGUUCACGAAGGUAGCGAGCUUGAGCCCGCCGACCUAAUCGACUCUUUAUACGAGUGUCUUCACUGGGCUGAGCGUGUCGCGUCCCGAGCUCAGAGCGACGCCACAAGACCCAGAGCAAGCCUCGCAAGGCACACCCGAGUAAAGAUGUAUCGCGGCUACCCGGCAUUACCAAGUGCAGAAACUAAGAAAAAGUGCAUGAACGGAGGCAUACAUUCGCAAUACGGUUCCAGUCUAAGCCGUCGAUGGUGGAGGCGCCAGCGUUUUCCCUUUUAUCAGACUGCUCAUUAUGGCAUCGUUGGACGGGAUAACAGUGACGAUCCGAGACUGAUCGUAAGCCUUGCUCCCGAACAUGAAGACGAUUGUGUCGUCGUGCUAUAUGGUUUCAAUGUCCAUUUGGGCCAACCGUGUGAAAAGACUUCAGACUCGGCACAACACAGACAAGAUGGCCAGACUGGUACGUGUAGGGGCAAUUACACGCAGGGGGUUAGCGCUAUAAGAAUGAGGCCAAAGAUCGAUGAGCAAAGUAGUUAUUUAGUUCUACCAUGUAAAGAUUGCCUUAUGGAGAUUUCUCUUACCAUUCGGACUACAAAAACAUCUUGCCGUAAAGUCAUGCCGUUCUACGCAUGGGUCCCGCUUCAUCAAUACCUUAAUAGUGAUCGACGGGAAUCGUUAGGAGAAUACAGUGCUGGUUUGGAGACAAUGGGGGCAGCUUCCUGUCCCCAGCAUUCGACACGAAUUGUUGCUCUGCCCCGCGAGAACCGUCAGUUGACGCUUGGAGGUACCGCCUACCGUAAUACCAAUUUCAGGGGUCGUUUACUGCCAAAGGAUUUCGGCGGUAAGGGAAUUGAGGCCUGGUGCCUGAAUUUGGCCAAUUGUGUGGAAGGAUUACCUUAUAGCUGCGCAGAUCCUGAGUCAGCCUCUUCCGUACGUAGACAAGGUUUAUGCGUCAAAACGUGGGAUCAACACUUGCAGGACAGUGAACGCGGGCAUGAAUACACCUACCUGUGCCAACUCUUCGGCACUAAUCCUACAUGGUCAGAGCGUUGCCCGAAGACAGAUCCACCUCCACAUCCUUCUCUCUCUACACUGCGCAUGGCCCGGGGGUAUUUGUCGAUUCAGUUUAACGGACAGGGCACCGUUACAUGCCCGUAUCCAGCAAGAGCCAAUCUCCACGCGUGUACGGCAUGCGCGAGAACUCGGACAGCCACGCAACCGGACCGCCUUACACCGGAUAUGAACUACUGUACUUCUCCUACGAAAUGGUUCAUACUUUUUCACCCCUUAAAGCUCCUCAUUGUUGUCCUACGUGUCAGAUAUUAUUAUAGGUGUUACACGGGGAUCAGAGUGGCUGGCUUGGCUUAUCACUUGUGGAACAUUGGCAUUGCUCGCGUGGGCCUGCGACCCACAACUUCCGUGUAUGCUGAUGUACAAACUGAUACAAACGUUACUACAGCAUACCUUCAGAACUGGCAGGAUACUCAAUUGACAUCAAUUGGCUUAAUCAACUGCCCGCUCAGAGAGCGUGACGUUCGAACCGGUCUCCCGAAUUGUUCAACGCAUGACGCGAUCAGUGAUUUGCGUGUAUCGGCACUUACGAUUGGAGGGAAAAACCGAACCAUAAAUCGGAAGUGCUCCGUCUGGGUUGCAGUACAAGCCAUCGAGCACCAAUGUGGAACCAAUUUGAGUUAUCUGCGCGACACUGUUGGGCCCAACCAUGUGUGGGUUGGACGCAUGGUCUUGCAGUCAGGCCAAAUAGCAUACUCGAGUAAGAAAGCUGCAUUACUAGCGGAAGGUGUUUCGCCCCUAACCUCGUCUGACUCUCUACCGACUCGAGGUACUUCCGAGGGGCCUACAAGCAUCUCCACAGUCGCAAAGGGCUUGUCAAUGGAGGAGUCUUUAGUCGCACCGCAUUGGUUCGCCUCUAUCCUAAGCCUAGCUCAGGCGACGAAAAUCUGCUAUCGCUUCGCAGGGUAUAAUCAUACGCGUGCGCGGGGCAUGCUCAAUCCGUCCCUUACCAGGCACUCGAUAAUUGUACAAACCGCUUGUAUGGUACCCUUCCAGAUUCUCAUCUUGGCCAGGCUCGAAUUUCGCGCACCUUCUAGCUCCACGAAAUCGAUGUUAAUCGUUACCGAUUUACGCCCACUAAAACACUUGUCAACCACCAGGGUACUUCCUCCAGUGAGCCUAAUGUUUUCAGGGGAGUUCGUUCACAUCACACGUGAUAGAUUUGCCGAGGCCUGGAUACGAUCAAGCAUGACAAACAUGGGCCGUACUCUAAAGCUCAGAUUAAGACGUGUCCUAACACCUUCUAUGGGCCUUCACAAAGAAGGGUCUACCCGAAGUUGUCUGUAUGUACCCCAUGCCAAAAGGGACCGUAUUGAAUUGCGAUAUAGUUUUGUCACAAAAAUAUAUGAUCCGAGGCUCCGCAAUGCUGAAGAAACCCGUUUAGGGGGCAACGACACUAAGGCCGUACAUUAUUCGAGUAUAUUCCCCUUGCAACUUCCUGGAGGUAUGCAAGAUAAACCAAAGAAGACCCCCUGGUGUCCUAGCGAUGCAGCAUUGCGCUCUGCCAAGAUCGGGUACUCGUCUGGACAAUUUACUCAAACUUCGACGAAUCCAAAGUGGAGCAGCGGUUAUAGCUGCGGGGAUUGUUCCUGCUUUUUUGUUAUUCAAGAAGCGACUGUCCUGAGAAUUUGGUUGUUCGAGCGGGUUGCUGAAACCUUUGGGUGCUCACACCCGCAAUCCGGCAACUUCUCGGGAACUAGUGUGCUCGGGGCCUGGAGUGGCUUGGUGGCAGUUCAUUCUCUGACAGUAACCAGUCCGCUUGAUGAGUUCAAGCAGGAGAAUUCCACCUACAUAUGCGACAUGGUGGCGAUAUGGGCUAUUGCAGCACGCUUAGUGUGGAGAGAGCAUCAGACAACGUCGAAAGUCCGAGCGUUUUUAUUGGGACGAAUCAAUGCACGUGACGGGACUUCGGCCGACUGCAGCGUACCUGGCGAAGGGCACACUUCUAUCACUUUGUGGUCGACUCCCUUCGACGCUUUGCUGAGUUCCACACCCCGUAAUAUACCCCUUGAAGCAGCAUAUAGCUGUCGGCACAUGUAUACGUUUACUCUACAUGGAGUUUAUGCACCGCAGACUGCUUUUACGAGCAUCAGUCGUUGGUCUGCAUACGCUGGAAGGACUCCACCCAUCGUAAAACUGCGGUUGGCGGUGACGGCCGCCCUAGGCCGUACCGCAGUGCUUGUACCCUUAAAUAUGCGACGAGUCGGAGUAUCUACUCUUGACCCCGCGUGUGACGAAAGGGAGCUGAGGACAGAGGCUUUCGGUCACGGACGGCAAGAAUCGCAAAAUCUAAGUGUGGAGCAGCUGAGAGGAGGGGCAUCCGGCAGAAAUAUGCCCCAUAACACGCAUCUUGAAGUCGAUGCAUCUAGCCCCACAGACAGAAAUUGCGACUGGCAGCCAUCUGCGGAUCCCCUGGAGGACGAAGUCGCUAGCUGUAAGAAGGGAGUGGACGUAUUGCCUAUUCAGCCCAAUCUUGGCGAGGGGACUUCAAGAGUGACUGUUGGAAACUCGUGCGGUCCCGUAAUGUCGCCACAGAUUAGCGAGCGGGCCACAACAGUAUGCACCCAUCCCCGUUCCUCGAAAUCUUCCUUUCACCAAAUCGAAGUGGUGAGUAGACGGCUAUCACGCAACUCGUCAUAUGAAAUCACGGCACUUAAGGAUGACGUAACCGUGGCCUCCAAUUCUGUCAGGCGUAAUAGCAGGUCGUUAUCGGGCCGCAGAACUAACACCGCACUCGCGAUCAUCAUUGCUGACAGGAGUGCAAGACAUUUGUUGACCACGGCCGUGAAUAUAUCGCGACCCUAUAUUGGGUUACUUUUGUUCCGGCUGUACGCCGUAGACUACAUGAGCAGAAACAGCGUUCCCAAAAUUACCUAUGGAUACUCCUAUCUUCAUGACAGUUUACGCUAUAAACUCUGGCAAGAUUUGGGCGAGUUUGGAAGGGACUAUACUGCAAAGUCAGAUCGUUCUGGACACGAUCGAGUAGCUAGUAAAUCGCCAGGUGAGCGUUCGAGUGGGGUAUUGACAGCGCGGAGAAGUGCCUUGCGAGGCAUGCUUGCUCUACGCUGCUACAACUUGCCACGACCAUCUCUUAUUGGUGUGGUCUUGCAAUGCGACAAAUACUAUUUAAAUUUGGGCUGGUCCAAGUCGUUCGGUUAUGUUCGGAACGGUCGUCAACCACGUUCGUCUCGCCUCGAUUCGGGGCUGCCACGAGAACUUUCCCUAUCGUUCCGUACCCUCUCCGUAAGCACAAGUGGAAGACCCAUCAUGUAUAUGCAAGCGAAAUCAAUAGCCUACCGGCUGAUCUAUGACAGUAGAUACGCAUAUAAGUUGUGGGUCGGUUGGAGUCGUGAUAGAAUGCUACACACAGUUACAACAACAGCGAGCCACUACCAGCGCACUUACAAGCACGGCGUCUUUCUAGGAGGCUUAGACGUUCGAUGGGCCUGCUAUUUGUCGCUCGAUCAGGACCCACUCGGGUCGUUGCCGUCUACCGGGAGUAAAGUGGCGGUCGGAGCGAGUCUGUCUUGUGUUCACAGUCUAAAGGUGAGUUUUGGAAUACGAAAAAAAGUAAUAGUAGCUAAGGGCAGCGUCAAUUGGCCGAAAGAACAGAGAUGUUUUGAGCGCAGUGAGGCUUACCUUACGGGAAGUUCGGGUUGCUUUAAGACUGCUCCGGUUGGUCAGUUUCGAGACCCACCGCGAACCGAAUACAUCCAUCGUAAUCGAAAGUCAAGACCUAACACAUCCGGUCCCCACCAGAUGGGCAGGUUAUACGUCACUUGUGGAGCUACUAUGACUAGCUUUUCGUUACCGAAUUCACGCCUCGUACUGGCGCAUGCCAUCGGUCAAGGGUCUCCACGGAUUUUUAAGCCCAUAUGGGGCCCGCCUCUUGUUCUUUGCACCAUCAGUCUUGGUGUGUGGGCACUUCAAAGCACUCAGCACCGAUCAGCCUCUCAUGAUCGACCCCUGAUGGAAAAAAUAUACGGUACGGCAGCUCACUAUCUCCGAGAGUGCAAACACAGGUAUUUGGCCGGGCCUAAAGGCACUUCCACCAGCGUUGCACUUGUAGUAAACUGGCAGAAUUGGACCUGCGGAUUUCCCCGAAAGCACUCAUCCCAAGACGAGAUCGUAACGGCAGUAGCAGGUGUUCAACGUCUCCGCGGACACAUUGGUAUAUGGGAUCUUAGCGCGCUCAUCAAUCCCUACCCCUGGUUUGAAUGUUUAACGCUCGUGAAUCGUGGUACAGAUAAAAGGAACGGAUUGUCAGGGCGAUCUUGUAUCCCUAAGUGGAUACACGUUGAUGGGGACCGACGCGCGAUCGAAAAGAUGGUGAUCGCCUGUACGAGCGACAUUAAGACAACUCUGACAGUGAUGCUCAAUAAAGUAAGCGAGUAUGAUAUCAUCACGAUAUUGUUCGAGUAUGACCCGCCAAUUCUGCCAAGGGGGCCAAUGUUAUUAGCUGUUGUGUUGAGGGGCGGUCUUCGGUUUUACCUAACACUAAGAACCCGGGUGCGAGACUACCUUAGAACUUGCUCUCUGCACGUGAGAUGCUGCCAGAGGCGUAGAGAGAACUGUGGGCACGGGAGUCUAAUGAACUCUACAUCCCGACUGGAUCUAGAAUCGCGAACUUUACGCGAGGCAAGGCCACCCACAAGACGGCUCGGGCCGGGGGCUUUGAUAAGCCUAGUUUGCGUGACCCGAAAUGCCACGAACAAUCCCGGGAAAAUAGUGCGGUUCCAGCUGUGGGGCAUACUAUGUUAUACACGCUUGAUGGUCAACGGGGCGCUUAACCCGAACUUCACCACUAGGUGGGGGCUCUCAUCCGCGGAUCUUCGAUCAUUGGAGAAUGGUCCAACGCUCCCCGAUGCGGAAGCUUCCCUUACGGAAGCAGAUUGGACCGCUAGUUUUGGGUGUCUGGCUCCAUUCCUCCUCCCCCUUAAUACCCUAAUUCCACCCGGUAGGUUCAAGUCGCACGAGUCAGUCAUGCAGGCAACAAAGUGUCAGCUUCAACGGCAAGUUGUGAAGCGUGUUCGGGGUCAAACGACUCACAGACCCAAGAGUGGACACGAUUCUCACAAUUCCUGCAAUCGAUGGAGCCCCGCGUUGGCGCAAGGGGGUCACCAUCGCAGAGCAAAAAUAGUGCCAACAAGGUACAUCCUCGUACUAAUAAGAAUCGAGCCACAGCCCAUAAUAACGGACACGGUCAGGGUACGGCUACAAAUCUAUCUGAAUCGGUAUGAGUGCCCAGGCUUGUGGGAGAAUAAUAGUUUCCUAUUAAUAGCCAACCCAUGCGGGCAUCCGAGCCGCAGCGUCCGUGUCCCAAGGGGGCUUCAAAGCUCACAACAGUGGAGCUACAUCGGUAACAACAUAAAGGUUCAUUCGUGUAUCGAUGCGAAUGAGGUACCCGUACAGACCCCGAUGGACAGAAUUUGUCUACCGGCUAGGAACGCAGCCUCUCCAAGGUUUACAACGGCCACACAGGUCGAAGCCCUCUGUUGCCGUCCCUUCGCCAACCCAACCUACUUCGAUGAAAGAUGCAGGAAUGCGAGCAUGGCGAGGUGGCUGAGGCGAACACCUAGGUUAACGUCACCCCCGAAAGGGUUCAACGGCUCCGCGGUAAAGCACCCGAACCUCUUCCCCUUAGUCCUAGAAGGUAGUAGAAAUACGAAGUCCGCAUCGAGAUUUGGGGGCCUAUUCCCGGUACAAUAUGUAAAUCUCGGGAGGAAUCAUUUGAGGUCGGGGGGCCGACAUAGACGAGUACGGGAGCCUAACUGGGCCUUCUUGUCCAACCCCAAUUCACAAGCAACGGAGUCGGUGAUGGUACCGGAAGCGUUUAUACAGCGCAACUGGUUGUAUCUCUAUCCGACCACAAUAAUAAACCGUGCCGGCCAAACUUUUAGGUUGAUGUUCAGUGGUUCCGUGACGUUACUCCCGUGCGGGCACAUCAAUGCUAUUUUGACAGUUCUUACGGAGCAUGCUCAUCUACUCAUCCUCCGCAGUUGGGCACUGCCCCAAAGGGUACAAUUUGAAGGUGCUAACUCUGAAUUAGCUGGACGUUCCCGUGGCGAUCGAAAUGCUUGGCUUCACGACAACCCGCACAGAAUGAGACUUUGCCGGCGGUCCUCACAACUGCUCCAGUGGUGGAGGAACAUGGUACACAGCUCAAUGCAUAGUUUCCAGCGCCGAGUGGUGUAG"

In [11]:
translate(real, codons)

'MINPVHLTSTTYGEKGQITPCTNFKTLASGPSLLVIGLAGLWVCVRLLGYEHWFTRLECGPDNKWCSKDCEDERFTRSGLALPVRHNETVVGNTPCRVAEETSWSYEHFVPARVEIEVPGGPKISSLCQLFGGMSYKRGKASPRSLHKQLAPNHKSERQTRLSRFRRFSCCYEHFVTLLLPLYFDVCTLLLRVVNSPSRRAYRTTVNMGKLVLSQFLTAAFWLNESSRLFTIPFVARLWAASYHMEVQGSTSYRVTYQLNLFQLLRALICTTSKILTLLTGIRTSPTSLVNSVNGETMEPRRKRVRSGQARLVRSLLQYYGPYRRYLEGTGRSDPLCIATLASARRGSTTWPWYTWSPRSMIRTLYTAYIAGAVVAGLQTAGFSALSPTFGIFARHQFKWTYGSSEICQEVDVHEGSELEPADLIDSLYECLHWAERVASRAQSDATRPRASLARHTRVKMYRGYPALPSAETKKKCMNGGIHSQYGSSLSRRWWRRQRFPFYQTAHYGIVGRDNSDDPRLIVSLAPEHEDDCVVVLYGFNVHLGQPCEKTSDSAQHRQDGQTGTCRGNYTQGVSAIRMRPKIDEQSSYLVLPCKDCLMEISLTIRTTKTSCRKVMPFYAWVPLHQYLNSDRRESLGEYSAGLETMGAASCPQHSTRIVALPRENRQLTLGGTAYRNTNFRGRLLPKDFGGKGIEAWCLNLANCVEGLPYSCADPESASSVRRQGLCVKTWDQHLQDSERGHEYTYLCQLFGTNPTWSERCPKTDPPPHPSLSTLRMARGYLSIQFNGQGTVTCPYPARANLHACTACARTRTATQPDRLTPDMNYCTSPTKWFILFHPLKLLIVVLRVRYYYRCYTGIRVAGLAYHLWNIGIARVGLRPTTSVYADVQTDTNVTTAYLQNWQDTQLTSIGLINCPLRERDVRTGLPNCSTHDAISDLRVSALTIGGKNRTINRKCSVWVAVQAIEHQCGTNLSYLRDTVGPNHVWVGRMVLQSGQIAY