In [1]:
import collections

# Standard RNA codon table, built once at import time instead of on every call.
# The three stop codons map to the sentinel string "Stop".
_RNA_CODON_TABLE = {
    "UUU": "F", "UUC": "F", "UUA": "L", "UUG": "L",
    "UCU": "S", "UCC": "S", "UCA": "S", "UCG": "S",
    "UAU": "Y", "UAC": "Y", "UAA": "Stop", "UAG": "Stop",
    "UGU": "C", "UGC": "C", "UGA": "Stop", "UGG": "W",

    "CUU": "L", "CUC": "L", "CUA": "L", "CUG": "L",
    "CCU": "P", "CCC": "P", "CCA": "P", "CCG": "P",
    "CAU": "H", "CAC": "H", "CAA": "Q", "CAG": "Q",
    "CGU": "R", "CGC": "R", "CGA": "R", "CGG": "R",

    "AUU": "I", "AUC": "I", "AUA": "I", "AUG": "M",  # AUG is typically the start codon
    "ACU": "T", "ACC": "T", "ACA": "T", "ACG": "T",
    "AAU": "N", "AAC": "N", "AAA": "K", "AAG": "K",
    "AGU": "S", "AGC": "S", "AGA": "R", "AGG": "R",

    "GUU": "V", "GUC": "V", "GUA": "V", "GUG": "V",
    "GCU": "A", "GCC": "A", "GCA": "A", "GCG": "A",
    "GAU": "D", "GAC": "D", "GAA": "E", "GAG": "E",
    "GGU": "G", "GGC": "G", "GGA": "G", "GGG": "G",
}


def translate_rna_to_protein(rna_string: str) -> str:
    """
    Translate an RNA string into a protein string using the standard RNA codon table.

    The string is read in consecutive, non-overlapping 3-character codons
    starting at index 0; no search for a start codon is performed. Translation
    ends at the first stop codon (UAA, UAG or UGA) or at the end of the input,
    whichever comes first. Codon lookup is case-insensitive, so "aug" and
    "AUG" both translate to "M".

    Codons that are not in the table are reported with a warning on standard
    output and skipped. A trailing fragment shorter than three characters is
    reported the same way and ignored.

    Args:
        rna_string (str): The input RNA string (mRNA strand).

    Returns:
        str: The translated protein string, one letter per amino acid. The
        stop codon itself contributes no character. Empty input yields "".
    """
    protein_sequence = []
    for start in range(0, len(rna_string), 3):
        codon = rna_string[start:start + 3]

        # Stepping by 3 means only the final slice can be short, so nothing
        # is left to translate once an incomplete codon is seen.
        if len(codon) < 3:
            print(f"Warning: Incomplete codon '{codon}' at the end of the RNA string. Ignoring.")
            break

        # Normalise case for the lookup only; warnings echo the caller's text.
        amino_acid = _RNA_CODON_TABLE.get(codon.upper())

        if amino_acid is None:
            # Best-effort behaviour: report the invalid codon and move on
            # rather than aborting the whole translation.
            print(f"Warning: Unknown codon '{codon}' found. Skipping.")
            continue

        if amino_acid == "Stop":
            # A stop codon terminates translation; later codons are ignored.
            break

        protein_sequence.append(amino_acid)

    return "".join(protein_sequence)

# Sample dataset from the problem description, translated right after it is defined.
sample_rna = "AUGGCCAUGGCGCCCAGAACUGAGAUCAAUAGUACCCGUAUUAACGGGUGA"
translated_protein = translate_rna_to_protein(sample_rna)

# Report the input and its translation, one per line.
print(
    f"Input RNA string: {sample_rna}",
    f"Translated protein string: {translated_protein}",
    sep="\n",
)

Input RNA string: AUGGCCAUGGCGCCCAGAACUGAGAUCAAUAGUACCCGUAUUAACGGGUGA
Translated protein string: MAMAPRTEINSTRING
