In [1]:
#Ihsan Muchsin
#Using only the Python standard library

def prot(infile, outfile, codon):
    '''
    Translate an mRNA string into a protein string using a codon table read from a file.

    Given: 1) An input file containing an RNA string s corresponding to a strand of mRNA (of length at most 10 kbp). 2) The name of output file. 3) A file containing codon table from rosalind
    Return: The protein string encoded by s, plus an output file containing the result

    The codon table file is read as whitespace-separated "codon amino_acid" pairs,
    any number of pairs per line; the value 'Stop' marks a stop codon.
    Translation starts at the first base, ends at the first stop codon (or at the
    end of the sequence), and ignores trailing bases that do not form a complete codon.

    Raises:
        KeyError: if a complete codon of the RNA string is missing from the codon table.
        IndexError: if a codon table line holds an odd number of fields.
    '''

    # The sequence may be wrapped over several lines; join them into one string.
    with open(infile, 'r') as fin:
        rna = "".join(line.strip() for line in fin)

    codon_table = {}
    with open(codon, 'r') as cod:
        for line in cod:
            fields = line.split()  # codon, amino acid, codon, amino acid, ...
            for i in range(0, len(fields), 2):
                codon_table[fields[i]] = fields[i + 1]

    # Only walk complete codons: a sequence whose length is not a multiple of
    # three would otherwise end in a 1- or 2-base slice that is never in the table.
    last_full_codon_end = len(rna) - len(rna) % 3

    residues = []
    for start in range(0, last_full_codon_end, 3):
        amino_acid = codon_table[rna[start:start + 3]]  # single lookup per codon
        if amino_acid == 'Stop':
            break
        residues.append(amino_acid)
    protein = "".join(residues)

    with open(outfile, 'w') as fout:
        fout.write(protein)

    return protein

In [2]:
# Input sequence, output destination and codon table for the standard-library version.
infile, outfile, codon = 'rosalind_prot.txt', 'prot_sol.txt', 'codon.txt'

# Translate the sequence, write the result file and show the protein string.
res = prot(infile, outfile, codon)
print(res)

MRITAPCPRPWGGKAARISHTIQLHNMRIASRADEQIHNCTSQATAGAEPGERWNNLLQNSRYCSRVPPGQKCACAKITSGPTVDLIILKRSVKDPRKLTRIIGMRSSLANYFGPSILSGDVQITRKGLAEPHRHLTTRNTGKRAGATSCRPDVVFSTSRIEINYIVYDHGSRDERNGGFAVCPLSLNGLPQGNSKTISSTPTNIQLHGNILFRPCNPRDYSGREVQEHAHPKSEWSRALTVCHLQRLTAVYHRVTSSEATARRKTLRRVSDVNGAASRDFDLLSLLAHRNSLCNRTVIASLNITWHRYLPTRHHGNLTLRPYMVTMGPLPQRSQSLLSAGYLLLCLDHRLLSLRKTESRVLDGPEATTLRLVGSREVTFVSCFNCARIIRFITTVYHPSSLPQKCLDRTVLSLSNIFCSHLESQEGIENRCNSCSSPSNPSTAQPVIKKNLFITTPPGARTIRAPSVDALPRLCLGERRLLLAAHISPQSTGLCLYLQHSRQRQRQRGDGKQPFGASSKGAGSPLGVETVKRVLHWPDILPSRFKINSPRPFEGCSEARISKAQPQVLQVVRNCRRVLSCKRDRLLRALLFGGLGMSSDYSKWNILLTRIGLDYPVSPGLAPLSIFGYLKDPSGQQRDRLPSRFSLHHCPARGEGQNVDAQRYPIVVRFANDSDCRSNDGGWYAFVPGQSLRPSWAHPSRYILTVGAPRRRTFFNTYTTTVGSVIILLMRPPNPKCYRGGTPMLVSREADKYAHASLPECGAPIHRYGTSQLVCFWIIHCLAIRGGSHLFAVDVYPTSNQECPCSSSDRTRTASVMARSVTIIPRDKYLRIIKDLLNATRTTPTVSTQSFYHSAGILSATYLERSVPRFIRTATTFLKTPTSESGTTQSKRQFFHLNTESVNGEERYSETRSSQRIVNTVTHIWITVVLVCVVSMNSSVILDIHLIRWDSYLANCRSLHKTQLCGTIPHKAFLVANGNQTGCDPINQIAWFSTLDSWSD

In [3]:
#Ihsan Muchsin
#Using the Biopython library, which makes the code much simpler

from Bio.Seq import Seq
import Bio.Alphabet

def prot_biopython(infile, outfile):
    '''
    Translate an mRNA string into a protein string with Biopython.

    Given: 1) An input file containing an RNA string s corresponding to a strand of mRNA (of length at most 10 kbp). 2) The name of output file
    Return: The protein string encoded by s, plus an output file containing the result

    Translation stops at the first stop codon (to_stop=True). Trailing bases that
    do not form a complete codon are dropped before translation.
    '''

    # Strip every line (not just '\n') so Windows line endings and stray
    # surrounding whitespace do not end up inside the sequence.
    with open(infile, 'r') as fin:
        rna_text = "".join(line.strip() for line in fin)

    # Trim an incomplete trailing codon explicitly; Biopython warns about
    # sequences whose length is not a multiple of three.
    rna_text = rna_text[:len(rna_text) - len(rna_text) % 3]

    # No alphabet argument: Bio.Alphabet was removed in Biopython 1.78 and
    # translation does not need it.
    rna = Seq(rna_text)

    # Translate the RNA Seq object to protein and cast it to a plain string.
    protein = str(rna.translate(to_stop=True))

    with open(outfile, 'w') as fout:
        fout.write(protein)

    return protein

In [4]:
# Input sequence and output destination for the Biopython version.
infile, outfile = 'rosalind_prot.txt', 'prot_sol.txt'

# Translate the sequence, write the result file and show the protein string.
res = prot_biopython(infile, outfile)
print(res)

MRITAPCPRPWGGKAARISHTIQLHNMRIASRADEQIHNCTSQATAGAEPGERWNNLLQNSRYCSRVPPGQKCACAKITSGPTVDLIILKRSVKDPRKLTRIIGMRSSLANYFGPSILSGDVQITRKGLAEPHRHLTTRNTGKRAGATSCRPDVVFSTSRIEINYIVYDHGSRDERNGGFAVCPLSLNGLPQGNSKTISSTPTNIQLHGNILFRPCNPRDYSGREVQEHAHPKSEWSRALTVCHLQRLTAVYHRVTSSEATARRKTLRRVSDVNGAASRDFDLLSLLAHRNSLCNRTVIASLNITWHRYLPTRHHGNLTLRPYMVTMGPLPQRSQSLLSAGYLLLCLDHRLLSLRKTESRVLDGPEATTLRLVGSREVTFVSCFNCARIIRFITTVYHPSSLPQKCLDRTVLSLSNIFCSHLESQEGIENRCNSCSSPSNPSTAQPVIKKNLFITTPPGARTIRAPSVDALPRLCLGERRLLLAAHISPQSTGLCLYLQHSRQRQRQRGDGKQPFGASSKGAGSPLGVETVKRVLHWPDILPSRFKINSPRPFEGCSEARISKAQPQVLQVVRNCRRVLSCKRDRLLRALLFGGLGMSSDYSKWNILLTRIGLDYPVSPGLAPLSIFGYLKDPSGQQRDRLPSRFSLHHCPARGEGQNVDAQRYPIVVRFANDSDCRSNDGGWYAFVPGQSLRPSWAHPSRYILTVGAPRRRTFFNTYTTTVGSVIILLMRPPNPKCYRGGTPMLVSREADKYAHASLPECGAPIHRYGTSQLVCFWIIHCLAIRGGSHLFAVDVYPTSNQECPCSSSDRTRTASVMARSVTIIPRDKYLRIIKDLLNATRTTPTVSTQSFYHSAGILSATYLERSVPRFIRTATTFLKTPTSESGTTQSKRQFFHLNTESVNGEERYSETRSSQRIVNTVTHIWITVVLVCVVSMNSSVILDIHLIRWDSYLANCRSLHKTQLCGTIPHKAFLVANGNQTGCDPINQIAWFSTLDSWSD