In [1]:
#Ihsan Muchsin

import re
from urllib.request import urlopen

# N-glycosylation motif: N, then anything but P, then S or T, then anything but P.
_N_GLYCOSYLATION_MOTIF = re.compile(r'N[^P][ST][^P]')


def _read_uniprot_ids(infile):
    '''Return the whitespace-stripped, non-blank lines of infile, in file order.'''
    with open(infile, 'r') as fin:
        stripped_lines = (line.strip() for line in fin)
        # Blank lines (e.g. a trailing newline at the end of the file) are not IDs;
        # skipping them avoids requesting a URL with an empty ID.
        return [uniprot_id for uniprot_id in stripped_lines if uniprot_id]


def _fetch_sequence(uniprot_id):
    '''Download the FASTA text for uniprot_id and return its sequence as one string.'''
    # NOTE(review): the ID is interpolated verbatim; confirm the endpoint still
    # accepts IDs that carry an entry-name suffix (e.g. 'P10761_ZP3_MOUSE').
    url = 'http://www.uniprot.org/uniprot/{}.fasta'.format(uniprot_id)

    with urlopen(url) as uin:
        fasta_txt = uin.read().decode('ascii')

    # The first line is the FASTA header; everything after it is sequence.
    # splitlines() + strip() keep '\r' (CRLF responses) out of the sequence,
    # where it would otherwise be accepted by the '[^P]' classes of the motif.
    return ''.join(line.strip() for line in fasta_txt.splitlines()[1:])


def _find_motif_positions(seq):
    '''Return the 1-based start positions of every (possibly overlapping) motif hit in seq.'''
    positions, search_from = [], 0
    while True:
        match = _N_GLYCOSYLATION_MOTIF.search(seq, search_from)
        if not match:
            return positions

        # match.start() + 1 is both the 1-based position to report and the
        # 0-based index to resume from, so overlapping hits are not skipped.
        search_from = match.start() + 1
        positions.append(search_from)


def mprt(infile, outfile):
    '''
    Find N-glycosylation motif locations in UniProt proteins.

    Given: 1) infile: path of a text file with one UniProt access ID per line
              (blank lines are ignored).
           2) outfile: path of the file the result is written to.
    Return: a list of (access ID, [1-based motif positions]) tuples, in input
            order, containing only the proteins with at least one motif hit.
            The same result is written to outfile: for each protein the ID on
            one line and the space-separated positions on the next, with no
            trailing newline.

    Each sequence is downloaded with urlopen, so network access is required
    and any error raised by urlopen propagates to the caller.
    '''
    motif_pos = []
    for uniprot_id in _read_uniprot_ids(infile):
        match_pos = _find_motif_positions(_fetch_sequence(uniprot_id))
        if match_pos:
            motif_pos.append((uniprot_id, match_pos))

    with open(outfile, 'w') as fout:
        fout.write('\n'.join(
            uniprot_id + '\n' + ' '.join(str(pos) for pos in match_pos)
            for uniprot_id, match_pos in motif_pos
        ))

    return motif_pos

In [2]:
infile = 'rosalind_mprt.txt'
outfile = 'mprt_sol.txt'

# Fetch the sequences, write the solution file and keep the result for display.
res = mprt(infile, outfile)

# One entry per protein: the access ID, a newline, then its space-separated positions.
res_string_list = [
    uniprot_id + '\n' + ' '.join(map(str, match_pos))
    for uniprot_id, match_pos in res
]
print('\n'.join(res_string_list))

P10761_ZP3_MOUSE
146 273 304 327 330
P36912_EBA2_FLAME
5 48 236 278
P13473_LMP2_HUMAN
32 38 49 58 75 101 123 179 229 242 257 275 300 307 317 356
A6WKC3
84 360
P01880_DTC_HUMAN
225 316 367
P81447_MPP3_CAPHI
96
P37803
110
P05155_IC1_HUMAN
25 69 81 238 253 352
P00749_UROK_HUMAN
322
P55067_PGCN_RAT
121 339 737 967 1164
P0AF66
90 121
P08318_P100_HCMVA
833
