In [3]:
import requests
import re

def get_protein_sequence(uniprot_id):
    """
    Fetch a protein sequence from UniProt in FASTA format.

    Args:
        uniprot_id: UniProt identifier used to build the download URL
            (e.g. "P07204").

    Returns:
        The sequence as a single string (FASTA header line dropped, line
        breaks removed), or None if the HTTP request failed for any reason
        (connection error, timeout, or a 4xx/5xx status). The failure is
        reported on stdout rather than raised.
    """
    # HTTPS avoids sending the request in clear text and skips the
    # plain-HTTP redirect hop.
    url = f"https://www.uniprot.org/uniprot/{uniprot_id}.fasta"
    try:
        # Without a timeout, requests waits indefinitely on a stalled
        # connection; a Timeout is a RequestException and is handled below.
        response = requests.get(url, timeout=30)
        response.raise_for_status()  # Turn 4xx/5xx responses into HTTPError
    except requests.exceptions.RequestException as e:
        print(f"Error fetching sequence for {uniprot_id}: {e}")
        return None

    # The first line is the FASTA header; the remaining lines are the
    # sequence wrapped across several lines, so join them back together.
    return "".join(response.text.splitlines()[1:])

def find_n_glycosylation_motif(protein_sequence):
    """
    Find every occurrence of the N-glycosylation motif N{P}[ST]{P}.

    The motif reads: 'N', then any residue except 'P', then 'S' or 'T',
    then any residue except 'P'. Occurrences may overlap (for example
    "NNTSA" contains the motif at positions 1 and 2), and every one of
    them is reported.

    Args:
        protein_sequence: Amino-acid sequence as a string. None or an
            empty string yields an empty result.

    Returns:
        A list of 1-based start positions of the motif, in ascending order.
    """
    if not protein_sequence:
        return []

    # A zero-width lookahead consumes no characters, so the scan resumes at
    # the very next position after each hit. A plain consuming pattern
    # would skip any motif that starts inside a previous match.
    motif_pattern = r"(?=N[^P][ST][^P])"

    return [
        match.start() + 1
        for match in re.finditer(motif_pattern, protein_sequence)
    ]

def main():
    """
    Run the N-glycosylation motif search over a fixed set of UniProt IDs.

    Each ID's sequence is downloaded and scanned for the motif. IDs whose
    download failed, or whose sequence contains no motif, are left out of
    the final report. The report prints each remaining ID followed by its
    space-separated motif positions.
    """
    # Sample UniProt IDs, one per line.
    uniprot_ids_input = """
A2Z669
B5ZC00
P07204
P20840
"""
    # Split the block into lines and keep the non-blank, trimmed entries.
    uniprot_ids = []
    for raw_line in uniprot_ids_input.strip().split('\n'):
        cleaned = raw_line.strip()
        if cleaned:
            uniprot_ids.append(cleaned)

    # Maps each ID that has at least one hit to its list of positions.
    results = {}

    for uniprot_id in uniprot_ids:
        print(f"Processing {uniprot_id}...")
        sequence = get_protein_sequence(uniprot_id)
        if not sequence:
            # Download failed or returned an empty sequence.
            continue
        motif_locations = find_n_glycosylation_motif(sequence)
        if not motif_locations:
            # Nothing to report for this protein.
            continue
        results[uniprot_id] = motif_locations

    # Emit the report: ID on one line, positions on the next.
    print("\n--- Results ---")
    for uniprot_id in results:
        locations = results[uniprot_id]
        print(f"{uniprot_id}\n{' '.join(map(str, locations))}")

# Run the pipeline only when this file is executed directly, so importing
# it does not trigger any network requests.
if __name__ == "__main__":
    main()

Processing A2Z669...
Processing B5ZC00...
Processing P07204...
Processing P20840...

--- Results ---
B5ZC00
85 118 142 306 395
P07204
47 115 382 409
P20840
79 109 135 248 306 348 364 402 485 501 614
