In [1]:
import requests
import xml.etree.ElementTree as ET
import pandas as pd
from tabulate import tabulate 

In [2]:
def get_uniprot_data(protein_id):
    base_url = "https://www.uniprot.org/uniprot/"
    format_option = ".xml"
    
    api_url = base_url + protein_id + format_option
    
    try:
        response = requests.get(api_url)
        
        if response.status_code == 200:
            return response.text
        else:
            print(f"Error: Unable to retrieve data. Status code: {response.status_code}")
            return None
    except Exception as e:
        print(f"An error occurred: {e}")
        return None

In [3]:
def parse_uniprot_xml(xml_string):
    root = ET.fromstring(xml_string)

    namespaces = {'ns': 'http://uniprot.org/uniprot'}
    entries = []
    names = []
    organisms = []
    sequences = []
    annotations = []

    for entry in root.findall('.//ns:entry', namespaces):
        accession = entry.find('ns:accession', namespaces).text
        name = entry.find('ns:name', namespaces).text

        organism_scientific = entry.find('.//ns:organism/ns:name[@type="scientific"]', namespaces).text
        organism_common = entry.find('.//ns:organism/ns:name[@type="common"]', namespaces).text

        sequence_elem = entry.find('.//ns:sequence', namespaces)
        sequence = sequence_elem.text if sequence_elem is not None else ''
        annotation_texts = [annotation.text for annotation in entry.findall('.//ns:comment/ns:text', namespaces)]
        
        entries.append(accession)
        names.append(name)
        organisms.append(organism_scientific)
        sequences.append(sequence)
        annotations.append('; '.join(annotation_texts))

    df = pd.DataFrame({'Entry': entries, 'Name': names, 'Organism': organisms, 'Sequence': sequences, 'Annotations': annotations})

    return df

In [4]:
protein_id = "P12344"
protein_data = get_uniprot_data(protein_id)

if protein_data:
    df = parse_uniprot_xml(protein_data)
    
    print(df)

    Entry        Name    Organism  \
0  P12344  AATM_BOVIN  Bos taurus   

                                            Sequence  \
0  MALLHSGRFLSGVAAAFHPGLAAAASARASSWWAHVEMGPPDPILG...   

                                         Annotations  
0  Catalyzes the irreversible transamination of t...  


In [6]:
table = tabulate(df, headers='keys', tablefmt='grid', showindex=False, maxcolwidths=[10, 10])
print(table)

+---------+------------+------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------