# Database Tests

In [18]:
import os, requests
from pathlib import Path
import tempfile

## UniProt

Sequence retrieval: fetch a protein sequence from the UniProt REST API by accession.

In [2]:
# Example UniProtKB accession passed to get_uniprot_sequence in the cells below.
uniprot_id = 'P05067'

In [None]:
def get_uniprot_sequence(uniprot_id, timeout=30):
    """Fetch the sequence string of a UniProtKB entry.

    Args:
        uniprot_id: UniProtKB accession (e.g. ``'P05067'``); it is inserted
            into the request URL as-is.
        timeout: Seconds to wait for the server, forwarded to
            ``requests.get``. Without a timeout a stalled connection would
            block the notebook indefinitely.

    Returns:
        The string stored under ``['sequence']['value']`` in the JSON response.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        KeyError: If the response JSON carries no ``sequence.value`` field.
    """
    uniprot_url = f"https://rest.uniprot.org/uniprotkb/{uniprot_id}"

    response = requests.get(
        uniprot_url,
        headers={"accept": "application/json"},
        # Ask only for the sequence field to keep the payload small.
        params={"fields": ["sequence"]},
        timeout=timeout,
    )
    # raise_for_status() is a no-op on success, so no `.ok` guard is needed.
    response.raise_for_status()

    return response.json()['sequence']['value']

In [7]:
# Fetch and display the sequence for the accession set above.
# NOTE(review): the defining cell shows no execution count (In [None]) while
# this call is In [7]; re-run top to bottom so the saved output is known to
# come from the current definition.
get_uniprot_sequence(uniprot_id)

'MLPGLALLLLAAWTARALEVPTDGNAGLLAEPQIAMFCGRLNMHMNVQNGKWDSDPSGTKTCIDTKEGILQYCQEVYPELQITNVVEANQPVTIQNWCKRGRKQCKTHPHFVIPYRCLVGEFVSDALLVPDKCKFLHQERMDVCETHLHWHTVAKETCSEKSTNLHDYGMLLPCGIDKFRGVEFVCCPLAEESDNVDSADAEEDDSDVWWGGADTDYADGSEDKVVEVAEEEEVAEVEEEEADDDEDDEDGDEVEEEAEEPYEEATERTTSIATTTTTTTESVEEVVREVCSEQAETGPCRAMISRWYFDVTEGKCAPFFYGGCGGNRNNFDTEEYCMAVCGSAMSQSLLKTTQEPLARDPVKLPTTAASTPDAVDKYLETPGDENEHAHFQKAKERLEAKHRERMSQVMREWEEAERQAKNLPKADKKAVIQHFQEKVESLEQEAANERQQLVETHMARVEAMLNDRRRLALENYITALQAVPPRPRHVFNMLKKYVRAEQKDRQHTLKHFEHVRMVDPKKAAQIRSQVMTHLRVIYERMNQSLSLLYNVPAVAEEIQDEVDELLQKEQNYSDDVLANMISEPRISYGNDALMPSLTETKTTVELLPVNGEFSLDDLQPWHSFGADSVPANTENEVEPVDARPAADRGLTTRPGSGLTNIKTEEISEVKMDAEFRHDSGYEVHHQKLVFFAEDVGSNKGAIIGLMVGGVVIATVIVITLVMLKKKQYTSIHHGVVEVDAAVTPEERHLSKMQQNGYENPTYKFFEQMQN'

## ModelArchive
Structure retrieval: download a model's mmCIF file from ModelArchive into a temporary file.

In [28]:
# Example ModelArchive identifier passed to get_modelarchive_structure below.
database_id = 'ma-osf-ppp2r2a-001'

In [40]:
def get_modelarchive_structure(database_id, timeout=30):
    """Download a ModelArchive model file into a temporary ``.cif`` file.

    Args:
        database_id: ModelArchive identifier (e.g. ``'ma-osf-ppp2r2a-001'``);
            it is inserted into the request URL as-is.
        timeout: Seconds to wait for the server, forwarded to
            ``requests.get``.

    Returns:
        Path (str) of the temporary file holding the downloaded bytes. The
        file is created with ``delete=False``, so the caller is responsible
        for removing it when done.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
            The temporary file is removed before the error propagates.
    """
    ma_url = f"https://www.modelarchive.org/api/projects/{database_id}?type=basic__model_file_name"

    # delete=False: the file has to outlive this function so the returned
    # path stays usable.
    cif_file = tempfile.NamedTemporaryFile(delete=False, suffix=".cif")
    try:
        # Entering `cif_file` as a context manager guarantees it is flushed
        # and closed before the path is handed back to the caller.
        with cif_file, requests.get(ma_url, stream=True, timeout=timeout) as r:
            r.raise_for_status()
            for chunk in r.iter_content(chunk_size=8192):
                cif_file.write(chunk)
    except BaseException:
        # The `with` block has already closed the handle; drop the empty or
        # partial file so failed or interrupted downloads leave nothing behind.
        os.remove(cif_file.name)
        raise

    return cif_file.name

In [39]:
# Download the model and display the path of the temporary file.
# NOTE(review): the saved output below ends in '.pdb', but the current
# definition creates the file with suffix '.cif' (this cell is In [39], the
# definition is In [40]) — the output is stale; re-run after the definition.
get_modelarchive_structure(database_id)

'/var/folders/__/1v5xyw5s0tl2z88yrm0lb4d40000gn/T/tmp_l6m8k0g.pdb'

In [35]:
# Preview the start of the downloaded mmCIF file. The previous cell body
# referenced `ma_resonse`, a name no cell in this notebook defines, so it
# failed on a fresh kernel. Slicing keeps the rendered output short.
Path(get_modelarchive_structure(database_id)).read_text()[:1000]

'data_ma-osf-ppp2r2a-001\n_entry.id ma-osf-ppp2r2a-001\n_entry.ma_collection_id ma-osf-ppp2r2a\n\n_struct.entry_id ma-osf-ppp2r2a-001\n_struct.pdbx_model_details\n\'Screen of PPP2R2A binding to SH3PXD2B. The model was generated using AlphaFold2-Multimer-v3, using all 5 models, with early stop in case of convergence (max number of recycles was 20).\'\n_struct.pdbx_structure_determination_methodology computational\n_struct.title \'Screen of PPP2R2A binding to SH3PXD2B\'\n\n_audit_conform.dict_location https://raw.githubusercontent.com/ihmwg/ModelCIF/d18ba38/base/mmcif_ma-core.dic\n_audit_conform.dict_name mmcif_ma.dic\n_audit_conform.dict_version 1.4.6\n\nloop_\n_citation.id\n_citation.title\n_citation.journal_abbrev\n_citation.journal_volume\n_citation.page_first\n_citation.page_last\n_citation.year\n_citation.pdbx_database_id_PubMed\n_citation.pdbx_database_id_DOI\n1 \'ColabFold: making protein folding accessible to all.\' \'Nature Methods\' 19 679 682 2022 35637307 10.1038/s41592-022-

## AlphaFoldDB

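Structure retrieval: look up an accession in AlphaFold DB and download the predicted structure (PDB) into a temporary file.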

In [8]:
# Accession passed to get_afdb_structure below.
# NOTE(review): this rebinds `database_id`, which the ModelArchive section
# set to a ModelArchive identifier; re-running the ModelArchive call cell
# after this one would use this value instead. Consider a distinct name.
database_id = 'P05067'

In [20]:
def get_afdb_structure(database_id, timeout=30):
    """Download the AlphaFold DB PDB file for an accession to a temp file.

    The prediction endpoint is queried first; the ``pdbUrl`` of the first
    entry in its JSON response is then streamed into a temporary ``.pdb``
    file.

    Args:
        database_id: Accession inserted into the prediction URL as-is
            (e.g. ``'P05067'``).
        timeout: Seconds to wait for the server, forwarded to both
            ``requests.get`` calls.

    Returns:
        Path (str) of the temporary file holding the downloaded bytes. The
        file is created with ``delete=False``, so the caller is responsible
        for removing it when done.

    Raises:
        requests.HTTPError: If either request answers with a 4xx/5xx status.
            A temporary file created for the download is removed first.
        IndexError, KeyError: If the prediction response is an empty list or
            its first entry has no ``pdbUrl``.
    """
    afdb_url = f"https://alphafold.ebi.ac.uk/api/prediction/{database_id}"

    response = requests.get(
        afdb_url,
        headers={"accept": "application/json"},
        params={},
        timeout=timeout,
    )
    # raise_for_status() is a no-op on success, so no `.ok` guard is needed.
    response.raise_for_status()

    pdb_url = response.json()[0]['pdbUrl']

    # delete=False: the file has to outlive this function so the returned
    # path stays usable.
    pdb_file = tempfile.NamedTemporaryFile(delete=False, suffix=".pdb")
    try:
        # Entering `pdb_file` as a context manager guarantees it is flushed
        # and closed before the path is handed back to the caller.
        with pdb_file, requests.get(pdb_url, stream=True, timeout=timeout) as r:
            r.raise_for_status()
            for chunk in r.iter_content(chunk_size=8192):
                pdb_file.write(chunk)
    except BaseException:
        # The `with` block has already closed the handle; drop the empty or
        # partial file so failed or interrupted downloads leave nothing behind.
        os.remove(pdb_file.name)
        raise

    return pdb_file.name

In [21]:
# Download the structure for the accession set above and display the path of
# the temporary file that get_afdb_structure returns.
get_afdb_structure(database_id)

'/var/folders/__/1v5xyw5s0tl2z88yrm0lb4d40000gn/T/tmp6j6nt7_q.pdb'