Retrieve models.

In [None]:
# GS
"""
Needs testing.
TODO: add a way to retrieve both the models and their metadata.
"""
import os
import subprocess
import sys
import json
import zipfile as z

try:
    from bioservices import BioModels
except ImportError:
    subprocess.check_call([sys.executable, "-m", "pip", "install", "bioservices"])
    from bioservices import BioModels

import json
import zipfile as z

# Absolute path of the directory containing this file; passed to
# download_biomodels() as the download directory when run as a script.
# NOTE(review): `__file__` is not defined in interactive/notebook sessions —
# confirm this module is only ever executed as a script.
thisDir = os.path.dirname(os.path.abspath(__file__))

# Module-level BioModels client shared by the functions in this file.
s = BioModels()


def get_filtered_models() -> list:
    """Return the identifiers of the BioModels entries targeted by this script.

    The search query ANDs together four filters: species "Homo sapiens",
    curation status "Manually curated", format SBML or "Other", and the GO
    annotation "immune response".

    Returns:
        list: The ``id`` field of every entry in the ``models`` list of the
        search response, in the order the service returned them.
    """
    species = "species:Homo sapiens"
    curation_status = "curationstatus:Manually curated"
    # The format name is "SBML" (Systems Biology Markup Language). The former
    # "SMBL" spelling is not a format name, so that half of the OR clause
    # presumably never matched anything.
    formats = "(format:SBML OR format:Other)"
    go_annotation = "go:immune response"

    query = " AND ".join((species, curation_status, formats, go_annotation))

    # NOTE(review): the search call is made with default paging arguments, so
    # this may return only the first page of matches — confirm against the
    # bioservices BioModels.search documentation.
    search_results = s.search(query)
    return [model["id"] for model in search_results["models"]]


def download_biomodels(directory: str, model_ids: list, num_per_download=100):
    """Download the given models in batches and merge them into one archive.

    The identifiers are split into consecutive batches of at most
    ``num_per_download`` models. Each batch is downloaded into
    ``Biomodels_<first>_<last>.zip`` inside ``directory`` (1-based indices
    into ``model_ids``). The members of every later batch archive are then
    appended to the first one, which is finally moved to
    ``<directory>/biomodels.zip``; the other batch archives are deleted.

    Args:
        directory: Directory in which the archives are written.
        model_ids: Model identifiers to download.
        num_per_download: Batch size, between 1 and 100 inclusive.

    Returns:
        list: The paths of the per-batch archives that were downloaded, in
        batch order. These are intermediate files: once this function
        returns they have been renamed or removed, and the merged result is
        at ``<directory>/biomodels.zip``. An empty list is returned when
        ``model_ids`` is empty (nothing is downloaded).

    Raises:
        ValueError: If ``num_per_download`` is greater than 100 or less
            than 1.
    """
    if num_per_download > 100:
        raise ValueError("Maximum number of models that can be downloaded at a time is 100")
    if num_per_download < 1:
        # Guards against a zero step (division by zero previously) and
        # meaningless negative batch sizes.
        raise ValueError("num_per_download must be at least 1")

    total = len(model_ids)
    if total == 0:
        # Nothing to fetch; avoids indexing an empty list of archives below.
        return []

    filenames = []

    for start_index in range(0, total, num_per_download):
        end_index = min(start_index + num_per_download, total)
        batch_ids = model_ids[start_index:end_index]

        fname = os.path.join(directory, f"Biomodels_{start_index + 1}_{end_index}.zip")
        filenames.append(fname)

        # Drop any stale archive left over from an earlier run.
        if os.path.isfile(fname):
            os.remove(fname)

        s.search_download(batch_ids, output_filename=fname)
        print(f"Biomodels models from index {start_index + 1} to {end_index} saved to {fname}")

    first_zip, other_zips = filenames[0], filenames[1:]

    # Consolidate: append the members of every later batch to the first zip.
    with z.ZipFile(first_zip, 'a') as merged:
        for fname in other_zips:
            with z.ZipFile(fname, 'r') as batch:
                for member in batch.namelist():
                    # ZipFile.read opens and closes the member itself, so no
                    # file handle is left dangling.
                    merged.writestr(member, batch.read(member))

    # Move the merged archive to its final name. os.replace overwrites an
    # existing biomodels.zip, so a previous run's result is never silently
    # kept in place of the fresh download.
    biomodels_zip = os.path.join(directory, "biomodels.zip")
    os.replace(first_zip, biomodels_zip)

    # Remove the batch archives that have been merged.
    for fname in other_zips:
        os.remove(fname)

    return filenames


# Script entry point: nothing below runs when this file is imported.
if __name__ == "__main__":
    # Set to True to actually do the download
    # (left False so that running the script has no network or disk effects).
    ACTIVATE_DOWNLOAD = False

    if ACTIVATE_DOWNLOAD:
        # Look up the matching model ids, then download them next to this file.
        model_ids = get_filtered_models()
        download_biomodels(thisDir, model_ids)
