In [1]:
from common_immunogit import *

2025-05-02 14:49:27,950 - INFO - Root path: /Users/guillaume.souede/PycharmProjects/immunogit


2025-05-02 14:49:27,951 - INFO - Directory structure set up successfully.


In [2]:
"""
Query Creation
"""


def create_query(domain="biomodels", offset=0, num_results=100):
    """
    Creates a query string and URL for searching BioModels.

    Args:
        domain (str): The domain to search within. Defaults to "biomodels".
        offset (int): The starting point for search results. Defaults to 0.
        num_results (int): The number of results to retrieve. Defaults to 100.

    Returns:
        tuple: A tuple containing:
            - query (str): The formatted query string.
            - url (str): The URL for the search query.
    """
    query_parts_full = {
        'mode': '*:*',
        'species': 'TAXONOMY:9606',
        'curation_status': 'curationstatus:"Manually curated"',
        'formats': 'modelformat:"SBML"',
        'kw': 'submitter_keywords:"Immuno-oncology"'
    }

    query_parts = [value for value in query_parts_full.values() if value]
    query = " AND ".join(query_parts)

    query_for_url = query.replace(" ", "%20").replace(":", "%3A").replace('"', "%22")
    url = f"https://www.ebi.ac.uk/biomodels/search?query={query_for_url}&domain={domain}&offset={offset}&numResults={num_results}"

    return query, url

In [3]:
"""
BioServices' way to retrieve Models
"""

s = BioModels()

def get_filtered_models(query: str) -> list:
    """
    Retrieve filtered models based on a query.

    Args:
        query (str): The search query string.

    Returns:
        list: A list of model IDs matching the query.

    Raises:
        RuntimeError: If an error occurs during the search process.
    """
    offset = 0
    num_results = 10
    all_models = []

    try:
        while True:
            search_results = s.search(query, numResults=num_results, offset=offset)

            if search_results.get("models"):
                models = search_results["models"]
                all_models.extend(models)

                offset += num_results
            else:
                break

        if all_models:
            print(f"\nTotal models : {len(all_models)}")
        else:
            print("No matching models.")

    except Exception as e:
        raise RuntimeError(f"Error : {str(e)}")

    return [model['id'] for model in all_models]

def get_model_metadata(model_ids: list) -> dict:
    """
    Retrieve metadata for a list of model IDs.

    Args:
        model_ids (list): A list of model IDs.

    Returns:
        dict: A dictionary containing metadata for each model ID.
    """
    metadata = {}
    for model_id in model_ids:
        try:
            model_data = s.get_model(model_id)
            metadata[model_id] = model_data
        except Exception as e:
            print(f"Error on {model_id}: {e}")

    return metadata

def save_metadata_to_json(metadata: dict, filename: str):
    """
    Save metadata to a JSON file.

    Args:
        metadata (dict): The metadata dictionary to save.
        filename (str): The path to the JSON file.

    Raises:
        Exception: If an error occurs during the file writing process.
    """
    try:
        with open(filename, 'w', encoding='utf-8') as json_file:
            json.dump(metadata, json_file, ensure_ascii=False, indent=4)
        print(f"Metadata saved to {filename}")
    except Exception as e:
        print(f"Error : {e}")

def download_biomodels(directory: str, model_ids: list, num_per_download=100):
    """
    Download BioModels in batches and consolidate them into a single ZIP file.

    Args:
        directory (str): The directory to save the downloaded files.
        model_ids (list): A list of model IDs to download.
        num_per_download (int): The number of models to download per batch (max 100).

    Returns:
        str: The path to the consolidated ZIP file.

    Raises:
        ValueError: If the model_ids list is empty or num_per_download exceeds 100.
    """
    if num_per_download > 100:
        raise ValueError("Maximum number of models that can be downloaded at a time is 100.")

    total_models = len(model_ids)
    if total_models == 0:
        raise ValueError("Error : model_ids list empty.")

    num_downloads = (total_models // num_per_download) + (1 if total_models % num_per_download > 0 else 0)
    filenames = []

    for download_number in range(num_downloads):
        start = download_number * num_per_download
        end = min(start + num_per_download, total_models)
        batch = model_ids[start:end]

        print(f"Downloading batch {download_number + 1}: Models {start + 1} to {end}")

        fname = os.path.join(directory, f"Biomodels_{start + 1}_to_{end}.zip")
        filenames.append(fname)

        if os.path.isfile(fname):
            os.remove(fname)

        try:
            s.search_download(batch, output_filename=fname)
            print(f"Downloaded models {start + 1} to {end} into {fname}")
        except Exception as e:
            print(f"Error downloading batch {download_number + 1}: {str(e)}")

    final_zip = os.path.join(directory, "biomodels_filtered.zip")
    with z.ZipFile(filenames[0], 'a') as z1:
        for fname in filenames[1:]:
            with z.ZipFile(fname, 'r') as zf:
                for n in zf.namelist():
                    z1.writestr(n, zf.read(n))

    if not os.path.isfile(final_zip):
        os.rename(filenames[0], final_zip)

    for fname in filenames[1:]:
        try:
            os.remove(fname)
        except Exception:
            print(f"Could not delete temporary file: {fname}")

    print(f"All models consolidated into {final_zip}")
    return final_zip

def bioservices_get_models():
    """
    Main function to retrieve, process, and download BioModels.

    This function performs the following steps:
    1. Creates a query to search for models.
    2. Retrieves filtered model IDs based on the query.
    3. Fetches metadata for the filtered models.
    4. Saves the metadata to a JSON file.
    5. Downloads the models in batches and consolidates them into a single ZIP file.

    Raises:
        Exception: If any error occurs during the process.
    """
    query, _ = create_query()
    try:
        filtered_model_ids = get_filtered_models(query)
        model_metadata = get_model_metadata(filtered_model_ids)
        save_metadata_to_json(model_metadata, md_path / "model_metadata.json")
        output_zip = download_biomodels(
            directory=bm_sbml_path,
            model_ids=filtered_model_ids,
            num_per_download=100
        )

        print(f"Models downloaded and saved in {output_zip}")

    except Exception as e:
        print(f"Error : {e}")

[32mINFO    [bioservices.BioModels:363]: [0m [32mInitialising BioModels service (REST)[0m


2025-05-02 14:49:28,415 - INFO - Initialising BioModels service (REST)


In [4]:
bioservices_get_models()


Total models : 68


Error : [Errno 2] No such file or directory: '/Users/guillaume.souede/PycharmProjects/immunogit/metadata/metadata_all.json/model_metadata.json'
Downloading batch 1: Models 1 to 68


[32mINFO    [bioservices.BioModels:240]: [0m [32m/Users/guillaume.souede/PycharmProjects/immunogit/models/BioModels/SBML/Biomodels_1_to_68.zip[0m


2025-05-02 14:49:47,363 - INFO - /Users/guillaume.souede/PycharmProjects/immunogit/models/BioModels/SBML/Biomodels_1_to_68.zip


Downloaded models 1 to 68 into /Users/guillaume.souede/PycharmProjects/immunogit/models/BioModels/SBML/Biomodels_1_to_68.zip
All models consolidated into /Users/guillaume.souede/PycharmProjects/immunogit/models/BioModels/SBML/biomodels_filtered.zip
Models downloaded and saved in /Users/guillaume.souede/PycharmProjects/immunogit/models/BioModels/SBML/biomodels_filtered.zip
