In [43]:
import os
import zipfile
import requests
import json
from bioservices import BioModels

In [44]:
# Shared BioModels REST client. The functions defined in the following cells
# read this module-level `s` (for `s.search` and `s.get_model`), so this cell
# must run before any of them is called.
s = BioModels()

[32mINFO    [bioservices.BioModels:363]: [0m [32mInitialising BioModels service (REST)[0m


In [45]:
def get_all_models(query, page_size=10):
    """
    Retrieve all models matching the query using pagination.

    Parameters
    ----------
    query : str
        Search expression, passed unchanged to ``s.search``.
    page_size : int, optional
        Number of results requested per page (default 10). Must be >= 1.

    Returns
    -------
    list
        The concatenated ``'models'`` entries of every page retrieved. If a
        request fails part-way, the error is printed and the models collected
        so far are returned.

    Raises
    ------
    ValueError
        If ``page_size`` is smaller than 1.
    """
    if page_size < 1:
        raise ValueError(f"page_size must be >= 1, got {page_size}")

    models = []
    offset = 0
    page = 0

    while True:
        try:
            # Forward page_size to the service: without it the server uses its
            # own default page length and the offset arithmetic drifts.
            search_results = s.search(query, offset=offset, numResults=page_size)

            if 'models' not in search_results or not search_results['models']:
                break

            batch = search_results['models']
            models.extend(batch)
            # Advance by what was actually returned, so a server-side cap on
            # the page length can neither skip nor duplicate entries.
            offset += len(batch)
            page += 1
            print(f"Page {page} downloaded, {len(batch)} models retrieved.")

        except Exception as e:
            print(f"Error while retrieving models: {e}")
            break

    return models

In [46]:
def download_model_file(model_id, sbml_url, directory):
    """
    Fetch ``sbml_url`` and store the payload as ``<model_id>.xml`` in ``directory``.

    Returns the path of the written file, or None when anything goes wrong
    (non-200 response, network error, unwritable path); the reason is printed.

    NOTE(review): a later cell defines another ``download_model_file`` taking
    ``(model_id, directory)``. Once that cell has run, this three-argument
    version is shadowed and can no longer be called by name.
    """
    try:
        target = os.path.join(directory, f"{model_id}.xml")

        reply = requests.get(sbml_url)
        # Anything other than a plain 200 is routed through the handler below.
        if reply.status_code != 200:
            raise RuntimeError(f"Failed to download model {model_id}: HTTP {reply.status_code}")

        with open(target, 'wb') as out:
            out.write(reply.content)
        print(f"Model {model_id} downloaded successfully.")
        return target

    except Exception as err:
        print(f"Error while downloading model {model_id}: {err}")
        return None

In [47]:
import os
import requests

def get_latest_version(model_id):
    """
    Ask the BioModels download endpoint which version it serves for ``model_id``.

    A HEAD request is sent to the model's download URL with redirects followed,
    and the version is read from the final URL as the dot-separated component
    just before the file extension.

    Returns the version as a string. Raises RuntimeError when the final
    response status is not 200.

    NOTE(review): the parsing assumes the final URL ends in
    ``<ID>.<version>.xml``; for any other URL shape the returned value is
    meaningless. Confirm against the live endpoint.
    """
    api_url = f"https://www.ebi.ac.uk/biomodels/model/download/{model_id}?format=xml"
    reply = requests.head(api_url, allow_redirects=True)

    if reply.status_code != 200:
        raise RuntimeError(f"Unable to retrieve version for {model_id}: HTTP {reply.status_code}")

    # e.g. ".../BIOMD0000000955.5.xml" -> "5" (second-to-last dot-separated part)
    return reply.url.rsplit(".", 2)[-2]

def download_model_file(model_id, directory, timeout=60):
    """
    Downloads the SBML file of the specified model into ``directory``.

    The request goes to the BioModels download endpoint without an explicit
    version, so the server decides which version is returned (presumably the
    latest; confirm against the BioModels API documentation).

    Parameters
    ----------
    model_id : str
        BioModels identifier; also used as the file name (``<model_id>.xml``).
    directory : str
        Existing directory in which the file is written.
    timeout : float or tuple, optional
        Timeout in seconds handed to ``requests.get`` so that a stalled
        connection cannot block a batch run forever (default 60).

    Returns
    -------
    str or None
        Path of the written file, or None if the download failed. The reason
        for a failure is printed instead of being silently discarded.
    """
    try:
        sbml_url = f"https://www.ebi.ac.uk/biomodels/model/download/{model_id}?format=xml"
        model_path = os.path.join(directory, f"{model_id}.xml")

        # No separate version lookup: its result was never used, and it only
        # added a second request to this very URL that could fail on its own.
        response = requests.get(sbml_url, timeout=timeout)
        if response.status_code != 200:
            raise RuntimeError(f"Download failed for model {model_id}: HTTP {response.status_code}")

        with open(model_path, 'wb') as f:
            f.write(response.content)
        return model_path

    except Exception as e:
        print(f"Error while downloading model {model_id}: {e}")
        return None


In [48]:
import os
import json
import zipfile

def _category_directory(title, base_directory):
    """Return the sub-directory of ``base_directory`` matching a lower-cased title."""
    # Checked in this order; the first keyword found in the title wins.
    for keyword, folder in (
        ("therapy", "Therapy"),
        ("immune system", "Systeme_immun"),
        ("immune response", "Reponse_immun"),
    ):
        if keyword in title:
            return os.path.join(base_directory, folder)
    return os.path.join(base_directory, "Autres")


def download_model_with_metadata(model_data, base_directory):
    """
    Downloads the SBML model and its metadata, then saves both in a ZIP archive.

    The archive ``<id>.zip`` is written to a category sub-directory of
    ``base_directory`` chosen from keywords in the model title. The
    intermediate ``.xml`` and ``_metadata.json`` files are removed once the
    archive has been written.

    Parameters
    ----------
    model_data : dict
        One search result; must contain ``'id'`` and may contain ``'name'``.
    base_directory : str
        Root directory under which the category folders are created.

    Returns
    -------
    str or None
        Path of the ZIP archive on success, None if any step failed. Failures
        are reported on stdout and never raised, so a batch loop keeps going.
    """
    model_id = None
    try:
        model_id = model_data['id']
        # 'name' can be absent or None; treat both as an empty title. The
        # unused 'submitter_keywords' lookup was dropped: calling .lower() on a
        # non-string value there made the whole model be skipped silently.
        title = (model_data.get('name') or "").lower()

        directory = _category_directory(title, base_directory)
        os.makedirs(directory, exist_ok=True)

        # Download the SBML file
        model_path = download_model_file(model_id, directory)
        if model_path is None:
            print(f"Skipping model {model_id}: SBML download failed.")
            return None

        # Retrieve full metadata
        try:
            full_metadata = s.get_model(model_id)
        except Exception as e:
            print(f"Skipping model {model_id}: metadata retrieval failed: {e}")
            return None

        # Save metadata as JSON
        metadata_path = os.path.join(directory, f"{model_id}_metadata.json")
        with open(metadata_path, 'w', encoding='utf-8') as f:
            json.dump(full_metadata, f, ensure_ascii=False, indent=4)

        # Create ZIP archive holding both files under their bare file names
        zip_filename = os.path.join(directory, f"{model_id}.zip")
        with zipfile.ZipFile(zip_filename, 'w') as zipf:
            zipf.write(model_path, os.path.basename(model_path))
            zipf.write(metadata_path, os.path.basename(metadata_path))

        # Clean up intermediate files
        os.remove(model_path)
        os.remove(metadata_path)
        return zip_filename

    except Exception as e:
        print(f"Error while processing model {model_id}: {e}")
        return None


In [49]:
# Search expression: the individual clauses are AND-ed together.
query = " AND ".join([
    'immun*',
    'curationstatus:"Manually curated"',
    'modelformat:"SBML"',
    'TAXONOMY:9606',
    'NOT submitter_keywords:"Immuno-oncology"',
])

# Root folder for everything this cell downloads (created if missing).
base_directory = "downloaded_models"
os.makedirs(base_directory, exist_ok=True)

# Collect every search hit first, page by page ...
models = get_all_models(query)

# ... then fetch, classify and archive each hit in turn.
for model_data in models:
    download_model_with_metadata(model_data, base_directory)

Page 1 downloaded, 10 models retrieved.
Page 2 downloaded, 10 models retrieved.
Page 3 downloaded, 10 models retrieved.
Page 4 downloaded, 10 models retrieved.
Page 3 downloaded, 10 models retrieved.
Page 4 downloaded, 10 models retrieved.
Page 5 downloaded, 10 models retrieved.
Page 6 downloaded, 10 models retrieved.
Page 5 downloaded, 10 models retrieved.
Page 6 downloaded, 10 models retrieved.
Page 7 downloaded, 3 models retrieved.
Page 7 downloaded, 3 models retrieved.
