In [1]:
import os
import zipfile
import requests
import json
from bioservices import BioModels

In [2]:
# Module-level BioModels client; the functions in this notebook reach it
# through the global name `s` rather than receiving it as an argument.
s = BioModels()

[32mINFO    [bioservices.BioModels:363]: [0m [32mInitialising BioModels service (REST)[0m


In [3]:
def get_all_models(query, page_size=10):
    """
    Retrieve every model matching a search query, following pagination.

    Args:
        query (str): Search expression passed unchanged to ``s.search``.
        page_size (int): Number of results requested per page.

    Returns:
        list: The concatenated ``'models'`` entries of every page fetched.
            If a request fails, the error is printed and the models
            collected up to that point are returned.
    """
    models = []
    offset = 0
    page = 0

    while True:
        try:
            # Request exactly `page_size` results. Without this the service
            # falls back to its own default page length, and stepping the
            # offset by `page_size` would skip or duplicate models.
            search_results = s.search(query, offset=offset, numResults=page_size)

            # A missing or empty 'models' entry marks the end of the results.
            if 'models' not in search_results or not search_results['models']:
                break

            batch = search_results['models']
            models.extend(batch)

            # Advance by what was actually returned so the next request
            # starts right after the last model received, even if the
            # service returned fewer items than requested.
            offset += len(batch)
            page += 1
            print(f"Page {page} téléchargée, {len(batch)} modèles récupérés.")

        except Exception as e:
            print(f"Erreur lors de la récupération des modèles : {e}")
            break

    return models

In [4]:
def _select_sbml_member(names):
    """Return the archive member to treat as the SBML model, or None.

    Preference order: the first ``.xml`` entry that is neither a
    ``*_urn.xml`` nor a ``*manifest.xml`` file, then the first ``.sbml``
    entry. Extension matching is case-insensitive.
    """
    def is_model_xml(name):
        lowered = name.lower()
        return (lowered.endswith('.xml')
                and not lowered.endswith('_urn.xml')
                and not lowered.endswith('manifest.xml'))

    xml_member = next((n for n in names if is_model_xml(n)), None)
    if xml_member is not None:
        return xml_member
    return next((n for n in names if n.lower().endswith('.sbml')), None)


def download_model_file(model_id, directory):
    """
    Download a model archive and extract its SBML file into ``directory``.

    The archive is fetched with ``s.get_model_download`` to
    ``<directory>/<model_id>.zip``; the selected member is written directly
    into ``directory`` under its base name, and the downloaded archive is
    removed afterwards whether or not extraction succeeded.

    Args:
        model_id (str): Identifier of the model to download.
        directory (str): Existing directory receiving the extracted file.

    Returns:
        str | None: Path of the extracted file, or None when no SBML/XML
            member exists or any step fails (the error is printed).
    """
    zip_path = os.path.join(directory, f"{model_id}.zip")

    try:
        # Download the archive.
        s.get_model_download(model_id, output_filename=zip_path)

        with zipfile.ZipFile(zip_path, 'r') as zip_ref:
            member = _select_sbml_member(zip_ref.namelist())

            if member is None:
                print(f"Aucun fichier SBML/XML trouvé dans le ZIP pour {model_id}")
                return None

            # Write the member flat into `directory`. ZipFile.extract would
            # recreate the archive's internal folders, so the returned path
            # would not match the file's real location for nested members.
            extracted_path = os.path.join(directory, os.path.basename(member))
            with open(extracted_path, 'wb') as target:
                target.write(zip_ref.read(member))

            print(f"Fichier SBML extrait : {extracted_path}")

        return extracted_path

    except Exception as e:
        print(f"Erreur lors du téléchargement du modéle {model_id}: {e}")
        return None

    finally:
        # Always drop the downloaded archive: it has the same name as the
        # final per-model ZIP and must not be mistaken for it on failure.
        if os.path.isfile(zip_path):
            try:
                os.remove(zip_path)
            except OSError as e:
                print(f"Impossible de supprimer {zip_path}: {e}")


In [5]:
def _contains_keyword(data, keyword):
    """
    Recursively search for ``keyword`` in every string value of a nested structure.

    Args:
        data (any): Value to inspect; dicts and lists are traversed,
            strings are matched case-insensitively, other types are ignored.
        keyword (str): Substring to look for.

    Returns:
        bool: True if the keyword occurs in any string value, False otherwise.
    """
    if isinstance(data, dict):
        return any(_contains_keyword(v, keyword) for v in data.values())
    if isinstance(data, list):
        return any(_contains_keyword(item, keyword) for item in data)
    if isinstance(data, str):
        return keyword.lower() in data.lower()
    return False


def download_model_with_metadata(model_data, base_directory):
    """
    Download a model and its metadata, then save them together in a ZIP file.

    The full metadata returned by ``s.get_model`` decides the destination:
    models mentioning "immun" go to ``<base_directory>/immun``, otherwise
    models mentioning "T cell" go to ``<base_directory>/T-cell``; anything
    else is skipped. Inside that folder, IDs containing "BIOM" are filed
    under ``Curated_models`` and all others under ``No_Curated_models``.

    Args:
        model_data (dict): Search-result entry; only its 'id' key is read.
        base_directory (str): Root directory under which models are stored.

    Returns:
        None. Errors are printed rather than raised.
    """
    # Bound before the try so the error handler can always report an ID,
    # even when the failure is a missing 'id' key.
    model_id = None
    try:
        model_id = model_data['id']

        # Retrieve full metadata
        try:
            full_metadata = s.get_model(model_id)
        except Exception as e:
            print(f"Error retrieving full metadata for {model_id} : {e}")
            return

        # Determine destination directory based on content
        if _contains_keyword(full_metadata, "immun"):
            category = "immun"
        elif _contains_keyword(full_metadata, "T cell"):
            category = "T-cell"
        else:
            return

        curation = "Curated_models" if "BIOM" in model_id else "No_Curated_models"
        directory = os.path.join(base_directory, category, curation)
        os.makedirs(directory, exist_ok=True)

        # Download SBML file
        model_path = download_model_file(model_id, directory)
        if model_path is None:
            return

        metadata_path = os.path.join(directory, f"{model_id}_metadata.json")
        zip_filename = os.path.join(directory, f"{model_id}.zip")

        try:
            # Save metadata as JSON
            with open(metadata_path, 'w', encoding='utf-8') as f:
                json.dump(full_metadata, f, ensure_ascii=False, indent=4)

            # Create ZIP file containing model and metadata
            with zipfile.ZipFile(zip_filename, 'w') as zipf:
                zipf.write(model_path, os.path.basename(model_path))
                zipf.write(metadata_path, os.path.basename(metadata_path))
        finally:
            # Remove temporary files even if serialisation or archiving failed.
            for temp_path in (model_path, metadata_path):
                if os.path.exists(temp_path):
                    os.remove(temp_path)

        print(f"Model {model_id} and its metadata saved to {zip_filename}")

    except Exception as e:
        print(f"An error occurred while processing model {model_id} : {e}")

In [8]:
def main(query=None, base_directory="downloaded_models_boolean"):
    """
    Search for models, then download and classify each result.

    Args:
        query (str | None): Search expression handed to ``get_all_models``.
            When None, the notebook's default query is used.
        base_directory (str): Root output directory; created if missing.

    Returns:
        None
    """
    if query is None:
        query = (
            'boolean AND modelformat:"SBML" AND NOT modellingapproach:"logical model"'
        )

    # Root output directory
    os.makedirs(base_directory, exist_ok=True)

    # Fetch every matching model
    models = get_all_models(query)

    # Download each model and file it under the matching category
    for model_data in models:
        download_model_with_metadata(model_data, base_directory)

In [None]:
# Entry point: run the whole search-and-download pipeline via main().
if __name__ == "__main__":
    main()

Page 1 téléchargée, 10 modèles récupérés.
Page 2 téléchargée, 5 modèles récupérés.


[32mINFO    [bioservices.BioModels:171]: [0m [32mSaving file MODEL2312140001.zip[0m


Fichier SBML extrait : downloaded_models_logical\T-cell\No_Curated_models\MiDAS_Cell_Cycle_Arrests_Apoptosis_Fine.XML
Model MODEL2312140001 and its metadata saved to downloaded_models_logical\T-cell\No_Curated_models\MODEL2312140001.zip


[32mINFO    [bioservices.BioModels:171]: [0m [32mSaving file MODEL2006170002.zip[0m


Fichier SBML extrait : downloaded_models_logical\T-cell\No_Curated_models\Sizek_Regan_PI3K_growth_CellCycle_Apoptosis.sbml
Model MODEL2006170002 and its metadata saved to downloaded_models_logical\T-cell\No_Curated_models\MODEL2006170002.zip
