# Notebook template for executing code using DerivaML.

This notebook should be modified to perform the desired calculation and *COMMITTED* to GitHub prior to execution.

In [None]:
from deriva_ml import DerivaML, ExecutionConfiguration, DatasetSpec, MLVocab
from deriva.core.utils.globus_auth_utils import GlobusNativeLogin
from IPython.display import display, Markdown
from typing import Any
import os

## Parameters cell


In [None]:
# Default values for the run; the later cells read every name defined here.

# Input datasets for the execution.
# NOTE(review): the execution cell passes each entry as `rid=` when building a
# DatasetSpec, so in practice these look like dataset RIDs -- confirm which
# form callers are expected to supply.
datasets: list[DatasetSpec] = []
# Input assets, handed to ExecutionConfiguration as `assets`.
# `RID` is not imported by this notebook, so the annotation is quoted: an
# unquoted `list[RID]` is evaluated when the cell runs and raises NameError.
models: "list[RID]" = []
# Free-form values forwarded to ExecutionConfiguration as `parameters`.
parameters: dict[str, Any] = {}
# Forwarded to `create_execution(..., dry_run=...)`.
dry_run = False
# Left as None so the following cell falls back to the DERIVA_HOST /
# DERIVA_CATALOG_ID environment variables.
hostname = None
catalog_id = None

In [None]:
# Resolve the target server and catalog. Values set in the parameters cell take
# precedence; otherwise fall back to the environment.
hostname = hostname or os.environ.get("DERIVA_HOST")
catalog_id = catalog_id or os.environ.get("DERIVA_CATALOG_ID")

# Stop here with an actionable message instead of letting a None hostname or
# catalog id surface later as an obscure failure inside the login/connect calls.
_unset = [name for name, value in (("hostname", hostname), ("catalog_id", catalog_id)) if not value]
if _unset:
    raise ValueError(
        f"Missing {' and '.join(_unset)}: set it in the parameters cell or via the "
        "DERIVA_HOST / DERIVA_CATALOG_ID environment variables."
    )

# Authenticate against the host, reusing an existing login when there is one.
gnl = GlobusNativeLogin(host=hostname)
if gnl.is_logged_in([hostname]):
    print("You are already logged in.")
else:
    gnl.login([hostname], no_local_server=True, no_browser=True, refresh_tokens=True, update_bdbag_keychain=True)
    print("Login Successful")

# Change this line to call the domain specific class derived from DerivaML
deriva_ml = DerivaML(hostname=hostname, catalog_id=catalog_id)
print(f'Executing script {deriva_ml.executable_path} version: {deriva_ml.get_version()}')

Provide a function that does the desired calculation. Access to the downloaded datasets and assets is provided via the execution object. Place any files to be uploaded into the correct locations using the methods provided by the execution object.

In [None]:
def do_stuff(execution: "Execution") -> None:
    """Print the parameters, input asset paths and datasets of *execution*.

    Placeholder for the real calculation; replace the body with the work the
    notebook should perform.

    Args:
        execution: The execution object. It must expose ``parameters``,
            ``asset_paths`` (an iterable of objects with ``as_posix()``) and
            ``datasets``. The annotation is a quoted forward reference because
            ``Execution`` is not imported by this notebook; unquoted, it would
            raise NameError as soon as the function is defined.
    """
    print(f" Execution with parameters: {execution.parameters}")
    print(f" Execution with input assets: {[a.as_posix() for a in execution.asset_paths]}")
    print(f"Execution datasets: {execution.datasets}")

In [None]:
# Register the workflow-type vocabulary term referenced by the workflow below.
deriva_ml.add_term(MLVocab.workflow_type, "Demo Notebook", description="Initial setup of Model Notebook")

# `datasets` is declared as a list of DatasetSpec, but bare dataset RIDs are
# also accepted: a DatasetSpec is used as given, while a RID is wrapped in a
# DatasetSpec using the version reported by `deriva_ml.dataset_version`.
dataset_specs = [
    dataset
    if isinstance(dataset, DatasetSpec)
    else DatasetSpec(rid=dataset, version=deriva_ml.dataset_version(dataset))
    for dataset in datasets
]

# Describe the execution: input datasets, input assets, the workflow for this
# notebook and the user-supplied parameters.
# NOTE(review): the original comment states create_workflow returns an
# existing workflow if one is found -- confirm against the DerivaML docs.
config = ExecutionConfiguration(
    datasets=dataset_specs,
    assets=models,
    workflow=deriva_ml.create_workflow('demo-workflow', 'Demo Notebook'),
    parameters=parameters,
)
execution = deriva_ml.create_execution(config, dry_run=dry_run)

# Run the calculation inside the execution's context manager.
with execution as e:
    do_stuff(e)

# Outputs are uploaded only after the context manager has exited.
execution.upload_execution_outputs()