# Notebook template for executing code using DerivaML.

This notebook should be modified to perform the desired calculation and *COMMITTED* to GitHub prior to execution.

In [None]:
from deriva_ml import DerivaML, ExecutionConfiguration, MLVocab, Execution, RID, DerivaMLConfig, DatasetConfigList
from hydra_zen import launch, zen, builds

import configure

## Parameters cell


In [None]:
# Set up the default configuration for the notebook.
# Each entry is a Hydra override string applied when the configuration is launched.
# `dry_run` is already a field of the notebook config (it is a parameter of
# get_configuration), so it is overridden by name without a `+` prefix.
overrides = [
    "dry_run=True",
]

In [None]:
# Declare the types of the notebook-level configuration values.
# These names are assigned further down from the result of the launch call.
deriva_ml: DerivaMLConfig
datasets: DatasetConfigList
assets: list[RID]
dry_run: bool

# Load and initialize the configuration store (via the `configure` module imported above).
store = configure.init_config()

# Modify this to include any additional configuration options, such as model parameters.
def get_configuration(
    deriva_ml: DerivaMLConfig,
    datasets: DatasetConfigList,
    assets: list[RID],
    dry_run: bool = False,
):
    """Return the configuration values it was called with, as a tuple.

    The values come back in parameter order:
    ``(deriva_ml, datasets, assets, dry_run)``.  When a parameter is added to
    the signature, add it to the returned tuple (and to the unpacking of the
    launch result) as well.
    """
    return (deriva_ml, datasets, assets, dry_run)

# Build a config class from get_configuration's full signature and name the
# default config-group entry to use for each of deriva_ml, datasets and assets.
NotebookConfig = builds(
    get_configuration,
    populate_full_signature=True,
    hydra_defaults=[
        "_self_",
        {"deriva_ml": "local"},
        {"datasets": "test1"},
        {"assets": "asset1"},
    ],
)

# Register the config under the name passed to launch() as config_name,
# then push the store's contents into Hydra's config store.
store(NotebookConfig, name="notebook_config")
store.add_to_hydra_store(overwrite_ok=True)

# Launch get_configuration with the notebook overrides applied and unpack
# the tuple it returns into the notebook-level variables.
deriva_ml, datasets, assets, dry_run = launch(
    NotebookConfig,
    zen(get_configuration),
    version_base="1.3",
    config_name="notebook_config",
    job_name="Demo Notebook",
    overrides=overrides,
).return_value


# Show the resolved inputs in the notebook output.
display("Datasets", datasets)
display("Assets", assets)

In [None]:
# Change this line to call the domain-specific class derived from DerivaML.
# The constructor's keyword arguments are the fields dumped from the resolved
# deriva_ml configuration object.
ml_instance = DerivaML(**deriva_ml.model_dump())

Provide a function that performs the desired calculation. Access to the downloaded datasets and assets is provided via the execution object. Place any files to be uploaded into the correct locations using the methods provided by the execution object.

In [None]:
def do_stuff(execution: Execution):
    """Placeholder calculation: print the execution's input assets and datasets.

    Replace the body with the real computation.  ``execution`` is the object
    bound by the ``with execution as e`` statement that calls this function.
    """
    assets_line = f" Execution with input assets: {execution.asset_paths}"
    print(assets_line)
    datasets_line = f"Execution datasets: {execution.datasets}"
    print(datasets_line)

In [None]:
# Register the "Demo Notebook" workflow-type term that the workflow below refers to.
ml_instance.add_term(
    MLVocab.workflow_type,
    "Demo Notebook",
    description="Initial setup of Model Notebook",
)

# Create a workflow instance for this specific version of the notebook.
# (Template note: an existing workflow is returned if one is found.)
workflow = ml_instance.create_workflow("demo-workflow", "Demo Notebook")

# Describe the execution: the input datasets and assets, and the workflow it runs under.
config = ExecutionConfiguration(
    datasets=datasets,
    assets=assets,
    workflow=workflow,
)
execution = ml_instance.create_execution(config, dry_run=dry_run)

# Run the calculation inside the execution context ...
with execution as e:
    do_stuff(e)

# ... and upload its outputs once the context has exited.
execution.upload_execution_outputs()