# Notebook template for executing code using DerivaML.

This notebook should be modified to perform the desired calculation and *COMMITTED* to GitHub prior to execution.

In [None]:
from typing import Any
from deriva_ml import DerivaML, ExecutionConfiguration, MLVocab, Execution, RID, DerivaMLConfig, DatasetConfigList
from hydra_zen import launch, zen, builds

from dataclasses import dataclass
from pydantic_core.core_schema import dataclass_schema
import inspect

import configure

## Parameters cell

Set up any parameters that you want in the overrides for the configuration.  Having an "=" sign in a parameter value messes up papermill, so we will need to provide each override separately, and then assemble the override list in the next, non-parameter cell.

In [None]:
# Notebook parameter with its default value; papermill may replace it for a given run.
dry_run: bool = False

In [None]:
# Assemble the Hydra override list from the notebook parameters.
# `dry_run` is a top-level field of the notebook configuration, so the override
# key has no prefix. The list must not be reset afterwards, otherwise the
# parameter value never reaches the launched job.
overrides = [f"dry_run={dry_run}"]

Now we can set up the hydra configuration for this notebook.  We will place all of our possible configurations in a separate file and assemble them into a single configuration object using the `builds` function provided by hydra_zen.

In [None]:
# Load and initialize the configuration store.
# `configure` is the local module imported at the top of the notebook; the store it returns
# is used in later cells to register configuration groups and the notebook configuration.
store = configure.init_config()

Provide a function that does the desired calculation.  Access to the downloaded datasets and assets is provided via the execution object.  Place any files to be uploaded into the correct locations using the methods provided via the execution object.

In [None]:
# Placeholder model: replace the body with the real calculation.
def model(learning_rate: float, epochs: int, execution: Execution):
    """Report the hyperparameters and the datasets attached to the execution.

    Args:
        learning_rate: Learning rate to report.
        epochs: Number of epochs to report.
        execution: Execution object whose ``datasets`` attribute is printed.
    """
    banner = f"Training with learning rate: {learning_rate} and epochs: {epochs} and dataset"
    print(banner)
    print(execution.datasets)

# Wrap `model` in a hydra-zen configuration that carries default hyperparameters.
# `zen_partial=True` leaves the call incomplete, so `execution` can be supplied at
# run time rather than at configuration time.
ModelConfig = builds(
    model,
    learning_rate=1e-3,
    epochs=10,
    populate_full_signature=True,
    zen_partial=True,
)

# Register two named options under the "model_config" group.
model_store = store(group="model_config")
model_store(ModelConfig, name="model1", learning_rate=1e-3, epochs=10)
model_store(ModelConfig, name="model2", learning_rate=23, epochs=20)

Since we are using Hydra-Zen, we need to create a configuration object for this notebook.  The standard hydra approach would be to encapsulate the entire job in a single function and then capture the configuration as arguments to that function. Since we are in a notebook and may not want to have all of the operations in a single function, we will create a bridging function that will have all of the configuration parameters as arguments and whose sole purpose is to expose those parameters as variables to the notebook.

The parameters for the get_configuration function will need to be changed if we need to alter the set of variables that the notebook will need.

In [None]:
# Define the configuration parameters for the notebook.
# These are bare annotations: they declare the notebook-level names and their
# types without assigning values. The values are bound at the end of this cell
# from the return value of the launched job.

deriva_ml: DerivaMLConfig
datasets: DatasetConfigList
assets: list[RID]
model_config: Any
dry_run: bool

def get_configuration(
        deriva_ml: DerivaMLConfig,
        datasets: DatasetConfigList,
        assets: list[RID],
        model_config: Any,
        dry_run: bool
):
    """Return the received configuration values as a tuple, in signature order.

    The function does no work of its own; it hands the values it was called
    with back to the caller so they can be unpacked into notebook variables.
    The tuple is built from the function's own signature, so adding or
    removing a parameter changes the result without editing the body.

    Returns:
        A tuple holding one value per parameter, ordered as declared.
    """
    # Snapshot the arguments first, before any other local name is bound.
    received = locals()
    declared_names = inspect.signature(get_configuration).parameters
    return tuple(received[name] for name in declared_names)

# Hydra defaults list: "_self_" first, followed by one single-entry mapping per
# configuration group that selects the option used when no override is given.
notebook_defaults = ["_self_"] + [
    {group: option}
    for group, option in {
        "deriva_ml": "local",
        "datasets": "test1",
        "assets": "asset1",
        "model_config": "model1",
    }.items()
]

# Configuration for the bridging function: every parameter of get_configuration
# becomes a field, and dry_run defaults to False.
NotebookConfig = builds(
    get_configuration,
    dry_run=False,
    populate_full_signature=True,
    hydra_defaults=notebook_defaults,
)

# Register the notebook configuration under the name that launch() looks up below,
# then push everything in the store to Hydra.
# NOTE(review): overwrite_ok=True presumably allows this cell to be re-run without a
# duplicate-entry error — confirm against the hydra-zen store documentation.
store(NotebookConfig, name="notebook_config")
store.add_to_hydra_store(overwrite_ok=True)

# Run get_configuration as a Hydra job so that defaults and overrides are resolved.
jr = launch(
    NotebookConfig,
    zen(get_configuration),
    version_base="1.3",
    config_name="notebook_config",
    job_name="DemoNotebook",
    overrides=overrides,
)

# The job returns the values in parameter order; expose them as notebook variables.
deriva_ml, datasets, assets, model_config, dry_run = jr.return_value


In [None]:
# Change this line to call the domain specific class derived from DerivaML.
# The resolved `deriva_ml` configuration is dumped to a mapping and passed on as keyword arguments.
ml_instance = DerivaML(**deriva_ml.model_dump())

In [None]:
# Add the "Demo Notebook" term under the workflow-type vocabulary; the same name is passed
# as the second argument to create_workflow below.
# NOTE(review): presumably an existing term/workflow is returned if one is found — confirm.
ml_instance.add_term(MLVocab.workflow_type, "Demo Notebook", description="Initial setup of Model Notebook")

# Create an execution instance that will work with the latest version of the input datasets.
# `datasets` and `assets` are the values resolved from the notebook configuration.
config = ExecutionConfiguration(
    datasets=datasets,
    assets=assets,
    workflow=ml_instance.create_workflow('demo-workflow', 'Demo Notebook'),
)
# `dry_run` also comes from the resolved notebook configuration.
execution = ml_instance.create_execution(config, dry_run=dry_run)
with execution as e:
    # model_config receives only the execution here; its other arguments come from the configuration.
    model_config(execution=e)

# Outputs are uploaded only after the execution context has exited.
execution.upload_execution_outputs()