# Training Notebook

Configuration details: [Configuration Notebook](project_config.ipynb)

Run project training configurations and generate training scripts from project meta-data.

In [None]:
import sys, os
# Adjust this to match the relative location of the notebook to the 'src' directory
modules_path = os.path.join('..', 'src')
if modules_path not in sys.path: sys.path.insert(0, modules_path)
from IPython import display

from forgather.config import load_config, ConfigEnvironment
from aiws.notebooks import training_loop, get_train_cmdline, make_train_script
from aiws.config import base_preprocessor_globals
import aiws.notebooks as nb

# Defaults used when no selection is made through the file-chooser widgets.
project_directory, config_template = "compare_trainers", "project.yaml"

# Placeholders for the file-chooser widgets; later cells assign the real
# widgets to these names when they are run.
fc_project_directory = fc_config_template = None

### Load Project Meta-config

In [None]:
# Optional: pick the project directory interactively instead of using the
# default. The widget only lists directories.
from ipyfilechooser import FileChooser

project_chooser_options = dict(
    show_only_dirs=True,
    select_desc="Select project directory",
)
fc_project_directory = FileChooser(**project_chooser_options)
display.display(fc_project_directory)

In [None]:
# Load project meta-configuration.
#
# The file chooser is optional. `fc_project_directory` is None when the
# chooser cell was skipped, and its `.selected` is None when the widget was
# shown but nothing was picked (e.g. after "Restart & Run All"). In both
# cases keep the default `project_directory` instead of failing inside
# os.path.relpath(None).
selected_project_directory = getattr(fc_project_directory, "selected", None)
if selected_project_directory:
    project_directory = os.path.relpath(selected_project_directory)

# The meta-config is expected at '<project_directory>/meta_config.yaml'.
meta_config_path = os.path.join(project_directory, 'meta_config.yaml')
metacfg = load_config(meta_config_path, project_directory=project_directory)
nb.display_meta_config(meta_config_path, metacfg, "### Meta Config\n")

### Load Project Configuration

In [None]:
# Optional: override default configuration with widget file selector.
# Import here as well, so this cell still works when the (optional)
# project-directory chooser cell was skipped.
from ipyfilechooser import FileChooser

# Start in the project directory, list only YAML files, and sandbox the
# widget to the project directory.
fc_config_template = FileChooser(
    path=project_directory,
    filter_pattern=["*.yaml"],
    sandbox_path=project_directory,
    select_desc="Select a configuration to load",
)
display.display(fc_config_template)

In [None]:
# Optional: Load and display experiment information.
#
# Use the configuration picked in the config-template chooser when there is
# one. Otherwise (chooser cell skipped, or nothing selected yet) fall back to
# the default template inside the project's template directory. The check is
# on the *config* chooser, not the project-directory chooser.
selected_config_template = getattr(fc_config_template, "selected", None)
if selected_config_template:
    config_template_path = os.path.relpath(selected_config_template)
else:
    config_template_path = os.path.join(metacfg.project_templates, config_template)

cfg_environment = ConfigEnvironment(
    searchpath=metacfg.search_paths,
    # Add project directory to base globals
    globals=base_preprocessor_globals() | dict(project_directory=project_directory)
)
loaded_config = cfg_environment.load(config_template_path).config

# Summarize the loaded experiment so it can be checked before training.
print(f"Project: {project_directory}")
print(f"Configuration: {config_template_path}")
print(f"Name: {loaded_config.experiment_name}")
print(f"Description: {loaded_config.experiment_description}")
print(f"Output Directory: {loaded_config.output_dir}")
print(f"Logging Directory: {loaded_config.logging_dir}")
print(f"Save Model: {loaded_config.do_save}")

### Train in Notebook
This will train directly in the notebook using Accelerate's '[notebook_launcher](https://huggingface.co/docs/accelerate/basic_tutorials/notebook)'

In [None]:
from accelerate import notebook_launcher

# Positional arguments handed to training_loop: the project directory and
# the configuration selected above.
training_loop_args = (project_directory, config_template_path)

# Run the training loop inside the notebook with a single process.
notebook_launcher(training_loop, args=training_loop_args, num_processes=1)

### Generate Training Script

```python
def make_train_script(
    project_directory,
    config_template=None,
    script_name='train.sh',
    nproc='gpu',
    cuda_devices=None
):
```
Generate a bash training script from a project meta-config

The generated script will be written to 'project_directory' and all paths will be
relative to this location.

- project_directory: The project directory. Assumes meta-config is 'meta_config.yaml'
- script_name: The name of the output script. If none, the script can be specified on the command-line.
- nproc: Number of processes; 'gpu' is number of available GPUs
- cuda_devices: List of CUDA devices to limit training to.  

e.g., to restrict training to CUDA devices 0 and 1, pass "0,1".

```
.../my_project$ ./train.sh
```

In [None]:
# Select name of generated script.
script_name = 'train.sh'

# NOTE(review): `config_template` is the default template name, not the
# widget selection held in `config_template_path` -- confirm which form
# make_train_script expects before switching.
make_train_script(
    project_directory=project_directory,
    config_template=config_template,
    script_name=script_name,
    cuda_devices="0")

# Read back to verify. The script is opened from the project directory, so
# build the link from the same path and the chosen script name rather than a
# hard-coded 'train.sh' next to the notebook.
script_path = os.path.join(project_directory, script_name)
with open(script_path, 'r') as f:
    script_text = f.read()

# Markdown links use forward slashes regardless of platform.
script_link = script_path.replace(os.sep, '/')
md = (
    f"#### Generated Shell Script\n"
    f"[{script_name}]({script_link})\n"
    f"```bash\n{script_text}\n```"
)
display.display(display.Markdown(md))

### Run Script from Notebook
Launch the training script from the notebook.

Note: The terminal emulation of the notebook is lacking, thus rendering of progress bars may be broken.

In [None]:
!{train_cmdline(metacfg, cuda_devices="0,1")} '{experiment_path}'

### View in Tensorboard
Note: If the notebook is running on the same machine as the trainer, remove "--bind_all"

In [None]:
# Start TensorBoard as a shell command, pointing --logdir at the output
# directory of the configuration loaded earlier in this notebook.
!tensorboard --bind_all --logdir "{loaded_config.output_dir}"