# Training Notebook
Configuration details: [Configuration Notebook](project_config.ipynb)

Run project training configurations and generate training scripts from project meta-data.

## Setup

In [None]:
import os
from ipyfilechooser import FileChooser

# Notebook defaults -- adjust these before running the remaining cells.
default_projects_directory = '../example_projects'  # directory the chooser starts from
default_project = ""  # joined onto the directory above; empty leaves it unchanged
config_template = "tiny_mistral.yaml"  # empty string -> the first available template is used later

# Directory-only chooser; the following cells read the choice from `fc.selected_path`.
fc = FileChooser(
    os.path.join(default_projects_directory, default_project),
    show_only_dirs=True,
    title="Select a Project Directory",
    select_default=True,
)
display(fc)

## Project Info

In [None]:
import os
import sys

from IPython import display as ds

# Put '../src' at the front of sys.path (once) before the project imports
# below -- presumably this is where the forgather and aiws packages live.
modules_path = os.path.join('..', 'src')
if modules_path not in sys.path:
    sys.path.insert(0, modules_path)

from forgather.config import load_config, ConfigEnvironment
from aiws.notebooks import get_train_cmdline, make_train_script
from aiws.config import base_preprocessor_globals, MetaConfig
from aiws.training_loop import TrainingScriptConfig
import aiws.notebooks as nb

# Show the selected project's README and meta-config, then list the
# configuration templates found under the meta-config's config prefix.
nb.show_project_readme(fc.selected_path)
meta = MetaConfig(fc.selected_path)
nb.display_meta(meta, "### Meta Config\n")
nb.list_templates(meta.find_templates(meta.config_prefix), "### Available Configurations\n")

# Use default, if unspecified.
# find_templates() yields tuples whose first element is the template name.
# next() is given a default so that a project without any templates does not
# abort this cell with a bare StopIteration; it only becomes an error when a
# default is actually needed, and then with a message naming the project.
default_config = next(iter(meta.find_templates(meta.config_prefix)), (None,))[0]
if not config_template:
    if default_config is None:
        raise FileNotFoundError(
            f"No configuration templates found under '{meta.config_prefix}' "
            f"for project '{fc.selected_path}'"
        )
    config_template = default_config

# Template path is resolved relative to the meta-config's search path.
config_template_path = os.path.join(meta.config_prefix, config_template)

# The preprocessor globals are the base set plus the selected project directory.
environment = ConfigEnvironment(
    searchpath=meta.searchpath,
    globals=base_preprocessor_globals() | {"project_directory": fc.selected_path},
)
config = environment.load(config_template_path).config

# Summary of the configuration the remaining cells operate on.
print(f"{' Active Configuration ':-^60}")
print(f"Project: {fc.selected_path}")
print(f"Configuration: {config_template_path}")

# (label, config attribute) pairs, printed in this order; each attribute is
# only read when its line is printed.
summary_fields = (
    ("Name", "experiment_name"),
    ("Description", "experiment_description"),
    ("Output Directory", "output_dir"),
    ("Logging Directory", "logging_dir"),
    ("Save Model", "do_save"),
)
for label, attribute in summary_fields:
    print(f"{label}: {getattr(config, attribute)}")

### Launch Notebook Trainer

In [None]:
from accelerate import notebook_launcher
from aiws.training_loop import training_loop

# Run training_loop for the selected project and configuration template
# through accelerate's notebook launcher, using a single process.
notebook_launcher(training_loop, args=(fc.selected_path, config_template), num_processes=1)

### Run All Configurations

In [None]:
from accelerate import notebook_launcher
from aiws.training_loop import training_loop

def run_all_configurations():
    """Train each configuration template of the selected project, in turn.

    Relies on the notebook globals `meta` and `fc`. For every template name
    yielded by `meta.find_templates(meta.config_prefix)`, a centred banner
    line is printed and `training_loop` is started through
    `notebook_launcher` with `(fc.selected_path, template name)` as its
    arguments and a single process.
    """
    templates = meta.find_templates(meta.config_prefix)
    for template_name, _unused in templates:
        banner = ' Starting ' + template_name + ' '
        print(f"{banner:-^60}")
        launch_args = (fc.selected_path, template_name)
        notebook_launcher(training_loop, args=launch_args, num_processes=1)

# Launch training for each of the project's configuration templates, one after another.
run_all_configurations()

### Generate Training Script

```python
def make_train_script(
    project_directory,
    config_template=None,
    script_name='train.sh',
    nproc='gpu',
    cuda_devices=None
):
```
Generate a bash training script from a project meta-config

The generated script will be written to 'project_directory' and all paths will be
relative to this location.

- project_directory: The project directory. Assumes meta-config is 'meta_config.yaml'
- config_template: The configuration template to use (optional; defaults to None).
- script_name: The name of the output script. If none, the script can be specified on the command-line.
- nproc: Number of processes; 'gpu' is number of available GPUs
- cuda_devices: Comma-separated string of CUDA device indices to limit training to, e.g. "0,1".

In [None]:
# Select name of generated script.
script_name = 'train.sh'

# Write the training script into the selected project directory.
make_train_script(
    project_directory=fc.selected_path,
    config_template=config_template,
    script_name=script_name,
    cuda_devices="0,1")

# Read back to verify
script_path = os.path.join(fc.selected_path, script_name)
with open(script_path, 'r') as f:
    script_text = f.read()

# Link to the file that was actually written: the label follows `script_name`
# and the target is the script's path relative to the current working
# directory (normally the notebook's directory), instead of a hard-coded
# 'train.sh' next to the notebook.
try:
    script_link = os.path.relpath(script_path)
except ValueError:
    # os.path.relpath raises on Windows when the two paths are on different drives.
    script_link = script_path

md = (
    f"#### Generated Shell Script\n"
    f"[{script_name}]({script_link})\n"
    f"```bash\n{script_text}\n```"
)
display(ds.Markdown(md))

### Run Script from Notebook
Launch the training script from the notebook.

Note: The notebook's terminal emulation is limited, so progress bars may not render correctly.

In [None]:
# Preview the shell command line: the training command for CUDA device '0',
# followed by the quoted configuration template.
train_cmdline = get_train_cmdline(meta, cuda_devices='0')
print(f"{train_cmdline} '{config_template}'")

In [None]:
# Run the training command in a shell, with the configuration template as its quoted argument.
!{get_train_cmdline(meta, cuda_devices="0")} '{config_template}'

### View in Tensorboard
Note: If the notebook is running on the same machine as the trainer, remove "--bind_all"

In [None]:
# All models: point TensorBoard at the models directory from the active configuration.
!tensorboard --bind_all --logdir "{config.models_dir}"

In [None]:
# Current model only: point TensorBoard at the active configuration's output directory.
!tensorboard --bind_all --logdir "{config.output_dir}"

### Cleanup
Note: These will show the target directory and ask for confirmation before proceeding.

#### Delete All

In [None]:
# Targets config.models_dir, i.e. all models in the project.
nb.delete_dir(config.models_dir, "Delete all models in project")

#### Delete Configuration Output Directory

In [None]:
# Targets config.output_dir, the output directory of the active configuration only.
nb.delete_dir(config.output_dir, "Delete output directory")