# Training Notebook
Configuration details: [Configuration Notebook](project_config.ipynb)

Run project training configurations and generate training scripts from project meta-data.

## Setup

In [None]:
# Notebook defaults: the directory holding the projects, the project the
# chooser opens on, and the configuration template used by the cells below.
import os

from ipyfilechooser import FileChooser

default_projects_directory = '../examples/trainers'
default_project = "dynamic_models"
config_template = "swi-glu.yaml"

# Directory-only chooser, pre-selected on the default project. Later cells read
# the chosen directory through `fc.selected_path`.
fc = FileChooser(
    os.path.join(default_projects_directory, default_project),
    title="Select a Project Directory",
    show_only_dirs=True,
    select_default=True,
)
display(fc)

## Project Info

In [None]:
import sys, os
modules_path = os.path.join('..', 'src')
from IPython import display as ds
if modules_path not in sys.path: sys.path.insert(0, modules_path)
from forgather.latent import Latent
from forgather.config import ConfigEnvironment, pconfig
from forgather.ml.notebooks import get_train_cmdline, make_train_script
from forgather.meta_config import preprocessor_globals, MetaConfig
from forgather.ml.training_script import TrainingScript
import forgather.ml.notebooks as nb

# Location of the common train-script, relative to this notebook
train_script_path = os.path.join('..', 'scripts', 'train_script.py')

# Summarize the selected project: README, meta-config and available templates
nb.show_project_readme(fc.selected_path)
meta = MetaConfig(fc.selected_path)
nb.display_meta(meta, "### Meta Config\n")
nb.list_templates(meta.find_templates(meta.config_prefix), "### Available Configurations\n")

# Fall back to the project's default configuration when no template is set.
# This must happen BEFORE the template is loaded; otherwise the load below is
# attempted with an empty template name and the fallback never takes effect.
if not config_template:
    config_template = meta.default_config()

environment = ConfigEnvironment(
    searchpath=meta.searchpath,
    global_vars=preprocessor_globals(fc.selected_path),
)

# Load the selected configuration, then print its name as a centered banner
# followed by its meta section.
config = environment.load(meta.config_path(config_template)).config
config_meta = config.meta()
print(f"{' ' + config_meta['config_name'] + ' ':-^60}")
# NOTE(review): `config.meta` is passed un-called here while it is called just
# above; presumably pconfig accepts the unevaluated node -- confirm.
pconfig(config.meta)

### Launch Notebook Trainer

In [None]:
from accelerate import notebook_launcher
from forgather.ml.training_script import training_loop

# Launch `training_loop` through accelerate's notebook launcher for the selected
# project directory and configuration template, using one process.
notebook_launcher(training_loop, args=(fc.selected_path, config_template), num_processes=1)

### Run All Configurations

In [None]:
from accelerate import notebook_launcher
from forgather.ml.training_script import training_loop

#os.environ['CUDA_VISIBLE_DEVICES'] = str(3)
def run_all_configurations():
    """Train every configuration template of the selected project in turn.

    Each template reported by ``meta.find_templates(meta.config_prefix)`` is
    announced with a centered banner line and then handed to
    ``notebook_launcher`` together with the selected project directory,
    using a single process.
    """
    for template_name, _ in meta.find_templates(meta.config_prefix):
        banner = ' Starting ' + template_name + ' '
        print(f"{banner:-^60}")
        launch_args = (fc.selected_path, template_name)
        notebook_launcher(training_loop, args=launch_args, num_processes=1)

run_all_configurations()

### Generate Training Script

```python
def make_train_script(
    project_directory,
    config_template=None,
    script_name='train.sh',
    nproc='gpu',
    cuda_devices=None
):
```
Generate a bash training script from a project meta-config

The generated script will be written to 'project_directory' and all paths will be
relative to this location.

- project_directory: The project directory. Assumes meta-config is 'meta_config.yaml'
- script_name: The name of the output script. If none, the script can be specified on the command-line.
- nproc: Number of processes; 'gpu' is number of available GPUs
- cuda_devices: List of CUDA devices to limit training to.  

In [None]:
def generate_script(cuda_devices=None):
    """Write a shell training script for `config_template` and render it.

    The script is named after the template's base name with a ".sh" extension
    and is produced by `make_train_script`. It is then read back from the
    selected project directory and displayed as Markdown (a link plus the
    script text) so the result can be checked.

    cuda_devices: forwarded unchanged to `make_train_script`.
    """
    template_stem, _ = os.path.splitext(os.path.basename(config_template))
    script_name = template_stem + ".sh"
    make_train_script(
        train_script_path=os.path.abspath(train_script_path),
        project_directory=fc.selected_path,
        config_template=config_template,
        script_name=script_name,
        cuda_devices=cuda_devices,
    )

    # Read the generated file back to verify what was written
    script_path = os.path.join(fc.selected_path, script_name)
    with open(script_path, 'r') as script_file:
        script_text = script_file.read()

    link_target = os.path.relpath(script_path)
    md = (
        "#### Generated Shell Script\n"
        f"[{script_name}]({link_target})\n"
        f"```bash\n{script_text}\n```"
    )
    display(ds.Markdown(md))

generate_script("3")

In [None]:
def sequential_devices(i=0):
    """Assign sequential GPUs to each configuration.

    Endless generator: yields the device index as a string, starting at `i`
    and increasing by one on every step.
    """
    device_index = i
    while True:
        yield str(device_index)
        device_index += 1

def same_devices(devices="0,1"):
    """Assign the same fixed set of GPUs to each configuration.

    Endless generator: yields the `devices` value unchanged on every step.
    """
    fixed_devices = devices
    while True:
        yield fixed_devices

def all_devices():
    """Assign all GPUs to all configurations.

    Endless generator: yields None on every step, i.e. no device restriction
    is passed along.
    """
    unrestricted = None
    while True:
        yield unrestricted

def generate_all_scripts(device_iter=None):
    """Generate and display a shell training script for every configuration template.

    For each template reported by ``meta.find_templates(meta.config_prefix)`` a
    script named ``<template base name>.sh`` is produced with
    ``make_train_script``, read back from the selected project directory and
    shown as Markdown (a link plus the script text).

    device_iter: iterator supplying the ``cuda_devices`` value for each template
        in turn, e.g. ``sequential_devices()`` or ``same_devices("0,1")``. When
        omitted, a fresh ``all_devices()`` iterator is used. If it runs out
        before the templates do, the remaining templates are skipped.
    """
    # Build the default per call: a generator placed in the signature is created
    # once, at definition time, and would be shared (state included) by all calls.
    if device_iter is None:
        device_iter = all_devices()

    # Templates go first in zip() so that running out of templates does not pull
    # (and silently discard) one extra value from a caller-supplied device_iter.
    templates = meta.find_templates(meta.config_prefix)
    for (config_template, _), devices in zip(templates, device_iter):
        # Base name only, as in generate_script(), so a template name containing
        # a directory part does not turn into a script path in a sub-directory.
        script_name = os.path.splitext(os.path.basename(config_template))[0] + ".sh"
        make_train_script(
            train_script_path=os.path.abspath(train_script_path),
            project_directory=fc.selected_path,
            config_template=config_template,
            script_name=script_name,
            cuda_devices=devices)

        # Read the generated file back so the notebook shows what was written
        script_path = os.path.join(fc.selected_path, script_name)
        with open(script_path, 'r') as f:
            script_text = f.read()

        md = (
            f"[{script_name}]({os.path.relpath(script_path)})\n"
            f"```bash\n{script_text}\n```"
        )
        display(ds.Markdown(md))

generate_all_scripts(same_devices("3"))

### Run Script from Notebook
Launch the training script from the notebook.

Note: The terminal emulation of the notebook is lacking, thus rendering of progress bars may be broken.

In [None]:
# Preview the training command line (built with cuda_devices='0'), followed by
# the configuration template as a single-quoted argument.
print(f"{get_train_cmdline(train_script_path, meta, cuda_devices='0')} '{config_template}'")

In [None]:
# Run the training command line in a shell: the output of get_train_cmdline()
# (with cuda_devices="0") followed by the single-quoted configuration template.
!{get_train_cmdline(train_script_path, meta, cuda_devices="0")} '{config_template}'

### View in Tensorboard
Note: If the notebook is running on the same machine as the trainer, remove "--bind_all"

In [None]:
# All models: start TensorBoard with config.models_dir as its log directory
!tensorboard --bind_all --logdir "{config.models_dir}"

In [None]:
# Current model only: start TensorBoard with config.output_dir as its log directory
!tensorboard --bind_all --logdir "{config.output_dir}"

### Cleanup
Note: These will show the target directory and ask for confirmation before proceeding.

#### Delete All

In [None]:
# Delete the directory named by the configuration's meta 'models_dir' entry.
# NOTE(review): the tensorboard cell reads this as config.models_dir -- confirm
# both spellings refer to the same directory.
nb.delete_dir(config.meta['models_dir'], "Delete all models in project")

#### Delete Configuration Output Directory

In [None]:
# Delete the directory named by the training script's 'output_dir' entry.
# NOTE(review): the tensorboard cell reads this as config.output_dir -- confirm
# both spellings refer to the same directory.
nb.delete_dir(config.training_script['output_dir'], "Delete output directory")