# Training Notebook
Configuration details: [Configuration Notebook](project_config.ipynb)

Run project training configurations and generate training scripts from project meta-data.

## Setup

In [None]:
import os

# Directory of the project to load; passed to Project() in the "Project Info" cell.
# NOTE(review): this is an absolute, machine-specific path -- edit it for your environment.
projects_directory =  "/home/dinalt/ai_assets/forgather/examples/trainers/tiny_models"
# Configuration template name passed to Project() together with the directory above.
# NOTE(review): an empty string presumably selects the project's default configuration -- confirm.
config_template = ""

# Path to training script to use.
# Relative path; it is converted with os.path.abspath() where scripts are generated.
train_script_path = os.path.join('..', 'scripts', 'train_script.py')

## Project Info

In [None]:
import sys, os
modules_path = os.path.join('..', 'src')
if modules_path not in sys.path: sys.path.insert(0, modules_path)

from pprint import pp, pformat
from IPython import display as ds
from forgather import Project
import forgather.nb.notebooks as nb

# Instantiate the project from the directory and template chosen in Setup.
proj = Project(projects_directory, config_template)

# Project overview: README, meta-config, and the list of available configurations.
md = (
    nb.render_project_readme(proj.project_dir)
    + nb.render_meta(proj.meta, "### Meta Config\n")
    + nb.render_template_list(
        proj.meta.find_templates(proj.meta.config_prefix),
        "### Available Configurations\n",
    )
)

# Build just the meta object of the selected configuration; later cells read
# config_meta['models_dir'] and config_meta['output_dir'] from it.
config_meta = proj.config.meta()
md += (
    f"### {config_meta['config_name']}:\n\n"
    + nb.render_codeblock("python", pformat(config_meta))
)

# Render everything as a single Markdown output.
display(ds.Markdown(md))

### Launch Notebook Trainer

In [None]:
from accelerate import notebook_launcher
from forgather.ml.training_script import training_loop

# Run training_loop for the project's current configuration through
# Accelerate's notebook_launcher, using a single process.
notebook_launcher(
    training_loop,
    # Positional arguments forwarded to training_loop: project directory and configuration name.
    args=(proj.project_dir, proj.config_name),
    num_processes=1
)

### Run All Configurations

In [None]:
from accelerate import notebook_launcher
from forgather.ml.training_script import training_loop

#os.environ['CUDA_VISIBLE_DEVICES'] = str(3)
def run_all_configurations():
    """Train every configuration template found in the project, one after another.

    Iterates over the templates returned by
    ``proj.meta.find_templates(proj.meta.config_prefix)``, prints a banner for
    each one, and launches ``training_loop`` through ``notebook_launcher`` with
    a single process.

    The configuration name is held in a local loop variable rather than being
    assigned to ``proj.config_name``. Using the attribute as the loop target
    left ``proj`` pointing at the last configuration after the loop, while
    ``config_meta`` (built earlier in the notebook) still described the
    original one, so later cells silently disagreed about the active
    configuration.
    """
    templates = proj.meta.find_templates(proj.meta.config_prefix)
    for config_name, _ in templates:
        # 60-column banner, centered and padded with '-'.
        print(f"{ ' Starting ' + config_name + ' ':-^60}")
        notebook_launcher(
            training_loop,
            args=(proj.project_dir, config_name),
            num_processes=1,
        )

run_all_configurations()

### Generate Training Script

```python
def make_train_script(
    project_directory,
    config_template=None,
    script_name='train.sh',
    nproc='gpu',
    cuda_devices=None
):
```
Generate a bash training script from a project meta-config

The generated script will be written to 'project_directory' and all paths will be
relative to this location.

- project_directory: The project directory. Assumes meta-config is 'meta_config.yaml'
- script_name: The name of the output script. If None, the script can be specified on the command-line.
- nproc: Number of processes; 'gpu' is number of available GPUs
- cuda_devices: List of CUDA devices to limit training to.  

In [None]:
def generate_script(cuda_devices=None):
    """Generate a shell training script for the current configuration and show it.

    The script is named after the configuration file (base name with the
    extension replaced by ".sh"), generated via ``nb.make_train_script``, then
    read back from the project directory and rendered as Markdown.

    Args:
        cuda_devices: Forwarded unchanged to ``nb.make_train_script``.
    """
    config_stem, _ = os.path.splitext(os.path.basename(proj.config_name))
    script_name = config_stem + ".sh"

    nb.make_train_script(
        train_script_path=os.path.abspath(train_script_path),
        project_directory=proj.project_dir,
        config_template=proj.config_name,
        script_name=script_name,
        cuda_devices=cuda_devices,
    )

    # Read the generated file back so the output shows what was actually written.
    script_path = os.path.join(proj.project_dir, script_name)
    with open(script_path, 'r') as script_file:
        script_text = script_file.read()

    md = (
        f"#### Generated Shell Script\n"
        f"[{script_name}]({os.path.relpath(script_path)})\n"
        f"```bash\n{script_text}\n```"
    )
    display(ds.Markdown(md))
generate_script("3")

In [None]:
# Device strategy: give each configuration its own GPU, counting upward.
def sequential_devices(i=0):
    """Yield consecutive device indices as strings, starting at ``i``, forever."""
    device_index = i
    while True:
        yield str(device_index)
        device_index = device_index + 1

# Device strategy: every configuration gets the same fixed device string.
def same_devices(devices="0,1"):
    """Yield the given ``devices`` value unchanged, forever."""
    fixed_devices = devices
    while True:
        yield fixed_devices

# Device strategy: no restriction; every configuration gets None.
def all_devices():
    """Yield ``None`` forever, i.e. never name a specific device list."""
    while True:
        yield

def generate_all_scripts(device_iter=all_devices()):
    """Generate and display one shell training script per project configuration.

    Pairs each template returned by
    ``proj.meta.find_templates(proj.meta.config_prefix)`` with the next value
    from ``device_iter``, generates the script with ``nb.make_train_script``,
    then reads it back from the project directory and renders it as Markdown.

    Args:
        device_iter: Iterator supplying the ``cuda_devices`` value for each
            configuration. Iteration stops at the shorter of the two
            sequences, so a finite iterator limits how many scripts are made.
            The default generator object is created once, when the function is
            defined; that is harmless here because it only ever yields None.

    Changes from the previous version:
      * The configuration name is held in a local variable instead of being
        assigned to ``proj.config_name`` by the loop, which left ``proj``
        pointing at the last configuration after the call.
      * The script name is built from the base name of the configuration, as
        the single-script generator does, so the script is written directly
        into the project directory even if the configuration name contains a
        sub-directory. Note that configurations sharing a base name would then
        share a script name.
    """
    templates = proj.meta.find_templates(proj.meta.config_prefix)
    for devices, (config_name, _) in zip(device_iter, templates):
        script_name = os.path.splitext(os.path.basename(config_name))[0] + ".sh"
        nb.make_train_script(
            train_script_path=os.path.abspath(train_script_path),
            project_directory=proj.project_dir,
            config_template=config_name,
            script_name=script_name,
            cuda_devices=devices)

        # Read the generated file back so the output shows what was actually written.
        script_path = os.path.join(proj.project_dir, script_name)
        with open(script_path, 'r') as f:
            md = (
                f"[{script_name}]({os.path.relpath(script_path)})\n"
                f"```bash\n{f.read()}\n```"
            )
            display(ds.Markdown(md))

generate_all_scripts(sequential_devices(3))

### Run Script from Notebook
Launch the training script from the notebook.

Note: The terminal emulation of the notebook is lacking, thus rendering of progress bars may be broken.

In [None]:
# Preview the command line (built with cuda_devices='0') followed by the quoted
# configuration name; the next cell runs this same command in a shell.
print(f"{nb.get_train_cmdline(train_script_path, proj.meta, cuda_devices='0')} '{proj.config_name}'")

In [None]:
# Run the training command line in a shell, passing the project's current
# configuration name as a single-quoted argument.
!{nb.get_train_cmdline(train_script_path, proj.meta, cuda_devices="0")} '{proj.config_name}'

### View in Tensorboard
Note: If the notebook is running on the same machine as the trainer, remove "--bind_all"

In [None]:
# All models
# Points TensorBoard at config_meta['models_dir']; config_meta is built in the
# "Project Info" cell, so that cell must have been run first.
!tensorboard --bind_all --logdir "{config_meta['models_dir']}"

In [None]:
# Current model only
# Points TensorBoard at config_meta['output_dir']; config_meta is built in the
# "Project Info" cell, so that cell must have been run first.
!tensorboard --bind_all --logdir "{config_meta['output_dir']}"

### Cleanup
Note: These will show the target directory and ask for confirmation before proceeding.

#### Delete All

In [None]:
# Target is config_meta['models_dir']; the second argument is the message text.
# Per the Cleanup note, delete_dir is expected to ask for confirmation first.
nb.delete_dir(config_meta['models_dir'], "Delete all models in project")

#### Delete Configuration Output Directory

In [None]:
# Target is config_meta['output_dir']; the second argument is the message text.
# Per the Cleanup note, delete_dir is expected to ask for confirmation first.
nb.delete_dir(config_meta['output_dir'], "Delete output directory")