# Training Notebook

Configuration details: [Configuration Notebook](project_config.ipynb)

Run project training configurations and generate training scripts from project meta-data.

https://huggingface.co/blog/codeparrot

In [None]:
import sys, os
# Prepend '../src' (relative to the notebook's working directory) to the module
# search path, once, before the project imports below.
# Presumably this is where the 'forgather' and 'aiws' packages live -- confirm.
modules_path = os.path.join('..', 'src')
if modules_path not in sys.path: sys.path.insert(0, modules_path)
from IPython import display

from forgather.config import load_config, ConfigEnvironment
from aiws.notebooks import get_train_cmdline, make_train_script
from aiws.config import base_preprocessor_globals, MetaConfig
from aiws.training_loop import TrainingScriptConfig
import aiws.notebooks as nb

# --- User settings -----------------------------------------------------------
# Project directory to work with.
project_directory = "exclude_params"

# Configuration template (file name under the project's config prefix).
config_template = "optifactory.yaml"

# --- Project overview --------------------------------------------------------
nb.show_project_readme(project_directory)
meta = MetaConfig(project_directory)
nb.display_meta(meta, "### Meta Config\n")
available_templates = meta.find_templates(meta.config_prefix)
nb.list_templates(available_templates, "### Available Configurations\n")

# --- Load the selected configuration ----------------------------------------
config_template_path = os.path.join(meta.config_prefix, config_template)
environment = ConfigEnvironment(
    searchpath=meta.searchpath,
    # Base preprocessor globals, extended with the selected project directory.
    globals=base_preprocessor_globals() | {"project_directory": project_directory},
)
config = environment.load(config_template_path).config

# --- Summary of the active configuration ------------------------------------
print(f"{' Active Configuration ':-^60}")
print(f"Project: {project_directory}")
print(f"Configuration: {config_template_path}")
# (label, attribute-on-config) pairs, printed in this order.
summary_fields = (
    ("Name", "experiment_name"),
    ("Description", "experiment_description"),
    ("Output Directory", "output_dir"),
    ("Logging Directory", "logging_dir"),
    ("Save Model", "do_save"),
)
for label, attribute in summary_fields:
    print(f"{label}: {getattr(config, attribute)}")

### Launch Notebook Trainer

In [None]:
from accelerate import notebook_launcher
from aiws.training_loop import training_loop

# Positional arguments forwarded to training_loop by the launcher.
launch_args = (project_directory, config_template)

# Run the training loop from inside the notebook using a single process.
notebook_launcher(training_loop, args=launch_args, num_processes=1)

### Generate Training Script

```python
def make_train_script(
    project_directory,
    config_template=None,
    script_name='train.sh',
    nproc='gpu',
    cuda_devices=None
):
```
Generate a bash training script from a project meta-config

The generated script will be written to 'project_directory' and all paths will be
relative to this location.

- project_directory: The project directory. Assumes meta-config is 'meta_config.yaml'
- script_name: The name of the output script. If none, the script can be specified on the command-line.
- nproc: Number of processes; 'gpu' is number of available GPUs
- cuda_devices: List of CUDA devices to limit training to.  

e.g. To use only CUDA devices 0 and 1, pass "0,1"

```
.../my_project$ ./train.sh
```

In [None]:
# Select name of generated script.
script_name = 'base_2gpu.sh'

# Write the training script for the selected configuration, limited to CUDA
# devices 0 and 1.
make_train_script(
    project_directory=project_directory,
    config_template=config_template,
    script_name=script_name,
    cuda_devices="0,1")

# Read back to verify
script_path = os.path.join(project_directory, script_name)
with open(script_path, 'r') as f:
    script_text = f.read()

# The link label and target follow the script that was actually generated,
# rather than a hard-coded 'train.sh' in the notebook's directory.
md = (
    f"#### Generated Shell Script\n"
    f"[{script_name}]({script_path})\n"
    f"```bash\n{script_text}\n```"
)
display.display(display.Markdown(md))

### Run Script from Notebook
Launch the training script from the notebook.

Note: The terminal emulation of the notebook is lacking, thus rendering of progress bars may be broken.

In [None]:
# Preview the training command (restricted to CUDA device 0) without running it.
train_cmdline = get_train_cmdline(meta, cuda_devices='0')
print(f"{train_cmdline} '{config_template}'")

In [None]:
# Run the training command through the shell: the command line comes from
# get_train_cmdline (CUDA device "0") and the selected configuration template
# is appended as a single-quoted argument.
!{get_train_cmdline(meta, cuda_devices="0")} '{config_template}'

### View in Tensorboard
Note: If the notebook is running on the same machine as the trainer, remove "--bind_all"

In [None]:
# Start TensorBoard through the shell, pointing --logdir at the active
# configuration's output directory.
# NOTE(review): the configuration also exposes a separate `logging_dir`;
# confirm `output_dir` is the intended log root.
!tensorboard --bind_all --logdir "{config.output_dir}"

#### Generate Bash Script

This will output a shell-script which will invoke the training script with the arguments for this project.

```bash
./train.sh path/to/experiment.yaml
```

If 'cuda_devices' is not None, execution is restricted to the listed subset of the available GPUs.
```python
# Restrict training to GPU's 0 and 1
make_bash_script(metacfg, cuda_devices="0,1")
```

In [None]:
# Generate 'train.sh' in the project directory, restricted to CUDA device 0.
#
# This cell previously called make_bash_script(metacfg, ...). Neither name is
# defined or imported in this notebook, so it raised NameError on a fresh
# kernel. It now uses make_train_script, which is imported at the top.
# NOTE(review): config_template is left at its default so the configuration
# can be given on the command line (./train.sh path/to/experiment.yaml);
# confirm that this is how make_train_script treats a missing template.
train_script_name = 'train.sh'
make_train_script(
    project_directory=project_directory,
    script_name=train_script_name,
    cuda_devices="0")

# Read back to verify
train_script_path = os.path.join(project_directory, train_script_name)
with open(train_script_path, 'r') as f:
    train_script_text = f.read()

md = (
    f"#### Generated Shell Script\n"
    f"[{train_script_name}]({train_script_path})\n"
    f"```bash\n{train_script_text}\n```"
)
display.display(display.Markdown(md))

### Cleanup
Note: These will show the target directory and ask for confirmation before proceeding.

#### Delete All

In [None]:
# Delete the project's model directory.
# The MetaConfig instance in this notebook is named `meta`; the old name
# `metacfg` is never defined and raised NameError on a fresh kernel.
# NOTE(review): assumes MetaConfig still exposes `model_dir` -- confirm.
nb.delete_dir(meta.model_dir, "Delete all models in project")

#### Delete Configuration Output Directory

In [None]:
# Remove only the output directory of the currently active configuration.
active_output_dir = config.output_dir
nb.delete_dir(active_output_dir, "Delete output directory")