# Training Notebook

Configuration details: [Configuration Notebook](project_config.ipynb)

Run project training configurations and generate training scripts from project meta-data.

https://huggingface.co/blog/codeparrot

In [4]:
import sys, os
modules_path = os.path.join('..', 'src')
if modules_path not in sys.path: sys.path.insert(0, modules_path)
from IPython import display

from forgather.config import load_config, ConfigEnvironment
from aiws.notebooks import get_train_cmdline, make_train_script
from aiws.config import base_preprocessor_globals, MetaConfig
from aiws.training_loop import TrainingScriptConfig
import aiws.notebooks as nb

# Project to work on; MetaConfig is constructed from this directory.
project_directory = "micro_llama"

# Configuration template, resolved relative to the meta-config's config prefix.
config_template = "micro_llama.yaml"


# Show the project's README, its meta-config, and the configurations it offers.
nb.show_project_readme(project_directory)
meta = MetaConfig(project_directory)
nb.display_meta(meta, "### Meta Config\n")
available_templates = meta.find_templates(meta.config_prefix)
nb.list_templates(available_templates, "### Available Configurations\n")

# Load the selected configuration through the template environment, exposing
# the project directory to the preprocessor alongside the base globals.
config_template_path = os.path.join(meta.config_prefix, config_template)
preprocessor_globals = base_preprocessor_globals() | dict(project_directory=project_directory)
environment = ConfigEnvironment(
    searchpath=meta.searchpath,
    globals=preprocessor_globals,
)
config = environment.load(config_template_path).config

# Summarize the active configuration; each config attribute is read only when
# its line is printed.
summary_fields = (
    ("Name", "experiment_name"),
    ("Description", "experiment_description"),
    ("Output Directory", "output_dir"),
    ("Logging Directory", "logging_dir"),
    ("Save Model", "do_save"),
)
print(f"{' Active Configuration ':-^60}")
print(f"Project: {project_directory}")
print(f"Configuration: {config_template_path}")
for label, attribute in summary_fields:
    print(f"{label}: {getattr(config, attribute)}")

## Micro Llama

Train a tiny llama model.

d=256, h=4, l=4, mlp=1024, tokenizer=TinyStories 2K

### Meta Config
Project Directory: micro_llama

Meta Config: [micro_llama/meta.yaml](micro_llama/meta.yaml)

Template Search Paths:
- [micro_llama/templates](micro_llama/templates)
- [../templates](../templates)


### Available Configurations
- [micro_llama.yaml](micro_llama/templates/configs/micro_llama.yaml)


------------------- Active Configuration -------------------
Project: micro_llama
Configuration: configs/micro_llama.yaml
Name: Micro Llama
Description: Train a tiny LLama model from scratch.
Output Directory: micro_llama/output_models/default_model
Logging Directory: micro_llama/output_models/default_model/runs/Micro Llama_1721346357964679937
Save Model: True


### Launch Notebook Trainer

In [3]:
from accelerate import notebook_launcher
from aiws.training_loop import training_loop

# Launch the training loop from the notebook with a single process, handing it
# the selected project directory and configuration template.
launch_args = (project_directory, config_template)
notebook_launcher(training_loop, args=launch_args, num_processes=1)

Launching training on one GPU.
Creating directory: micro_llama/output_models/default_model/runs/Micro Llama_1721345624489813743
**** Training Started *****
experiment_name: Micro Llama
experiment_description: Train a tiny LLama model from scratch.
output_dir: micro_llama/output_models/default_model
logging_dir: micro_llama/output_models/default_model/runs/Micro Llama_1721345624489813743


  0%|                                                                                                         …

total_examples: 2,119,680
total_train_samples: 2,119,680
per_device_train_batch_size: 64
actual_per_device_batch_size: 64
total_train_batch_size: 64
max_steps: 2,000
total_parameters: 5.2M
trainable_parameters: 5.2M
model:
LlamaForCausalLM(
  (model): LlamaModel(
    (embed_tokens): Embedding(2000, 256)
    (layers): ModuleList(
      (0-3): 4 x LlamaDecoderLayer(
        (self_attn): LlamaFlashAttention2(
          (q_proj): Linear(in_features=256, out_features=256, bias=False)
          (k_proj): Linear(in_features=256, out_features=256, bias=False)
          (v_proj): Linear(in_features=256, out_features=256, bias=False)
          (o_proj): Linear(in_features=256, out_features=256, bias=False)
          (rotary_emb): LlamaRotaryEmbedding()
        )
        (mlp): LlamaMLP(
          (gate_proj): Linear(in_features=256, out_features=1024, bias=False)
          (up_proj): Linear(in_features=256, out_features=1024, bias=False)
          (down_proj): Linear(in_features=1024, out_featur

  0%|                                                                                                         …

2024-07-18 23:34:27        1,000  0.03  eval-loss:  2.31061   
2024-07-18 23:34:31        1,100  0.03  train-loss: 2.25196   learning-rate: 1.00e-03
2024-07-18 23:34:35        1,200  0.04  train-loss: 2.21356   learning-rate: 1.00e-03
2024-07-18 23:34:39        1,300  0.04  train-loss: 2.19952   learning-rate: 1.00e-03
2024-07-18 23:34:43        1,400  0.04  train-loss: 2.15759   learning-rate: 1.00e-03
2024-07-18 23:34:47        1,500  0.05  train-loss: 2.03841   learning-rate: 1.00e-03
2024-07-18 23:34:50        1,600  0.05  train-loss: 2.11876   learning-rate: 1.00e-03
2024-07-18 23:34:55        1,700  0.05  train-loss: 2.00525   learning-rate: 1.00e-03
2024-07-18 23:34:59        1,800  0.05  train-loss: 2.02464   learning-rate: 1.00e-03
2024-07-18 23:35:03        1,900  0.06  train-loss: 1.96943   learning-rate: 1.00e-03
2024-07-18 23:35:08        2,000  0.06  train-loss: 2.03572   learning-rate: 1.00e-03


  0%|                                                                                                         …

2024-07-18 23:35:08        2,000  0.06  eval-loss:  2.01092   
train_runtime: 83.04
train_samples: 128,000
step: 2,000
train_samples_per_second: 1.541e+03
train_steps_per_second: 24.09
train_loss: 2.533
epoch: 0.06039

**** Training Completed *****
{'train_runtime': 83.0407645702362, 'train_samples': 128000, 'step': 2000, 'train_samples_per_second': 1541.412, 'train_steps_per_second': 24.085, 'train_loss': 2.5329842567443848, 'epoch': 0.06038647342995169}
[2024-07-18 23:35:08,777] [INFO] [real_accelerator.py:191:get_accelerator] Setting ds_accelerator to cuda (auto detect)
Model saved to: micro_llama/output_models/default_model


### Generate Training Script

```python
def make_train_script(
    project_directory,
    config_template=None,
    script_name='train.sh',
    nproc='gpu',
    cuda_devices=None
):
```
Generate a bash training script from a project meta-config

The generated script will be written to 'project_directory' and all paths will be
relative to this location.

- project_directory: The project directory. Assumes the meta-config is 'meta.yaml'
- config_template: The configuration template to train with. If None, the configuration can be specified on the command-line.
- script_name: The name of the output script.
- nproc: Number of processes; 'gpu' is number of available GPUs
- cuda_devices: List of CUDA devices to limit training to.  

e.g. If you wish to use only CUDA devices 0 and 1, pass "0,1"

```
.../my_project$ ./train.sh
```

In [4]:
# Select name of generated script.
script_name = 'base_2gpu.sh'

# Generate the script for the selected configuration, limited to CUDA devices 0 and 1.
make_train_script(
    project_directory=project_directory,
    config_template=config_template,
    script_name=script_name,
    cuda_devices="0,1",
)

# Read back to verify. The link uses the script's actual name and location;
# a hard-coded 'train.sh' would not match `script_name` or the project directory.
script_path = os.path.join(project_directory, script_name)
with open(script_path, 'r') as f:
    script_text = f.read()

md = (
    f"#### Generated Shell Script\n"
    f"[{script_name}]({script_path})\n"
    f"```bash\n{script_text}\n```"
)
display.display(display.Markdown(md))

#### Generated Shell Script
[train.sh](train.sh)
```bash
#!/bin/bash
CUDA_VISIBLE_DEVICES='0,1' torchrun --standalone --nproc-per-node 'gpu' '../../scripts/train_script.py' -p '.' -s '../../src' "base_config.yaml"

```

### Run Script from Notebook
Launch the training script from the notebook.

Note: The terminal emulation of the notebook is lacking, thus rendering of progress bars may be broken.

In [None]:
# Preview the training command line, followed by the quoted configuration template.
train_cmdline = get_train_cmdline(meta, cuda_devices='0')
print(f"{train_cmdline} '{config_template}'")

In [None]:
# Run the training command line in a shell, limited to CUDA device 0, passing
# the configuration template as its argument.
!{get_train_cmdline(meta, cuda_devices="0")} '{config_template}'

### View in Tensorboard
Note: If the notebook is running on the same machine as the trainer, remove "--bind_all"

In [None]:
# Start TensorBoard with its log directory set to the active configuration's
# output directory.
!tensorboard --bind_all --logdir "{config.output_dir}"

#### Generate Bash Script

This will output a shell-script which will invoke the training script with the arguments for this project.

```bash
./train.sh path/to/experiment.yaml
```

If 'cuda_devices' is not None, this can restrict execution to a subset of available GPUs.
```python
# Restrict training to GPUs 0 and 1
make_train_script(project_directory, cuda_devices="0,1")
```

In [None]:
# Generate 'train.sh' in the project directory, limited to CUDA device 0.
# `make_bash_script` and `metacfg` are not defined in this notebook, so the
# imported `make_train_script` is used instead.
# NOTE(review): no config_template is passed; presumably this leaves the
# configuration to be supplied on the command line, as described above — confirm.
make_train_script(
    project_directory=project_directory,
    script_name='train.sh',
    cuda_devices="0",
)

# Read back to verify; the script is written into the project directory.
bash_script_path = os.path.join(project_directory, 'train.sh')
with open(bash_script_path, 'r') as f:
    bash_script_text = f.read()

md = (
    f"#### Generated Shell Script\n"
    f"[train.sh]({bash_script_path})\n"
    f"```bash\n{bash_script_text}\n```"
)
display.display(display.Markdown(md))

### Cleanup
Note: These will show the target directory and ask for confirmation before proceeding.

#### Delete All

In [None]:
# `meta` is the MetaConfig created in the setup cell; the name `metacfg` is
# never defined in this notebook and would raise NameError.
# NOTE(review): assumes MetaConfig exposes `model_dir` — confirm.
nb.delete_dir(meta.model_dir, "Delete all models in project")

#### Delete Configuration Output Directory

In [None]:
# Delete the active configuration's output directory. Per the note in this
# section, the helper shows the target and asks for confirmation first.
nb.delete_dir(config.output_dir, "Delete output directory")