# Master notebook
Designed to run 3 "children" notebooks to execute:
1. Supervised Fine Tuning (SFT)
2. Generating AI feedback data
3. Reward Modelling (RM) and Reinforcement Learning (RL)

In [None]:
# absl-py-2.1.0 accelerate-0.27.2 aiohttp-3.9.3 aiosignal-1.3.1 click-8.1.7 datasets-2.17.1 dill-0.3.8 entrypoints-0.4 evaluate-0.4.1 filelock-3.13.1 frozenlist-1.4.1 fsspec-2023.10.0 huggingface-hub-0.20.3 joblib-1.3.2 loralib-0.1.2 mpmath-1.3.0 multidict-6.0.5 multiprocess-0.70.16 networkx-3.2.1 nltk-3.8.1 numpy-1.26.4 pandas-2.2.1 papermill-2.5.0 peft-0.8.2 pillow-10.2.0 pyarrow-15.0.0 pyarrow-hotfix-0.6 regex-2023.12.25 responses-0.18.0 rouge_score-0.1.2 safetensors-0.4.2 sympy-1.12 tenacity-8.2.3 tokenizers-0.15.2 torch-2.2.1 torchaudio-2.2.1 torchvision-0.17.1 tqdm-4.66.2 transformers-4.38.1 tzdata-2024.1 xxhash-3.4.1 yarl-1.9.4

In [1]:
# !pip install torch torchvision torchaudio transformers datasets evaluate rouge_score loralib peft papermill -U

Collecting torch
  Downloading torch-2.2.2-cp39-none-macosx_10_9_x86_64.whl.metadata (25 kB)
Collecting torchvision
  Downloading torchvision-0.17.2-cp39-cp39-macosx_10_13_x86_64.whl.metadata (6.6 kB)
Collecting torchaudio
  Downloading torchaudio-2.2.2-cp39-cp39-macosx_10_13_x86_64.whl.metadata (6.4 kB)
Collecting transformers
  Downloading transformers-4.40.1-py3-none-any.whl.metadata (137 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m138.0/138.0 kB[0m [31m4.3 MB/s[0m eta [36m0:00:00[0m
Collecting datasets
  Downloading datasets-2.19.0-py3-none-any.whl.metadata (19 kB)
Collecting evaluate
  Downloading evaluate-0.4.1-py3-none-any.whl.metadata (9.4 kB)
Collecting loralib
  Downloading loralib-0.1.2-py3-none-any.whl.metadata (15 kB)
Collecting peft
  Downloading peft-0.10.0-py3-none-any.whl.metadata (13 kB)
Collecting tokenizers<0.20,>=0.19 (from transformers)
  Downloading tokenizers-0.19.1-cp39-cp39-macosx_10_12_x86_64.whl.metadata (6.7 kB)
Collecting huggin

In [2]:
import torch
import uuid
import papermill as pm
import pathlib
from nyx.constants import COMMON_OUTPUT_PATHS

cwd = pathlib.Path().resolve()

print(cwd)

/Users/owner/PycharmProjects/ai-msc-dissertation/notebooks


# Note!
Setting the DEVICE parameter detemerines what resource the model will be trained on, e.g., CPU, GPU or ARM architecture GPU referred to as MPS.

In [4]:
### run parameters

PRECISION_NAME = 'float32'
DEVICE = "mps"  # 0 if torch.cuda.is_available() else "cpu"

# Inputting small models for testing efficiency.
CHOSEN_MODEL = "bigscience/mt0-small"  # "bigscience/mt0-base"
LABELLER_MODEL = "bigscience/mt0-small"  # "google/flan-t5-large"
TESTING = True
RANDOM_SEED = 42
RUN_ID = uuid.uuid4().hex

common_path = COMMON_OUTPUT_PATHS.format(RUN_ID=RUN_ID)
print(common_path)
sft_notebook_config = {
    'PRECISION_NAME': PRECISION_NAME,
    'DEVICE': DEVICE,
    'CHOSEN_MODEL': CHOSEN_MODEL,
    'TESTING': TESTING,
    'RUN_ID': RUN_ID,
}

labelling_notebook_config = {
    'PRECISION_NAME': PRECISION_NAME,
    'DEVICE': DEVICE,
    'LABELLER_MODEL': LABELLER_MODEL,
    'TESTING': TESTING,
    'RUN_ID': RUN_ID,
    'RANDOM_SEED': RANDOM_SEED,
}

rlhf_notebook_config = {
    'PRECISION_NAME': PRECISION_NAME,
    'DEVICE': DEVICE,
    'CHOSEN_MODEL': CHOSEN_MODEL,
    'TESTING': TESTING,
    'RUN_ID': RUN_ID,
    'RANDOM_SEED': RANDOM_SEED,
}

./experiments/705e5ac9f4464aed9df57f70a6287c3b


In [7]:
help(pm.execute_notebook)

Help on function execute_notebook in module papermill.execute:

execute_notebook(input_path, output_path, parameters=None, engine_name=None, request_save_on_cell_execute=True, prepare_only=False, kernel_name=None, language=None, progress_bar=True, log_output=False, stdout_file=None, stderr_file=None, start_timeout=60, report_mode=False, cwd=None, **engine_kwargs)
    Executes a single notebook locally.
    
    Parameters
    ----------
    input_path : str or Path or nbformat.NotebookNode
        Path to input notebook or NotebookNode object of notebook
    output_path : str or Path or None
        Path to save executed notebook. If None, no file will be saved
    parameters : dict, optional
        Arbitrary keyword arguments to pass to the notebook parameters
    engine_name : str, optional
        Name of execution engine to use
    request_save_on_cell_execute : bool, optional
        Request save notebook after each cell execution
    autosave_cell_every : int, optional
        H

In [8]:
# Can write a function and for loop these notebook. Will be especially useful
# when a new parameter manipulates the prompting style.

# Notebook 1
pm.execute_notebook(
    input_path=f'{cwd}/01-sft-modelling-reddit-summarisation.ipynb',
    output_path=f'{common_path}/notebooks/05-sft-modelling-reddit-summarisation.ipynb',
    parameters=sft_notebook_config,
    progress_bar=True,
)

## Notebook 2
pm.execute_notebook(
    input_path=f'{cwd}/02-ai-label-data-generation.ipynb',
    output_path=f'{common_path}/notebooks/06-ai-label-data-generation.ipynb',
    parameters=labelling_notebook_config,
    progress_bar=True,
)

# Notebook 3
pm.execute_notebook(
    input_path=f'{cwd}/03-reinforcement-learning-from-ai-or-human-feedback.ipynb',
    output_path=f'{common_path}/notebooks/07-reinforcement-learning-from-ai-or-human-feedback.ipynb',
    parameters=rlhf_notebook_config,
    progress_bar=True,
)

Passed unknown parameter: PRECISION_NAME
Passed unknown parameter: DEVICE
Passed unknown parameter: CHOSEN_MODEL
Passed unknown parameter: TESTING
Input notebook does not contain a cell with tag 'parameters'
Executing:  87%|█████████████████████████████████████████████████████████████████████████████████████████▋             | 27/31 [03:07<00:41, 10.47s/cell]huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
Executing: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████| 31/31 [03:15<00:00,  6.30s/cell]
Passed unknown parameter: PRECISION_NAME
Passed unknown parameter: DEVICE
Passed unknown parameter: LABELLER_MODEL
Passed unknown parameter: TESTING
Passed unknown parameter: RANDOM_SEED
Input notebook 

{'cells': [{'id': '90995387',
   'cell_type': 'code',
   'metadata': {'tags': ['injected-parameters'],
    'papermill': {'exception': False,
     'start_time': '2024-02-23T23:02:24.802051',
     'end_time': '2024-02-23T23:02:24.847721',
     'duration': 0.04567,
     'status': 'completed'},
    'execution': {'iopub.status.busy': '2024-02-23T23:02:24.814453Z',
     'iopub.execute_input': '2024-02-23T23:02:24.814901Z',
     'iopub.status.idle': '2024-02-23T23:02:24.846427Z',
     'shell.execute_reply': '2024-02-23T23:02:24.845254Z'}},
   'execution_count': 1,
   'source': '# Parameters\nPRECISION_NAME = "float32"\nDEVICE = "mps"\nCHOSEN_MODEL = "bigscience/mt0-small"\nTESTING = True\nRANDOM_SEED = 42\n',
   'outputs': []},
  {'cell_type': 'markdown',
   'id': '02d7ae29-e41c-42ad-a5ce-ded3227265e9',
   'metadata': {'tags': [],
    'papermill': {'exception': False,
     'start_time': '2024-02-23T23:02:24.851065',
     'end_time': '2024-02-23T23:02:24.853866',
     'duration': 0.002801,
   

In [None]:
# encoder -decoder related lines

## Notebook 3
# in tokenize_function()
# command 7 line 29ish

## Next steps
Test **prompt diversification technqiues**:
- set up prompt changing in labeling
- set up prompt sampling in RM prompting.
  - try combinations of above.
- set up critique revisions

- try sampling thought and answers according to some criteria such as embedding etc. see miro.
- try Tree of Thought and/or other.
- look at papers chain of trees and chain of tables.

Code changes:
- implement above in notebooks 2 and 3 as appropriate.
- in notebook 3 compute RM improvement w.r.t. to dataset used for training (utilise checkpoints).
- track evaluation metrics per "runId".

# END