# Training Emely

## Run this notebook in Jupyter to work with WandB

This notebook is for training Emely with different configurations.
Use the blender_opts dictionary for the standard options.

### Configuration

The default options for training are located in temp_opts/default_blender_opts.json and temp_opts/run_blender_opts.json. The default_blender_opts are assumed to stay unchanged, while the run_blender_opts can be altered for each model instance.

The options that can currently be varied between models, shown with their default values, are:

- init_model: "zoo:blender/blender_90M/model",
- dict_file: "zoo:blender/blender_90M/model.dict",
- bs: 16,
- betas: "0.9,0.999",
- lr: 1e-06,
- dropout: 0.1,
- inference: "beam",
- beam_size: 10,
- beam_min_length: 10,
- beam_block_ngram: 3,
- wandb_project: "emely-v0.X",
- task: "internal,external,external-gpt3",
- multitask_weights: "6,3,3",
- mutators: null

This notebook assumes the following directory structure:

- root
    - emely-models
    - emely-testing

# Main options

In [None]:
# Conda environments: one for training, one for testing
trainenv = "emelymodels"
testenv = "emelytesting"

# WandB project passed to wandb.init()
wandb_project_name = "emely-v0-4"

# Selects which Dockerfile template is used (../<model_type>/Dockerfile)
model_type = "interview"

# Number of per-model option files to generate
n_models = 1

In [16]:
# Imports
import json
import os
import re
import shutil
import string
import subprocess
from copy import deepcopy
from pathlib import Path

import torch
import wandb
from parlai.scripts.train_model import TrainModel
# Display whether PyTorch reports an available CUDA device in this kernel
torch.cuda.is_available()

False

# Choose training settings

In [23]:
# `conda activate` run through os.system() only affects a throw-away subshell,
# so it can never switch the environment of this kernel. Check which
# environment the kernel was started from instead and warn on a mismatch.
active_env = os.environ.get("CONDA_DEFAULT_ENV")
if active_env != trainenv:
    print(
        f"Warning: active conda environment is {active_env!r}, expected {trainenv!r}. "
        "Start Jupyter from the training environment before training."
    )

# Seed one editable option file per model from the shared run template.
# NOTE: re-running this cell overwrites any manual edits to the model files.
with open("temp_opts/run_blender_opts.json", "r") as file:
    run_blender_opts = json.load(file)
for model_id in range(1, n_models + 1):
    with open(f"temp_opts/model_{model_id}_opts.json", "w") as file:
        json.dump(run_blender_opts, file, sort_keys=False, indent=4)

### Edit the options in the temp_opts files for the different models, then run training:

# Initiate WandB

In [None]:
# Start a WandB run in the project chosen in the main options
wandb.init(
    project=wandb_project_name,
)

# Define training function

In [36]:
def run_training(model_id):
    """Train the model described by ``temp_opts/model_<model_id>_opts.json``.

    The per-model options are combined with
    ``temp_opts/default_blender_opts.json`` (the default values win on
    duplicate keys), passed to ``TrainModel.main`` and finally stored as
    ``run_opts.json`` in the same directory as the trained model.

    Args:
        model_id: Number identifying the option file to load; it is also
            used as the suffix of the run name.

    Returns:
        The run name, which is also the name of the model directory below
        ``models/model-runs`` two levels above the working directory.
    """
    with open("temp_opts/default_blender_opts.json", "r") as file:
        default_blender_opts = json.load(file)

    with open(f"temp_opts/model_{model_id}_opts.json", "r") as file:
        run_blender_opts = json.load(file)

    # Name used for the model directory and for the run on wandb
    name = f'blender-{run_blender_opts["task"]}-{run_blender_opts["multitask_weights"]}'
    mutators = run_blender_opts.get("mutators")
    if mutators is not None:
        name += f"-{mutators}"
    name += f"-model_{model_id}"

    # All artefacts of a run live in one directory, resolved from the working
    # directory so that every later step refers to the same location.
    run_dir = Path.cwd().parents[1].joinpath("models", "model-runs", name)
    mf = run_dir / "model"

    # Finalize training opts
    run_blender_opts["model_file"] = mf.as_posix()
    run_blender_opts["wandb_name"] = name
    run_blender_opts.update(default_blender_opts)

    # A null "mutators" entry is dropped rather than handed to the trainer
    if run_blender_opts.get("mutators") is None:
        run_blender_opts.pop("mutators", None)

    TrainModel.main(**run_blender_opts)

    # Vacuuming is best effort: a failure here must not discard a finished
    # training run, so it is reported instead of raised.
    try:
        vacuum = subprocess.run(["parlai", "vacuum", "-mf", mf.as_posix()])
        if vacuum.returncode != 0:
            print(f"Warning: 'parlai vacuum' exited with code {vacuum.returncode}")
    except OSError as err:
        print(f"Warning: could not run 'parlai vacuum': {err}")

    # Store the options next to the model they produced
    run_dir.mkdir(parents=True, exist_ok=True)
    with open(run_dir / "run_opts.json", "w") as file:
        json.dump(run_blender_opts, file, sort_keys=False, indent=4)

    return name

# Names of the runs trained in this session; re-running this cell resets it
model_names = []

# Run the training in separate cells

Only the models that are successfully generated will be appended to model_names, and used for testing.

In [None]:
# Train model 1 and record its run name for the later stages
model_names.append(run_training(model_id=1))

In [18]:
# Remove this cell!
# Development shortcut for running the Docker and test stages without
# training. It only applies when no run name has been collected yet, so it
# cannot discard the names of models trained in this session.
if not globals().get("model_names"):
    model_names = ["blender-minimal-1-model_1"]

# Models are trained, now create docker images

In [17]:
# 0-based line of the template Dockerfile that holds the model COPY instruction
copy_line_index = 4
# The COPY path below is relative to the directory two levels above the
# working directory, so that directory has to be the docker build context.
build_context = Path.cwd().parents[1]

image_names = []
for name in model_names:
    run_dir = build_context / "models" / "model-runs" / name

    # Store the Dockerfile that is used in the model directory
    with open(f"../{model_type}/Dockerfile", "r") as file:
        lines = file.readlines()
    lines[copy_line_index] = f"COPY models/model-runs/{name} ./models/interview-model\n"
    dockerfile_path = run_dir / "Dockerfile"
    with open(dockerfile_path, "w") as file:
        file.write("".join(lines))

    # Run names can contain characters (e.g. the commas of a task list) that
    # docker rejects in image names; map them to dashes and lowercase the rest.
    image_name = re.sub(r"[^a-z0-9_.-]+", "-", name.lower()).strip("-._")

    # Build straight from the stored Dockerfile; no temporary copy is placed
    # in (and later deleted from) the build context.
    build = subprocess.run(
        ["docker", "build", "-t", image_name, "-f", str(dockerfile_path), str(build_context)]
    )
    if build.returncode == 0:
        image_names.append(image_name)
    else:
        print(f"Warning: docker build failed for {name!r} (exit code {build.returncode})")


# Run testing

In [None]:
# `conda activate` and `cd` run through os.system() only affect a throw-away
# subshell, so the environment and working directory are passed explicitly
# to the test command instead.
testing_dir = Path("../../../emely-testing")

for name in image_names:
    # Start detached: a foreground `docker run` would block until the
    # container exits and the tests would never see a running service.
    started = subprocess.run(
        ["docker", "run", "-d", "--rm", "--name", name, "-p", "8080:8080", name]
    )
    if started.returncode != 0:
        print(f"Warning: could not start container {name!r}; skipping its tests.")
        continue
    try:
        # NOTE(review): main.py may have to wait until the service in the
        # container accepts connections - confirm.
        subprocess.run(
            ["conda", "run", "-n", testenv, "python", "main.py"],
            cwd=testing_dir,
        )
    finally:
        # Free the container name and port 8080 for the next image
        subprocess.run(["docker", "stop", name])

# --- End of pipeline ---

# Some utils to change the default files used in this notebook

In [22]:
# Options shared by every model; written to temp_opts/default_blender_opts.json
default_blender_opts = {
    "activation": "gelu",
    "attention_dropout": 0.0,
    "dict_lower": True,
    "dict_tokenizer": "bpe",
    "embedding_size": 512,
    "evaltask": "internal,external",
    "ffn_size": 2048,
    "fp16": True,
    "gradient_clip": 0.1,
    "label_truncate": 128,
    "learn_positional_embeddings": True,
    "lr_scheduler": "reduceonplateau",
    "metrics": "ppl,bleu-4,rouge-L",
    "model": "transformer/generator",
    "n_heads": 16,
    "n_layers": 8,
    "n_positions": 512,
    "optimizer": "adamax",
    "relu_dropout": 0.0,
    "save_after_valid": True,
    "skip_generation": False,
    "stim": 60,
    "tensorboard_log": True,
    "text_truncate": 512,
    "update_freq": 1,
    "variant": "xlm",
    "veps": 0.25,
    "vme": 20000,
    "vmm": "min",
    "vmt": "ppl",
    "vp": 15,
    "wblog": True,
}

# Template for the per-model options; written to temp_opts/run_blender_opts.json
run_blender_opts = {
    "init_model": "zoo:blender/blender_90M/model",
    "dict_file": "zoo:blender/blender_90M/model.dict",
    "bs": 16,
    "betas": "0.9,0.999",
    "lr": 1e-06,
    "dropout": 0.1,
    "inference": "beam",
    "beam_size": 10,
    "beam_min_length": 10,
    "beam_block_ngram": 3,
    "wandb_project": "parlaiemely",
    "task": "internal,external,external-gpt3",
    "multitask_weights": "6,3,3",
    "mutators": None,
}

# Create the target directory first so the writes also work on a fresh checkout
Path("temp_opts").mkdir(parents=True, exist_ok=True)

with open("temp_opts/default_blender_opts.json", "w") as file:
    json.dump(default_blender_opts, file, sort_keys=True, indent=4)
with open("temp_opts/run_blender_opts.json", "w") as file:
    json.dump(run_blender_opts, file, sort_keys=False, indent=4)