# Training Emely

## Run this notebook in Jupyter to work with WandB

This notebook is for training Emely with different configurations.
Use the blender_opts dictionary for the standard options.

### Configuration

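The options for training are read from temp_opts/default_blender_opts.json and temp_opts/run_blender_opts.json (both can be regenerated with the utility cell at the end of this notebook). The default_blender_opts are assumed to stay unchanged, while the run_blender_opts are copied to one temp_opts/model_N_opts.json file per model and can be altered for each model instance.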

The current options that can be varied between models with default settings are:

- init_model: "zoo:blender/blender_90M/model",
- dict_file: "zoo:blender/blender_90M/model.dict",
- bs: 16,
- betas: "0.9,0.999",
- lr: 1e-06,
- dropout: 0.1,
- inference: "beam",
- beam_size: 10,
- beam_min_length: 10,
- beam_block_ngram: 3,
- wandb_project: "emely-v0.X",
- task: "internal,external,external-gpt3",
- multitask_weights: "6,3,3",
- mutators: null

In [28]:
import json
from parlai.scripts.train_model import TrainModel
from pathlib import Path
from copy import deepcopy
import shutil
import wandb
import torch
torch.cuda.is_available()

False

## Enable WandB

In [16]:
# Weights & Biases project that the run started from this notebook kernel is logged to.
# NOTE(review): run_training takes "wandb_project" from the temp_opts JSON files,
# not from this variable — confirm the two are meant to match.
wandb_project_name="emely-v0-4"
# Start a wandb run for this kernel session.
wandb.init(project=wandb_project_name)

# Which Emely variant is being trained; allowed values are "interview" or "fika".
# NOTE(review): no cell shown in this notebook reads model_type — confirm it is still needed.
model_type = "interview" # interview/fika

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mckjellson[0m (use `wandb login --relogin` to force relogin)
[34m[1mwandb[0m: wandb version 0.12.2 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade


# Choose number of models

In [23]:
# Number of model configurations to prepare; one opts file is written per model.
n_models = 1

# Load the shared per-run template once.
with open("temp_opts/run_blender_opts.json", "r") as template_file:
    run_blender_opts = json.load(template_file)

# Write an editable copy of the template for every model:
# temp_opts/model_1_opts.json ... temp_opts/model_<n_models>_opts.json
for model_number in range(1, n_models + 1):
    with open(f"temp_opts/model_{model_number}_opts.json", "w") as opts_file:
        json.dump(run_blender_opts, opts_file, sort_keys=False, indent=4)

### Edit the options in the temp_opts files for the different models, then run training:

In [36]:
def run_training(model_id):
    """Train one model from its options file and save the options that were used.

    Reads ``temp_opts/default_blender_opts.json`` and
    ``temp_opts/model_<model_id>_opts.json``, derives a run name from the
    task, the multitask weights and (when set) the mutators, merges the two
    option sets and passes the result to ``TrainModel.main``. After training,
    the merged options are written to ``run_opts.json`` in the same directory
    as the model file.

    Args:
        model_id: Number of the ``temp_opts/model_<model_id>_opts.json`` file
            to train from; also used as the suffix of the run name.

    Returns:
        The run name, which is also passed on as ``wandb_name`` and used as
        the name of the model directory.

    Raises:
        FileNotFoundError: If either options file does not exist.
        KeyError: If the model options lack ``"task"`` or ``"multitask_weights"``.
    """
    with open("temp_opts/default_blender_opts.json", "r") as file:
        default_blender_opts = json.load(file)

    with open(f"temp_opts/model_{model_id}_opts.json", "r") as file:
        run_blender_opts = json.load(file)

    # Build the run name; the mutators part is only included when mutators are set.
    # .get() tolerates an opts file from which the "mutators" key was removed.
    mutators = run_blender_opts.get("mutators")
    name_parts = ["blender", run_blender_opts["task"], run_blender_opts["multitask_weights"]]
    if mutators is not None:
        name_parts.append(mutators)
    name_parts.append(f"model_{model_id}")
    name = "-".join(str(part) for part in name_parts)

    # The model directory lives two levels above the notebook's working directory.
    mf = Path.cwd().parents[1].joinpath(f'models/model-runs/{name}/model')

    # Finalize training opts
    run_blender_opts["model_file"] = mf.as_posix()
    run_blender_opts["wandb_name"] = name
    # Defaults only fill in keys that are not already set, so a per-model
    # option (or the derived model_file / wandb_name) is never overwritten.
    for key, value in default_blender_opts.items():
        run_blender_opts.setdefault(key, value)

    # A null mutators entry is dropped rather than passed on as None.
    if run_blender_opts.get("mutators") is None:
        run_blender_opts.pop("mutators", None)

    TrainModel.main(**run_blender_opts)

    # Store the options next to the model file. The original wrote to a path
    # relative to the working directory, which is a different directory.
    run_dir = mf.parent
    run_dir.mkdir(parents=True, exist_ok=True)
    with open(run_dir / "run_opts.json", "w") as file:
        json.dump(run_blender_opts, file, sort_keys=False, indent=4)

    return name

# Run names returned by run_training, collected in the order the training cells are run.
# Note: re-running this cell resets the list.
model_names = []

## Run the training in separate cells

In [37]:
# Train the model configured in temp_opts/model_1_opts.json and record its run name.
model_names.append(run_training(1))

16:29:05 | building dictionary first...
16:29:05 | [33mOverriding opt["init_model"] to zoo:blender/blender_90M/model (previously: /home/ckjellson/code/emely-models/ParlAI/data/models/blender/blender_90M/model)[0m
16:29:05 | [33mOverriding opt["betas"] to (0.9, 0.999) (previously: [0.9, 0.999])[0m
16:29:05 | [33mOverriding opt["wandb_project"] to emely-v0.4 (previously: parlaiemely)[0m
16:29:05 | [33mOverriding opt["multitask_weights"] to (1.0,) (previously: [1.0])[0m
16:29:05 | [33mOverriding opt["wandb_name"] to blender-minimal-1-model_1 (previously: None)[0m
16:29:05 | [33myour model is being loaded with opts that do not exist in the model you are initializing the weights with: download_path: None,verbose: False,datapath: /home/ckjellson/code/emely-models/ParlAI/data,load_from_checkpoint: True,interactive_mode: False[0m
16:29:05 | [33myour model is being loaded with opts that differ from the model you are initializing the weights with. Add the following args to your run 

VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
exs/train,1.0
clen/train,18.0
ctrunc/train,0.0
ctrunclen/train,0.0
llen/train,12.0
ltrunc/train,0.0
ltrunclen/train,0.0
loss/train,1.90703
ppl/train,6.73306
token_acc/train,0.5


0,1
exs/train,▁▁▁▁▁
clen/train,▁▁▁▁▁
ctrunc/train,▁▁▁▁▁
ctrunclen/train,▁▁▁▁▁
llen/train,▁▁▁▁▁
ltrunc/train,▁▁▁▁▁
ltrunclen/train,▁▁▁▁▁
loss/train,█▄▁▂▁
ppl/train,█▃▁▂▁
token_acc/train,▁▁███


[34m[1mwandb[0m: wandb version 0.12.2 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade


16:29:15 | training...
16:29:16 | time:301s total_exs:5 total_steps:5 epochs:5.00
    clen  clip  ctpb  ctps  ctrunc  ctrunclen  exps  exs  gnorm  llen  loss    lr  ltpb  ltps  ltrunc  ltrunclen   ppl  \
      18     1    18 29.69       0          0 1.649    1  46.07    12 1.938 1e-06    12 19.79       0          0 6.946   
    token_acc  token_em  total_train_updates  tpb   tps   ups  
        .5000         0                    5   30 49.48 1.653

16:29:16 | creating task(s): internal
16:29:16 | Loading ParlAI text data: /home/ckjellson/code/emely-models/ParlAI/data/internal/valid.txt
16:29:16 | creating task(s): external
16:29:16 | Loading ParlAI text data: /home/ckjellson/code/emely-models/ParlAI/data/external/valid.txt
16:29:16 | running eval: valid


KeyboardInterrupt: 

# Models are trained; now create Docker images

# Some utils to change the default files used in this notebook

In [22]:
# Options shared by every run; written to temp_opts/default_blender_opts.json below.
default_blender_opts = dict(
    activation="gelu",
    attention_dropout=0.0,
    dict_lower=True,
    dict_tokenizer="bpe",
    embedding_size=512,
    evaltask="internal,external",
    ffn_size=2048,
    fp16=True,
    gradient_clip=0.1,
    label_truncate=128,
    learn_positional_embeddings=True,
    lr_scheduler="reduceonplateau",
    metrics="ppl,bleu-4,rouge-L",
    model="transformer/generator",
    n_heads=16,
    n_layers=8,
    n_positions=512,
    optimizer="adamax",
    relu_dropout=0.0,
    save_after_valid=True,
    skip_generation=False,
    stim=60,
    tensorboard_log=True,
    text_truncate=512,
    update_freq=1,
    variant="xlm",
    veps=0.25,
    vme=20000,
    vmm="min",
    vmt="ppl",
    vp=15,
    wblog=True,
)

# Template for the per-model options; written to temp_opts/run_blender_opts.json below.
# Key order is kept as listed because the file is dumped with sort_keys=False.
run_blender_opts = dict(
    init_model="zoo:blender/blender_90M/model",
    dict_file="zoo:blender/blender_90M/model.dict",
    bs=16,
    betas="0.9,0.999",
    lr=1e-06,
    dropout=0.1,
    inference="beam",
    beam_size=10,
    beam_min_length=10,
    beam_block_ngram=3,
    wandb_project="parlaiemely",
    task="internal,external,external-gpt3",
    multitask_weights="6,3,3",
    mutators=None,
)

# Overwrite both option files with the values defined above.
with open("temp_opts/default_blender_opts.json", "w") as defaults_file:
    json.dump(default_blender_opts, defaults_file, indent=4, sort_keys=True)

with open("temp_opts/run_blender_opts.json", "w") as template_file:
    json.dump(run_blender_opts, template_file, indent=4, sort_keys=False)