# Training Emely

## Run this noteboook in Jupyter to work with WandB

This notebook is for training Emely with different configurations.
Use the blender_opts dictionary for the standard options.

### Configuration

The default options for training are located in settings/default_blender_opts.json and settings/run_blender_opts.json. The default_blender_opts are assumed to stay unchanged, while the run_blender_opts can be altered for each model instance.

The current options that can be varied between models with default settings are:

- init_model: "zoo:blender/blender_90M/model",
- dict_file: "zoo:blender/blender_90M/model.dict",
- bs: 16,
- betas: "0.9,0.999",
- lr: 1e-06,
- dropout: 0.1,
- inference: "beam",
- beam_size: 10,
- beam_min_length: 10,
- beam_block_ngram: 3,
- wandb_project: "emely-v0.X",
- task: "internal,external,external-gpt3",
- multitask_weights: "6,3,3",
- mutators: null

This notebook assumes the structure

- root
    - emely-models
    - emely-testing

To avoid conflicts with existing docker images it is recommended to run docker system prune or docker system prune --all before running this notebook.

# Main options

In [2]:
n_models = 1
wandb_project_name = "emely-v0-4"
model_type = "interview"
err = os.system("mkdir ../../models/emely-runs")

In [3]:
# Imports
import json
from parlai.scripts.train_model import TrainModel
from pathlib import Path
from copy import deepcopy
import shutil
import wandb
import time
import re
import string
import os
import subprocess
from subprocess import Popen
import torch
torch.cuda.is_available()

False

# Choose training settings

In [5]:
with open("temp_opts/run_blender_opts.json","r") as file:
    run_blender_opts = json.load(file)
run_blender_opts["wandb_project"] = wandb_project_name
for i in range(n_models):
    with open("temp_opts/model_" + str(i+1) + "_opts.json","w") as file:
        json.dump(run_blender_opts, file, sort_keys=False, indent=4)

### Edit the options in the temp_opts files for the different models, then run training:

# Initiate WandB

In [6]:
wandb.init(project=wandb_project_name)

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mckjellson[0m (use `wandb login --relogin` to force relogin)
[34m[1mwandb[0m: wandb version 0.12.2 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade


# Define training function

In [7]:
def run_training(model_id):

    with open("temp_opts/default_blender_opts.json","r") as file:
        default_blender_opts = json.load(file)

    with open("temp_opts/model_" + str(model_id) + "_opts.json","r") as file:
        run_blender_opts = json.load(file)

    # Set name for file and model run on wandb
    if run_blender_opts["mutators"] is not None:
        name = f'blender-{run_blender_opts["task"]}-{run_blender_opts["multitask_weights"]}-{run_blender_opts["mutators"]}-model-{model_id}'

    else:
        name = f'blender-{run_blender_opts["task"]}-{run_blender_opts["multitask_weights"]}-model-{model_id}'
    
    #%env WANDB_NAME=$name
    mf = Path.cwd().parents[1].joinpath(f'models/emely-runs/{name}/model')
    
    # Finalize training opts
    run_blender_opts["model_file"] = mf.as_posix()
    run_blender_opts["wandb_name"] = name
    run_blender_opts.update(default_blender_opts)

    # Uncomment the following line to run for one epoch during testing
    run_blender_opts["eps"] = 1

    if run_blender_opts["mutators"] is None:
        del run_blender_opts["mutators"]
    
    TrainModel.main(**run_blender_opts)

    os.system(f"parlai vacuum -mf ../../models/emely-runs/{name}/model")

    with open(f"../../models/emely-runs/{name}/run_opts.json","w") as file:
        json.dump(run_blender_opts, file, sort_keys=False, indent=4)

    return name

model_names = []

# Run the training in separate cells

Only the models that are successfully generated will be appended to model_names, and used for testing.

In [8]:
model_names.append(run_training(1))

17:02:37 | building dictionary first...
17:02:37 | No model with opt yet at: /home/ckjellson/code/emely-models/models/emely-runs/blender-minimal-1-model-1/model(.opt)
17:02:37 | [33myour model is being loaded with opts that do not exist in the model you are initializing the weights with: allow_missing_init_opts: False,download_path: None,loglevel: info,dynamic_batching: None,verbose: False,is_debug: False,datapath: /home/ckjellson/code/emely-models/ParlAI/data,eval_dynamic_batching: None,num_workers: 0,max_train_steps: -1,log_every_n_steps: 50,validation_every_n_steps: -1,load_from_checkpoint: True,tensorboard_logdir: None,wandb_log: True,wandb_name: blender-minimal-1-model-1,wandb_project: emely-v0-4,wandb_entity: None,mutators: None,n_encoder_layers: -1,n_decoder_layers: -1,model_parallel: False,beam_block_full_context: True,beam_delay: 30,beam_block_list_filename: None,temperature: 1.0,interactive_mode: False,history_reversed: False,history_add_global_end_token: None,special_tok_ls

VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

[34m[1mwandb[0m: wandb version 0.12.2 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade


17:02:46 | training...
17:02:47 | time:9s total_exs:16 total_steps:1 epochs:16.00 time_left:0s
    clen  clip  ctpb  ctps  ctrunc  ctrunclen  exps  exs  gnorm  llen  loss    lr  ltpb  ltps  ltrunc  ltrunclen   ppl  \
      18     1   288 209.9       0          0 11.61   16  47.43    12 2.106 1e-06   192 139.9       0          0 8.219   
    token_acc  token_em  total_train_updates  tpb   tps   ups  
        .4531         0                    1  480 349.8 .7297

17:02:47 | num_epochs completed:1.0 time elapsed:8.552588701248169s
17:02:47 | Saving dictionary to /home/ckjellson/code/emely-models/models/emely-runs/blender-minimal-1-model-1/model.dict
17:02:49 | [33mOverriding opt["init_model"] to zoo:blender/blender_90M/model (previously: /home/ckjellson/code/emely-models/ParlAI/data/models/blender/blender_90M/model)[0m
17:02:49 | [33mOverriding opt["betas"] to (0.9, 0.999) (previously: [0.9, 0.999])[0m
17:02:49 | [33mOverriding opt["multitask_weights"] to (1.0,) (previously: [1.0])[

To keep the current behavior, use torch.div(a, b, rounding_mode='trunc'), or for actual floor division, use torch.div(a, b, rounding_mode='floor'). (Triggered internally at  /pytorch/aten/src/ATen/native/BinaryOps.cpp:467.)
  return torch.floor_divide(self, other)


17:02:53 | [33mROUGE requires nltk punkt tokenizer. Please run `python -c "import nltk; nltk.download('punkt')`[0m
17:03:19 | eval completed in 28.63s
17:03:19 | [1mvalid:
             accuracy    bleu-4  clen  ctpb  ctps  ctrunc  ctrunclen  exps  exs    f1  llen  loss    lr  ltpb  ltps  ltrunc  \
   all              0 1.448e-06 53.69 635.2 279.9       0          0 6.218  171 .1321 14.68 2.825 1e-06   190 83.73       0   
   external         0 1.846e-06 62.36                   0          0         44 .1261 15.89 3.094                         0   
   internal         0 1.049e-06 45.02                   0          0        127 .1380 13.46 2.556                         0   
             ltrunclen   ppl  token_acc  token_em  total_train_updates   tpb   tps  
   all               0 17.47      .4262         0                    1 825.2 363.6  
   external          0 22.06      .3991         0                                   
   internal          0 12.89      .4532         0
[0m
17:03:1

VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
exs/train,16.0
clen/train,18.0
ctrunc/train,0.0
ctrunclen/train,0.0
llen/train,12.0
ltrunc/train,0.0
ltrunclen/train,0.0
loss/train,2.10648
ppl/train,8.21923
token_acc/train,0.45312


0,1
exs/train,▁
clen/train,▁
ctrunc/train,▁
ctrunclen/train,▁
llen/train,▁
ltrunc/train,▁
ltrunclen/train,▁
loss/train,▁
ppl/train,▁
token_acc/train,▁


# Models are trained, now create docker images

In [10]:
image_names = []
for name in model_names:
    try:
        # Store the Dockerfile that is used in the model directory
        with open(f"../{model_type}/Dockerfile","r") as file:
            lines = file.readlines()
        lines[4] = f"COPY models/emely-runs/{name} ./models/interview-model\n"
        dockerfile = "".join(lines)
        with open(f"../../models/emely-runs/{name}/Dockerfile","w") as file:
            file.write(dockerfile)
        
        # Create docker image
        err = os.system(f"cp ../../models/emely-runs/{name}/Dockerfile ../../Dockerfile")
        err = os.system(f"docker build -t {name} ../..")
        err = os.system(f"rm ../../Dockerfile")

        image_names.append(name)
    except:
        print(f"Error generating docker-image for model {name}")


0
0
0


# Run testing (not yet working)

In [5]:
tested_names = []
image_names = "blender-minimal-1-model_1"
#print(os.system(f"conda activate {testenv}"))
for name in image_names:
    #try:
    p1 = Popen("/bin/bash", stdin=subprocess.PIPE, stdout=subprocess.PIPE, encoding='utf8')
    p2 = Popen("/bin/bash", stdin=subprocess.PIPE, stdout=subprocess.PIPE, encoding='utf8')

    out,err = p1.communicate(f"docker run --name {name} -p 8080:8080 {name}")
    print(out)
    print(err)
    time.sleep(5)
    out,err = p2.communicate(f"conda activate {testenv} ; python ../../../emely-testing/main.py")
    print(err)
    print(out)
    while p2.poll() is not None:
        out = p2.stdout
        #print(out)
    print(out)

    p2.kill()
    p1.kill()
    # p2 = Popen("python ../../../emely-testing/main.py")

    # while True:
    #     if p2.poll() is None:
    #         break

    print(os.system(f"docker stop {name}"))
    print(os.system(f"docker rm {name}"))
    # tested_names.append(name)
    #except:
    #    print(f"Error testing model {name}")

#docker run --name blender-minimal-1-model_1  -p 8080:8080 blender-minimal-1-model_1


None


# Final clean-up

In [19]:
print(f"Successfully trained, dockerized and tested models:")
for name in tested_names:
    print(f"{name}")

Successfully trained, dockerized and tested models:
skej
skej


In [18]:
for i in range(len(model_names)):
    os.system(f"rm temp_opts/model_{str(i+1)}_opts.json")

# --- End of pipeline ---

# Some utils to change the default files used in this notebook

In [22]:
default_blender_opts = {
    "activation": "gelu",
    "attention_dropout": 0.0,
    "dict_lower": True,
    "dict_tokenizer": "bpe",
    "embedding_size": 512,
    "evaltask": "internal,external",
    "ffn_size": 2048,
    "fp16": True,
    "gradient_clip": 0.1,
    "label_truncate": 128,
    "learn_positional_embeddings": True,
    "lr_scheduler": "reduceonplateau",
    "metrics": "ppl,bleu-4,rouge-L",
    "model": "transformer/generator",
    "n_heads": 16,
    "n_layers": 8,
    "n_positions": 512,
    "optimizer": "adamax",
    "relu_dropout": 0.0,
    "save_after_valid": True,
    "skip_generation": False,
    "stim": 60,
    "tensorboard_log": True,
    "text_truncate": 512,
    "update_freq": 1,
    "variant": "xlm",
    "veps": 0.25,
    "vme": 20000,
    "vmm": "min",
    "vmt": "ppl",
    "vp": 15,
    "wblog": True
}
run_blender_opts = {
    'init_model': 'zoo:blender/blender_90M/model',
    'dict_file': 'zoo:blender/blender_90M/model.dict',
    'bs': 16,
    'betas': '0.9,0.999',
    'lr': 1e-06,
    'dropout': 0.1,
    'inference': 'beam',
    'beam_size': 10,
    'beam_min_length': 10,
    'beam_block_ngram': 3,
    'wandb_project': 'parlaiemely',
    'task': 'internal,external,external-gpt3',
    'multitask_weights': '6,3,3',
    'mutators': None
}

with open("temp_opts/default_blender_opts.json","w") as file:
    json.dump(default_blender_opts,file, sort_keys=True, indent=4)
with open("temp_opts/run_blender_opts.json","w") as file:
    json.dump(run_blender_opts,file, sort_keys=False, indent=4)