# Training Emely

## Run this notebook in Jupyter to work with WandB

This notebook is for training Emely with different configurations.
Use the `default_blender_opts` and `run_blender_opts` dictionaries (defined in the last cell of this notebook) for the standard options.

### Configuration

#### Base Config
We'll call the base configuration "Blender base config". It is the Blender 90M model fine-tuned on the internal and external tasks.

### Required config for a run

- task
- multitask_weights
- model_file

### Optional config
- mutators
- lr


### Different mutators for different tasks?
--task internal:mutators=word_shuffle,internal:mutators=last_turn


### Evaluation
All models are evaluated on the internal and external tasks

In [15]:
import json
from parlai.scripts.train_model import TrainModel
from pathlib import Path
from copy import deepcopy
import shutil
import wandb
import torch
# Display whether PyTorch can see a CUDA device (the saved output below is False).
# NOTE(review): default_blender_opts sets "fp16": True - confirm training works without a GPU.
torch.cuda.is_available()

False

## Enable WandB

In [16]:
# Start a Weights & Biases run in the "emely-v0-4" project.
# NOTE(review): run_blender_opts sets 'wandb_project' to 'parlaiemely' - confirm which
# project the training runs are meant to log to.
wandb.init(project="emely-v0-4")

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mckjellson[0m (use `wandb login --relogin` to force relogin)
[34m[1mwandb[0m: wandb version 0.12.2 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade


# Choose number of models

In [17]:
n_models = 3

# Read the shared run template once, then write one editable copy per model
# (temp_opts/model_1_opts.json ... temp_opts/model_<n_models>_opts.json).
opts_dir = Path("temp_opts")
with open(opts_dir / "run_blender_opts.json", "r") as template_file:
    run_blender_opts = json.load(template_file)

for model_number in range(1, n_models + 1):
    with open(opts_dir / f"model_{model_number}_opts.json", "w") as opts_file:
        json.dump(run_blender_opts, opts_file, sort_keys=False, indent=4)

### Edit the options in the temp_opts files for the different models, then run training:

In [13]:
def run_training(model_id):
    """Train one model from its options file and save the options that were used.

    Reads ``temp_opts/default_blender_opts.json`` and
    ``temp_opts/model_<model_id>_opts.json``, builds a run name from the task,
    multitask weights and (if set) mutators, trains via ``TrainModel.main`` and
    then writes the merged options to ``run_opts.json`` in the same directory
    as the model file.

    Per-model options take precedence over the defaults, so edits made in the
    per-model file are never overwritten by ``default_blender_opts``.

    Args:
        model_id: Number selecting ``temp_opts/model_<model_id>_opts.json``.

    Raises:
        FileNotFoundError: If either options file is missing.
        KeyError: If the per-model options lack "task" or "multitask_weights".
    """
    opts_dir = Path("temp_opts")

    with open(opts_dir / "default_blender_opts.json", "r") as file:
        default_blender_opts = json.load(file)

    with open(opts_dir / f"model_{model_id}_opts.json", "r") as file:
        run_blender_opts = json.load(file)

    # Build the name used for the run directory. The keys are "task" and
    # "multitask_weights", matching what is written to run_blender_opts.json.
    name_parts = [
        "blender",
        run_blender_opts["task"],
        run_blender_opts["multitask_weights"],
    ]
    mutators = run_blender_opts.get("mutators")
    if mutators is not None:
        name_parts.append(mutators)
    name_parts.append(f"model_{model_id}")
    name = "-".join(str(part) for part in name_parts)

    # NOTE: the `%env WANDB_NAME=$name` magic that used to sit here cannot be
    # used inside a function body, so the W&B run name is not set from here.
    run_dir = Path.cwd().parents[1] / "models" / "model-runs" / name
    mf = run_dir / "model"

    # Finalize training opts: defaults first, per-model overrides on top,
    # and the computed model path last so it always wins.
    train_opts = {**default_blender_opts, **run_blender_opts}
    train_opts["model_file"] = mf.as_posix()

    TrainModel.main(**train_opts)

    # Store the options next to the trained model (same directory as `mf`),
    # not in a cwd-relative path that training never creates.
    run_dir.mkdir(parents=True, exist_ok=True)
    with open(run_dir / "run_opts.json", "w") as file:
        json.dump(train_opts, file, sort_keys=False, indent=4)

## Run the training in separate cells

In [None]:
# Train model 1 (options are read from temp_opts/model_1_opts.json).
run_training(1)

In [None]:
# Train model 2 (options are read from temp_opts/model_2_opts.json).
run_training(2)

In [None]:
# Train model 3 (options are read from temp_opts/model_3_opts.json).
run_training(3)

# Models are trained, now create docker images

# Some utils to change the default files used in this notebook

In [10]:
# Options shared by every run. Written to temp_opts/default_blender_opts.json
# and merged into each model's options by run_training.
default_blender_opts = {
    "activation": "gelu",
    "attention_dropout": 0.0,
    "dict_lower": True,
    "dict_tokenizer": "bpe",
    "embedding_size": 512,
    "evaltask": "internal,external",
    "ffn_size": 2048,
    "fp16": True,
    "gradient_clip": 0.1,
    "label_truncate": 128,
    "learn_positional_embeddings": True,
    "lr_scheduler": "reduceonplateau",
    "metrics": "ppl,bleu-4,rouge-L",
    "model": "transformer/generator",
    "n_heads": 16,
    "n_layers": 8,
    "n_positions": 512,
    "optimizer": "adamax",
    "relu_dropout": 0.0,
    "save_after_valid": True,
    "skip_generation": False,
    "stim": 60,
    "tensorboard_log": True,
    "text_truncate": 512,
    "update_freq": 1,
    "variant": "xlm",
    "veps": 0.25,
    "vme": 20000,
    "vmm": "min",
    "vmt": "ppl",
    "vp": 15,
    "wblog": True,
}

# Template for the per-model options. It is copied to
# temp_opts/model_<i>_opts.json, where it can be edited for each model.
run_blender_opts = {
    "init_model": "zoo:blender/blender_90M/model",
    "dict_file": "zoo:blender/blender_90M/model.dict",
    "bs": 16,
    "betas": "0.9,0.999",
    "lr": 1e-06,
    "dropout": 0.1,
    "inference": "beam",
    "beam_size": 10,
    "beam_min_length": 10,
    "beam_block_ngram": 3,
    "wandb_project": "parlaiemely",
    "task": "internal,external,external-gpt3",
    "multitask_weights": "6,3,3",
    "mutators": None,
}

# Create the target directory first so this cell also works on a fresh
# checkout where temp_opts/ does not exist yet.
opts_dir = Path("temp_opts")
opts_dir.mkdir(parents=True, exist_ok=True)

# The defaults are stored with sorted keys; the run template keeps its
# insertion order.
for opts_filename, opts, sorted_keys in (
    ("default_blender_opts.json", default_blender_opts, True),
    ("run_blender_opts.json", run_blender_opts, False),
):
    with open(opts_dir / opts_filename, "w") as file:
        json.dump(opts, file, sort_keys=sorted_keys, indent=4)