configs/mtl_3task.json

{
  "dataset_reader": {
    "class_name": "multitask_reader",
    "task_defaults": {
      "class_name": "huggingface_dataset_reader",
      "path": "glue",
      "train": "train",
      "valid": "validation",
      "test": "test"
    },
    "tasks": {
      "rte": {"name": "rte"},
      "copa": {
        "path": "super_glue",
        "name": "copa"
      },
      "conll": {
        "class_name": "conll2003_reader",
        "use_task_defaults": false,
        "data_path": "{DOWNLOADS_PATH}/conll2003/",
        "dataset_name": "conll2003",
        "provide_pos": false
      }
    }
  },
  "dataset_iterator": {
    "class_name": "multitask_iterator",
    "num_train_epochs": "{NUM_TRAIN_EPOCHS}",
    "gradient_accumulation_steps": "{GRADIENT_ACC_STEPS}",
    "seed": 42,
    "task_defaults": {
      "class_name": "huggingface_dataset_iterator",
      "label": "label",
      "use_label_name": false,
      "seed": 42
    },
    "tasks": {
      "rte": {
        "features": ["sentence1", "sentence2"]
      },
      "copa": {
        "features": ["contexts", "choices"]
      },
      "conll": {
        "class_name": "basic_classification_iterator",
        "seed": 42,
        "use_task_defaults": false
      }
    }
  },
  "chainer": {
    "in": ["x_rte", "x_copa", "x_conll"],
    "in_y": ["y_rte", "y_copa", "y_conll"],
    "pipe": [
      {
        "class_name": "multitask_pipeline_preprocessor",
        "possible_keys_to_extract": [0, 1],
        "preprocessors": [
          "TorchTransformersPreprocessor",
          "TorchTransformersMultiplechoicePreprocessor",
          "TorchTransformersNerPreprocessor"
        ],
        "do_lower_case": true,
        "n_task": 3,
        "vocab_file": "{BACKBONE}",
        "max_seq_length": 200,
        "max_subword_length": 15,
        "token_masking_prob": 0.0,
        "return_features": true,
        "in": ["x_rte", "x_copa", "x_conll"],
        "out": [
          "bert_features_rte",
          "bert_features_copa",
          "bert_features_conll"
        ]
      },
      {
        "id": "vocab_conll",
        "class_name": "simple_vocab",
        "unk_token": ["O"],
        "pad_with_zeros": true,
        "save_path": "{MODELS_PATH}/tag.dict",
        "load_path": "{MODELS_PATH}/tag.dict",
        "fit_on": ["y_conll"],
        "in": ["y_conll"],
        "out": ["y_ids_conll"]
      },
      {
        "id": "multitask_transformer",
        "class_name": "multitask_transformer",
        "optimizer_parameters": {"lr": 2e-5},
        "gradient_accumulation_steps": "{GRADIENT_ACC_STEPS}",
        "learning_rate_drop_patience": 2,
        "learning_rate_drop_div": 2.0,
        "return_probas": true,
        "backbone_model": "{BACKBONE}",
        "save_path": "{MODEL_PATH}",
        "load_path": "{MODEL_PATH}",
        "tasks": {
          "rte": {
            "type": "classification",
            "options": 2
          },
          "copa": {
            "type": "multiple_choice",
            "options": 2
          },
          "conll": {
            "type": "sequence_labeling",
            "options": "#vocab_conll.len"
          }
        },
        "in": [
          "bert_features_rte",
          "bert_features_copa",
          "bert_features_conll"
        ],
        "in_y": ["y_rte", "y_copa", "y_ids_conll"],
        "out": [
          "y_rte_pred_probas",
          "y_copa_pred_probas",
          "y_conll_pred_ids"
        ]
      },
      {
        "in": ["y_rte_pred_probas"],
        "out": ["y_rte_pred_ids"],
        "class_name": "proba2labels",
        "max_proba": true
      },
      {
        "in": ["y_copa_pred_probas"],
        "out": ["y_copa_pred_ids"],
        "class_name": "proba2labels",
        "max_proba": true
      },
      {
        "in": ["y_conll_pred_ids"],
        "out": ["y_conll_pred_labels"],
        "ref": "vocab_conll"
      }
    ],
    "out": ["y_rte_pred_ids", "y_copa_pred_ids", "y_conll_pred_labels"]
  },
  "train": {
    "epochs": "{NUM_TRAIN_EPOCHS}",
    "batch_size": 32,
    "metrics": [
      {
        "name": "multitask_accuracy",
        "inputs": ["y_rte", "y_copa", "y_rte_pred_ids", "y_copa_pred_ids"]
      },
      {
        "name": "ner_f1",
        "inputs": ["y_conll", "y_conll_pred_labels"]
      },
      {
        "name": "ner_token_f1",
        "inputs": ["y_conll", "y_conll_pred_labels"]
      },
      {
        "name": "accuracy",
        "alias": "accuracy_rte",
        "inputs": ["y_rte", "y_rte_pred_ids"]
      },
      {
        "name": "accuracy",
        "alias": "accuracy_copa",
        "inputs": ["y_copa", "y_copa_pred_ids"]
      }
    ],
    "validation_patience": 3,
    "log_every_n_epochs": 1,
    "show_examples": false,
    "evaluation_targets": ["valid"],
    "class_name": "torch_trainer",
    "pytest_max_batches": 2
  },
  "metadata": {
    "variables": {
      "ROOT_PATH": "~/.deeppavlov",
      "MODELS_PATH": "{ROOT_PATH}/models/multitask_example",
      "DOWNLOADS_PATH": "{ROOT_PATH}/downloads",
      "BACKBONE": "distilbert-base-uncased",
      "MODEL_PATH": "{MODELS_PATH}/{BACKBONE}_3task",
      "NUM_TRAIN_EPOCHS": 5,
      "GRADIENT_ACC_STEPS": 1
    }
  }
}