# Reproduce the Competition Results: Predict
Please see the README for how to set up the data and checkpoints.

In [14]:
# If on Colab
!pip install -q pytorch-lightning==1.6.4 neptune-client transformers sentencepiece

^C


In [1]:
import pandas as pd
import numpy as np

from tqdm.auto import tqdm

import torch

from transformers import AutoTokenizer

import pytorch_lightning as pl

import pickle

# Notebook-wide settings.
RANDOM_SEED: int = 42  # seed passed to pytorch-lightning below
COLAB: bool = True     # guards the Google-Colab-only Drive mount further down

# Seed once, up front; the call's return value is shown as the cell output.
pl.seed_everything(seed=RANDOM_SEED)

Global seed set to 42


42

In [None]:
# Show whether PyTorch can see a CUDA device in this runtime.
torch.cuda.is_available()

Connect to Google Drive if running on Colab.

In [None]:
if COLAB:
    # Colab only: report the current working directory and mount Google Drive
    # at /content/drive.
    import os

    from google.colab import drive

    # A bare expression inside an `if` block is not displayed by the notebook,
    # so the working directory is printed explicitly.
    print(os.getcwd())
    drive.mount('/content/drive')

In [None]:
cd ./drive/MyDrive/human_value/human_value_detector

In [None]:
# Shell command: pull the latest changes into the repository in the current
# working directory.
!git pull

## Import the PyTorch Lightning model and set the paths to the model checkpoints
These models are ensembled to make the predictions for the test file.

In [1]:
from data_modules.BertDataModule import BertDataset
from models.BertFineTunerPl import BertFineTunerPl

Here we define the models that we want to ensemble. Download the models used for the submission and place them in the checkpoint folder, then specify the paths to them below in order to reproduce the results. (If you want to ensemble a different combination, just select it here. If you have trained your own models you can list them here too, but you need to ensure that their params files can be loaded (see below).)

In [None]:
# Ensemble configuration, filled in key by key (insertion order is kept).
PARAMS_ENSEMBLE = {}

# Checkpoints of the models that make up the ensemble.
PARAMS_ENSEMBLE["MODEL_CHECKPOINTS"] = [
    './checkpoints/HCV-409-microsoft-deberta-large-BS_8-LR_2e-05-HL_None-DROPOUT_None-SL_None.ckpt',
    './checkpoints/HCV-408-microsoft-deberta-large-BS_8-LR_2e-05-HL_None-DROPOUT_None-SL_None.ckpt',
    './checkpoints/HCV-406-microsoft-deberta-large-BS_8-LR_2e-05-HL_None-DROPOUT_None-SL_None.ckpt',
    './checkpoints/HCV-402-danschr-roberta-large-BS_16-EPOCHS_8-LR_5e-05-ACC_GRAD_2-MAX_LENGTH_165-BS_8-LR_2e-05-HL_None-DROPOUT_None-SL_None.ckpt',
    './checkpoints/HCV-403-danschr-roberta-large-BS_16-EPOCHS_8-LR_5e-05-ACC_GRAD_2-MAX_LENGTH_165-BS_8-LR_2e-05-HL_None-DROPOUT_None-SL_None.ckpt',
    './checkpoints/HCV-405-danschr-roberta-large-BS_16-EPOCHS_8-LR_5e-05-ACC_GRAD_2-MAX_LENGTH_165-BS_8-LR_2e-05-HL_None-DROPOUT_None-SL_None.ckpt',
    './checkpoints/HCV-364-microsoft-deberta-large-BS_8-LR_2e-05-HL_None-DROPOUT_None-SL_None.ckpt',
    './checkpoints/HCV-366-microsoft-deberta-large-BS_8-LR_2e-05-HL_None-DROPOUT_None-SL_None.ckpt',
    './checkpoints/HCV-368-microsoft-deberta-large-BS_8-LR_2e-05-HL_None-DROPOUT_None-SL_None.ckpt',
    './checkpoints/HCV-371-danschr-roberta-large-BS_16-EPOCHS_8-LR_5e-05-ACC_GRAD_2-MAX_LENGTH_165-BS_8-LR_2e-05-HL_None-DROPOUT_None-SL_None.ckpt',
    './checkpoints/HCV-372-danschr-roberta-large-BS_16-EPOCHS_8-LR_5e-05-ACC_GRAD_2-MAX_LENGTH_165-BS_8-LR_2e-05-HL_None-DROPOUT_None-SL_None.ckpt',
    './checkpoints/HCV-375-danschr-roberta-large-BS_16-EPOCHS_8-LR_5e-05-ACC_GRAD_2-MAX_LENGTH_165-BS_8-LR_2e-05-HL_None-DROPOUT_None-SL_None.ckpt',
]

# Free-text description of the ensemble composition.
PARAMS_ENSEMBLE["DESCRIPTION"] = "FULL #3xDebL_F1 3EP 3xdanRobL_F1 3EP 3xDebL_Loss 3EP 3xdanRobL_Loss 3EP"

# Tab-separated file with the arguments to predict on.
PARAMS_ENSEMBLE["TEST_PATH"] = "./data/arguments-test.tsv"

PARAMS_ENSEMBLE["MAX_THRESHOLD_METRIC"] = "custom"

# Prefix used when building the ensemble's name.
PARAMS_ENSEMBLE["ENSEMBLE"] = "EN"

# Decision threshold applied to the averaged predictions. You find the optimal
# threshold in the paper (or calculate it in ensemble_eval_and_predict.ipynb).
PARAMS_ENSEMBLE["ENSEMBLE_THRESHOLD"] = 0.26

# Output labels, in the column order of the model predictions.
PARAMS_ENSEMBLE["LABEL_COLUMNS"] = [
    'Self-direction: thought',
    'Self-direction: action',
    'Stimulation',
    'Hedonism',
    'Achievement',
    'Power: dominance',
    'Power: resources',
    'Face',
    'Security: personal',
    'Security: societal',
    'Tradition',
    'Conformity: rules',
    'Conformity: interpersonal',
    'Humility',
    'Benevolence: caring',
    'Benevolence: dependability',
    'Universalism: concern',
    'Universalism: nature',
    'Universalism: tolerance',
    'Universalism: objectivity',
]

We extract the identifier (e.g. "HCV-409") from each checkpoint path.

In [None]:
# Get the identifier (e.g. "HCV-409") from each checkpoint path.
# (Used to get the corresponding PARAMS file.)
ids = []
for checkpoint_path in PARAMS_ENSEMBLE["MODEL_CHECKPOINTS"]:
    # Work on the file name only, so the id does not depend on the directory
    # part of the path.
    file_name = checkpoint_path.rsplit("/", 1)[-1]
    # The id is the first two dash-separated fields of the file name.
    # `model_id` rather than `id`: a module-level `id` would shadow the builtin
    # for the rest of the kernel session.
    prefix, number = file_name.split("-")[:2]
    model_id = f"{prefix}-{number}"
    ids.append(model_id)
    print(model_id)

# Ensemble name: the ensemble prefix followed by all ids, joined with "_".
NAME = PARAMS_ENSEMBLE["ENSEMBLE"] + "_" + "_".join(ids)

PARAMS_ENSEMBLE["IDS"] = ids
LABEL_COLUMNS = PARAMS_ENSEMBLE["LABEL_COLUMNS"]

## The Ensemble List

Take the IDs generated above and, for each of them, load the params file with the same ID.

In [None]:
# Load the pickled parameter dictionary that belongs to each model id.
# NOTE(security): pickle.load can execute arbitrary code while loading; only
# use PARAMS files that come from a source you trust.
PARAMS_LIST = []
for model_id in PARAMS_ENSEMBLE["IDS"]:
    params_path = f'./checkpoints/{model_id}_PARAMS.pkl'
    with open(params_path, 'rb') as f:
        loaded_dict = pickle.load(f)

    # Fail here, before any model is loaded, if a key that the prediction
    # cells read from the params is missing.
    missing_keys = [key for key in ("MODEL_PATH", "MAX_TOKEN_COUNT") if key not in loaded_dict]
    if missing_keys:
        raise KeyError(f"{params_path} is missing required keys: {missing_keys}")

    PARAMS_LIST.append(loaded_dict)

We group together the checkpoint and parameters in a list

In [2]:
# Bundle the relevant information per model into one ENSEMBLE_LIST entry:
# parameters, id, and path to the checkpoint.
ensemble_ids = PARAMS_ENSEMBLE["IDS"]
checkpoint_paths = PARAMS_ENSEMBLE["MODEL_CHECKPOINTS"]

# zip() stops at the shortest input without complaint. If the lists got out of
# sync (e.g. the checkpoint list was edited without re-running the cells that
# derive the ids and load the params), models would silently drop out.
if not (len(PARAMS_LIST) == len(ensemble_ids) == len(checkpoint_paths)):
    raise ValueError(
        "PARAMS_LIST, IDS and MODEL_CHECKPOINTS differ in length "
        f"({len(PARAMS_LIST)}, {len(ensemble_ids)}, {len(checkpoint_paths)}); "
        "re-run the cells above."
    )

ENSEMBLE_LIST = [
    {"PARAMS": params, "ID": model_id, "MODEL_CHECKPOINT": checkpoint}
    for params, model_id, checkpoint in zip(PARAMS_LIST, ensemble_ids, checkpoint_paths)
]

# Predict the Submission Test File

Load the test data from the path specified in the params above (`./data/arguments-test.tsv` for the official submission; other test files work too if you have some).
Prepare the text column by concatenating premise, stance and conclusion.


In [None]:
# Read the tab-separated test file and add a "text" column made of premise,
# stance and conclusion joined by single spaces.
test_df_input = pd.read_csv(PARAMS_ENSEMBLE["TEST_PATH"], sep='\t').assign(
    text=lambda frame: frame["Premise"] + " " + frame["Stance"] + " " + frame["Conclusion"]
)

In [None]:
def predict_unseen_data(trained_model, data, tokenizer=None, max_token_count=None):
    """Run ``trained_model`` over every item built from ``data`` and collect the predictions.

    Args:
        trained_model: Model called as ``model(input_ids, attention_mask)``; it
            must return a 2-tuple whose second element is the prediction tensor.
            It is moved to the selected device and put into eval mode.
        data: Input rows, passed unchanged to ``BertDataset``.
        tokenizer: Tokenizer passed to ``BertDataset``. Defaults to the
            notebook-level ``TOKENIZER`` so existing two-argument calls keep
            working.
        max_token_count: Maximum token count passed to ``BertDataset``.
            Defaults to ``PARAMS["MAX_TOKEN_COUNT"]`` from the notebook
            namespace.

    Returns:
        torch.Tensor: CPU tensor holding one flattened prediction per dataset
        item, stacked along dimension 0.

    Raises:
        RuntimeError: If the dataset yields no items (``torch.stack`` rejects
            an empty list).
    """
    # Fall back to the notebook-level globals only when nothing was passed in.
    if tokenizer is None:
        tokenizer = TOKENIZER
    if max_token_count is None:
        max_token_count = PARAMS["MAX_TOKEN_COUNT"]

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    trained_model = trained_model.to(device)
    # Inference only: make sure layers such as dropout are in eval mode even
    # if the caller did not do so.
    trained_model.eval()

    test_dataset = BertDataset(
        data=data,
        tokenizer=tokenizer,
        max_token_count=max_token_count,
    )

    predictions = []

    # No autograd bookkeeping is needed for prediction.
    with torch.no_grad():
        for item in tqdm(test_dataset):
            # Items are single examples; add a batch dimension of size 1.
            _, prediction = trained_model(
                item["input_ids"].unsqueeze(dim=0).to(device),
                item["attention_mask"].unsqueeze(dim=0).to(device)
            )
            # Move each result off the device right away instead of holding
            # all of them there until the end.
            predictions.append(prediction.flatten().cpu())

    return torch.stack(predictions)

We iterate over the models in the ensemble list and get each model's predictions for the test dataset.

In [None]:
# Iterate over elements in Ensemble_List and get predictions from each model. Collect them in predictions [] list.
predictions = []
for idx, elem in enumerate(ENSEMBLE_LIST):
    print(f"Starting with model {elem['MODEL_CHECKPOINT']}")
    PARAMS = elem["PARAMS"]
    trained_model = BertFineTunerPl.load_from_checkpoint(
        elem["MODEL_CHECKPOINT"],
        params=PARAMS,
        label_columns=LABEL_COLUMNS,
        n_classes=len(LABEL_COLUMNS)
    )
    trained_model.eval()
    trained_model.freeze()
    print(f"With Tokenizer {PARAMS['MODEL_PATH']}")
    TOKENIZER = AutoTokenizer.from_pretrained(PARAMS["MODEL_PATH"])
    pred = predict_unseen_data(trained_model=trained_model, data=test_df_input)
    predictions.append(pred)

For each sample we now have one prediction per model (12 with the ensemble defined above). We stack the predictions together and then take the average over the models.

In [None]:
# Stack the per-model prediction tensors along a new leading axis and average
# over that axis.
# The stacked array gets its own name: `predictions` stays a list of tensors,
# so this cell can be re-run without repeating the inference loop.
predictions_stacked = torch.stack(predictions).numpy()
predictions_avg = np.mean(predictions_stacked, axis=0)

Binarize the Output with the optimal decision threshold (previously defined)

In [None]:
# Output values for scores above / not above the decision threshold.
lower, upper = 0, 1

# Use the optimal decision threshold configured for the ensemble.
exceeds_threshold = predictions_avg > PARAMS_ENSEMBLE["ENSEMBLE_THRESHOLD"]
y_pred = np.where(exceeds_threshold, upper, lower)

Create the submission file

In [None]:
# Column mapping for the submission table: the argument ids first, then one
# column of binarized predictions per label, in LABEL_COLUMNS order.
prediction_dictionary = {"Argument ID": test_df_input["Argument ID"]}
prediction_dictionary.update(
    (label, y_pred[:, column]) for column, label in enumerate(LABEL_COLUMNS)
)

test_prediction_df = pd.DataFrame(prediction_dictionary)
test_prediction_df.head()

In [None]:
# Write the predictions as a tab-separated file without the index column.
# (Plain string literal: the former f-string had no placeholders.)
test_prediction_df.to_csv("./submission_test.tsv", sep="\t", index=False)