In [None]:
# Fetch the project sources, then make the freshly cloned directory the
# working directory so later relative paths resolve inside it.
!git clone https://github.com/cher-liang/Gravitas-NLP
%cd Gravitas-NLP

d:\Dev\Gravitas-NLP\Gravitas-NLP


Cloning into 'Gravitas-NLP'...
  self.shell.db['dhist'] = compress_dhist(dhist)[-100:]


In [None]:
!pip install sentence-transformers

In [None]:
# Switch the working tree to the `restart` ref of the cloned repository.
!git checkout restart

In [None]:
# Unpack datasets.zip with 7-Zip, keeping the archive's directory structure.
# NOTE(review): presumably this provides the datasets/ folder read by the
# loading cells below - confirm against the archive contents.
!7z x datasets.zip

In [None]:
import pandas as pd
import numpy as np

import torch
from torch.utils.data import DataLoader

from sklearn.model_selection import KFold
from sklearn.model_selection import train_test_split

import logging 
from typing import List, Dict 
from datetime import datetime
import math

from EnsembleEncoder import EnsembleEncoder
from CustomEvaluator import CECustomEvaluator
# from DualCrossEncoder import CrossEncoder
# from DualEvaluator import DCECustomEvaluator

  from tqdm.autonotebook import tqdm, trange


In [None]:
# Root logging configuration: INFO and above, each record prefixed with a
# second-resolution timestamp.
logging.basicConfig(
    level=logging.INFO,
    datefmt="%Y-%m-%d %H:%M:%S",
    format="%(asctime)s - %(message)s",
)

# Logger for messages emitted from this notebook.
logger = logging.getLogger(__name__)

In [None]:
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

In [None]:
class GravitasData:
    """One graded answer: a (reference answer, given answer) pair with its score.

    Attributes are filled from a row-like object that supports ``row[key]``
    lookups for ``reference_answer``, ``answer``, ``normalized_score`` and
    ``source``.
    """

    def __init__(self, row) -> None:
        reference_text = row["reference_answer"]
        answer_text = row["answer"]
        # Reference answer first, given answer second.
        self.sentence_pair = (reference_text, answer_text)
        self.score = row["normalized_score"]
        self.dataset = row["source"]

    def __str__(self) -> str:
        """Return a two-line, human-readable summary of the record."""
        pair_line = f"Sentence Pair: {self.sentence_pair}\n"
        meta_line = f"Score: {self.score}\t Dataset: {self.dataset}\n"
        return pair_line + meta_line


In [None]:
def removeIrrelevantQuestions(df:pd.DataFrame):
    """Return the rows of ``df`` whose ``question`` is not a generic follow-up.

    Rows are dropped when their ``question`` value exactly matches one of the
    listed prompts; all other rows are returned with their original index.
    The input frame itself is not modified.
    """
    generic_prompts = [
        "Why?",
        "Explain your reasoning.",
        "Why not?",
        "Why did it happen?",
    ]
    is_generic = df.question.isin(generic_prompts)
    return df[~is_generic]

In [None]:
# Read each SemEval spreadsheet and immediately pass it through
# removeIrrelevantQuestions.
train_semeval_df = removeIrrelevantQuestions(
    pd.read_excel("datasets/semeval.xlsx")
)
test_semeval_df = removeIrrelevantQuestions(
    pd.read_excel("datasets/test/semeval_unseen_domains.xlsx")
)
dev_semeval_df1 = removeIrrelevantQuestions(
    pd.read_excel("datasets/develop/semeval_unseen_answers.xlsx")
)
dev_semeval_df2 = removeIrrelevantQuestions(
    pd.read_excel("datasets/develop/semeval_unseen_questions.xlsx")
)

# Wrap every row in a GravitasData record. The train split is kept as a NumPy
# array and the test split as a plain list.
train_semeval_dataset = train_semeval_df.apply(GravitasData, axis=1).to_numpy()
test_semeval_dataset = test_semeval_df.apply(GravitasData, axis=1).tolist()

# The two development files (unseen answers, unseen questions) are merged
# into a single flat array.
dev_semeval_dataset = np.append(
    dev_semeval_df1.apply(GravitasData, axis=1).to_numpy(),
    dev_semeval_df2.apply(GravitasData, axis=1).to_numpy(),
)

In [None]:
# SAG spreadsheet -> list of GravitasData records (one per row).
sag_df = pd.read_excel("datasets/sag.xlsx")
sag_dataset = sag_df.apply(GravitasData, axis=1).tolist()

# Misc spreadsheet -> list of GravitasData records (one per row).
misc_df = pd.read_excel("datasets/misc.xlsx")
misc_dataset = misc_df.apply(GravitasData, axis=1).tolist()

In [None]:
# Training hyperparameters used by the cross-validation loop.
num_epochs: int = 10  # epochs passed to model.fit for every fold
train_batch_size: int = 16  # batch size of the training DataLoader

In [None]:
# 5-fold cross-validation over the SAG and misc datasets; the SemEval
# train/dev/test splits are reused unchanged in every fold.

# Fixed seed so that fold assignment, the train/test splits, the in-place
# shuffles and the DataLoader shuffling are repeatable across kernel restarts.
seed = 42
np.random.seed(seed)
torch.manual_seed(seed)

kf = KFold(n_splits=5, shuffle=True, random_state=seed)
for fold, (sag_index, misc_index) in enumerate(
    zip(kf.split(sag_dataset), kf.split(misc_dataset))
):
    # Shuffle the SemEval splits in place (uses the NumPy global RNG).
    np.random.shuffle(train_semeval_dataset)
    np.random.shuffle(test_semeval_dataset)
    np.random.shuffle(dev_semeval_dataset)

    # KFold yields (remaining indices, held-out indices); the held-out fifth
    # of each dataset serves as this fold's development set.
    train_test_sag_index, dev_sag_index = sag_index
    train_test_misc_index, dev_misc_index = misc_index

    # Split the remaining 80% into 75% train / 25% test, i.e. roughly
    # 60% / 20% of the full dataset.
    train_sag_index, test_sag_index = train_test_split(
        train_test_sag_index, test_size=0.25, random_state=seed
    )
    train_misc_index, test_misc_index = train_test_split(
        train_test_misc_index, test_size=0.25, random_state=seed
    )

    train_datasets_list = [
        train_semeval_dataset,
        np.take(sag_dataset, train_sag_index),
        np.take(misc_dataset, train_misc_index),
    ]

    dev_datasets_list = [
        dev_semeval_dataset,
        np.take(sag_dataset, dev_sag_index),
        np.take(misc_dataset, dev_misc_index),
    ]

    test_datasets_list = [
        test_semeval_dataset,
        np.take(sag_dataset, test_sag_index),
        np.take(misc_dataset, test_misc_index),
    ]

    train_dataset = torch.utils.data.ConcatDataset(train_datasets_list)
    dev_dataset = torch.utils.data.ConcatDataset(dev_datasets_list)
    test_dataset = torch.utils.data.ConcatDataset(test_datasets_list)

    # A fresh ensemble is built from the three saved checkpoints for each fold.
    model = EnsembleEncoder("models/roberta.safetensors","models/miniLM.safetensors","models/gist.safetensors")
    # "_" separates the fold index from the timestamp; without it the fold
    # number runs straight into the year (e.g. "fold-02024-...").
    model_save_path = f'output/training_ensemble_fold-{fold}_'+datetime.now().strftime("%Y-%m-%d_%H-%M-%S")

    train_dataloader = DataLoader(
        dataset=train_dataset,
        batch_size=train_batch_size,
        shuffle=True,
    )

    evaluator = CECustomEvaluator.from_input_examples(dev_dataset, name="ensemble-dev")

    # Warm-up spans 10% of the total number of training batches
    # (batches per epoch * number of epochs).
    warmup_steps = math.ceil(
        len(train_dataloader) * num_epochs * 0.1
    )

    model.fit(
        train_dataloader=train_dataloader,
        freeze=True,
        evaluator=evaluator,
        epochs=num_epochs,
        evaluation_steps=1000,
        warmup_steps=warmup_steps,
        output_path=model_save_path,
        use_amp= True
    )

    # Score the fold's trained model on its test split; results are written
    # under the same output directory as the model.
    test_evaluator = CECustomEvaluator.from_input_examples(
        test_dataset, name="ensemble-test"
    )
    test_evaluator(model, output_path=model_save_path)


2024-02-06 00:20:44 - Use pytorch device: cuda


Epoch:   0%|          | 0/10 [00:00<?, ?it/s]

Iteration:   0%|          | 0/597 [00:00<?, ?it/s]

2024-02-06 00:22:45 - Evaluator: Evaluating the model on sts-dev dataset after epoch 0:
2024-02-06 00:23:18 - Correlation:	Pearson: -0.6705	Spearman: -0.6710
2024-02-06 00:23:18 - QWK: -0.1628
2024-02-06 00:23:18 - Save model to output/training_dualbert_fold-02024-02-06_00-20-44


Iteration:   0%|          | 0/597 [00:00<?, ?it/s]

2024-02-06 00:25:22 - Evaluator: Evaluating the model on sts-dev dataset after epoch 1:
2024-02-06 00:25:56 - Correlation:	Pearson: -0.6695	Spearman: -0.6712
2024-02-06 00:25:56 - QWK: -0.1665


Iteration:   0%|          | 0/597 [00:00<?, ?it/s]

2024-02-06 00:28:01 - Evaluator: Evaluating the model on sts-dev dataset after epoch 2:
2024-02-06 00:28:36 - Correlation:	Pearson: -0.6685	Spearman: -0.6714
2024-02-06 00:28:36 - QWK: -0.1652


Iteration:   0%|          | 0/597 [00:00<?, ?it/s]

2024-02-06 00:30:42 - Evaluator: Evaluating the model on sts-dev dataset after epoch 3:
2024-02-06 00:31:17 - Correlation:	Pearson: -0.6676	Spearman: -0.6715
2024-02-06 00:31:17 - QWK: -0.1632


Iteration:   0%|          | 0/597 [00:00<?, ?it/s]

2024-02-06 00:33:25 - Evaluator: Evaluating the model on sts-dev dataset after epoch 4:
2024-02-06 00:34:01 - Correlation:	Pearson: -0.6668	Spearman: -0.6717
2024-02-06 00:34:01 - QWK: -0.1619


Iteration:   0%|          | 0/597 [00:00<?, ?it/s]

2024-02-06 00:36:10 - Evaluator: Evaluating the model on sts-dev dataset after epoch 5:
2024-02-06 00:36:46 - Correlation:	Pearson: -0.6661	Spearman: -0.6718
2024-02-06 00:36:46 - QWK: -0.1607


Iteration:   0%|          | 0/597 [00:00<?, ?it/s]

2024-02-06 00:38:54 - Evaluator: Evaluating the model on sts-dev dataset after epoch 6:


KeyboardInterrupt: 