# Install dependencies

In [1]:
!pip install requests==2.23.0 numpy==1.18.5 pandas==1.0.3 \
    scikit-learn==0.23.1 pytorch-lightning==0.8.4 torch==1.5.1 \
    transformers==3.0.2 sklearn==0.0 tqdm==4.45.0 neptune-client==0.4.119 \
    matplotlib==3.1.0 scikit-plot==0.3.7

Collecting requests==2.23.0
  Using cached requests-2.23.0-py2.py3-none-any.whl.metadata (6.8 kB)
Collecting numpy==1.18.5
  Using cached numpy-1.18.5.zip (5.4 MB)
  Installing build dependencies: started
  Installing build dependencies: finished with status 'done'
  Getting requirements to build wheel: started
  Getting requirements to build wheel: finished with status 'done'
  Preparing metadata (pyproject.toml): started
  Preparing metadata (pyproject.toml): finished with status 'error'


  error: subprocess-exited-with-error
  
  Preparing metadata (pyproject.toml) did not run successfully.
  exit code: 1
  
  [26 lines of output]
  Running from numpy source directory.
  Traceback (most recent call last):
    File "E:\Python\Python3.11.5\Lib\site-packages\pip\_vendor\pyproject_hooks\_in_process\_in_process.py", line 389, in <module>
      main()
    File "E:\Python\Python3.11.5\Lib\site-packages\pip\_vendor\pyproject_hooks\_in_process\_in_process.py", line 373, in main
      json_out["return_val"] = hook(**hook_input["kwargs"])
                               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
    File "E:\Python\Python3.11.5\Lib\site-packages\pip\_vendor\pyproject_hooks\_in_process\_in_process.py", line 175, in prepare_metadata_for_build_wheel
      return hook(metadata_directory, config_settings)
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
    File "C:\Users\pc\AppData\Local\Temp\pip-build-env-x45zhur9\overlay\Lib\site-packages\setuptools\build_meta.py", line 377

# Import needed modules

In [2]:
import gc
import json
import math
import os
from abc import ABC, abstractmethod
from collections import OrderedDict
from random import randint
from typing import Iterable, Tuple

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
from matplotlib.figure import Figure
from pandas import DataFrame
from pytorch_lightning import LightningModule, seed_everything
from pytorch_lightning import Trainer as LightningTrainer
from pytorch_lightning.logging.neptune import NeptuneLogger
from sklearn.metrics import *
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import label_binarize
from sklearn.utils import column_or_1d
from torch import Tensor, nn
from torch.nn import functional as F
from torch.optim import AdamW
from torch.optim.lr_scheduler import LambdaLR
from torch.utils.data import DataLoader, IterableDataset
from tqdm.auto import tqdm
from transformers import *



# Define constants

In [None]:
# --- Random seed ---
# One seed is shared by pytorch-lightning's seeding helper and the train/val split
SEED = 2020
seed_everything(SEED)

# --- Directory ---
# All paths are derived from the directory the notebook is started in
ROOT_DIR = os.path.abspath(".")
PROCESSED_DATA_DIR = os.path.join(ROOT_DIR, "data/processed")
METADATA_FILE_NAME = os.path.join(PROCESSED_DATA_DIR, "metadata.json")
CHECKPOINT_DIR = os.path.join(ROOT_DIR, "checkpoint")

# The KAGGLE_URL_BASE environment variable is used to detect a Kaggle kernel
KAGGLE_ENV = bool(os.getenv("KAGGLE_URL_BASE"))
if KAGGLE_ENV:
    # in Kaggle environment
    # the 2 datasets should already have been added to the notebook
    RAW_DATA_DIR = os.path.join(ROOT_DIR, "../input")
else:
    # in local environment
    RAW_DATA_DIR = os.path.join(ROOT_DIR, "data/raw")

# --- Datasets ---
# "dir": folder holding the raw files of the dataset
# "keep_test_order": when True the test samples are served in file order
# "precision_recall_curve_baseline_img": optional image drawn behind the
#     precision-recall curve for comparison (None = no background)
DATASET_MAPPING = {
    "SemEval2010Task8": {
        "dir": os.path.join(RAW_DATA_DIR, "semeval2010-task-8"),
        "keep_test_order": True,
        "precision_recall_curve_baseline_img": None,
    },
    "GIDS": {
        "dir": os.path.join(RAW_DATA_DIR, "gids-dataset"),
        "keep_test_order": False,
        "precision_recall_curve_baseline_img": os.path.join(RAW_DATA_DIR, "gids-dataset/GIDS_precision_recall_curve.png"),
    }
}

# change this variable to switch dataset in later tasks
# (index 0 = SemEval2010Task8, index 1 = GIDS)
DATASET_NAME = list(DATASET_MAPPING.keys())[1]

# --- Subject & object markup ---
# Characters wrapped around the subject / object entity inside each sentence
SUB_START_CHAR = "["
SUB_END_CHAR = "]"
OBJ_START_CHAR = "{"
OBJ_END_CHAR = "}"

# --- BERT variants ---
# See https://huggingface.co/transformers/pretrained_models.html for the full list
AVAILABLE_PRETRAINED_MODELS = [
    "distilbert-base-uncased", # 0
    "distilbert-base-cased",   # 1
    "bert-base-uncased",       # 2
    "distilgpt2",              # 3
    "gpt2",                    # 4
    "distilroberta-base",      # 5
    "roberta-base",            # 6
    "albert-base-v1",          # 7
    "albert-base-v2",          # 8
    "bert-large-uncased",      # 9
]

# change this variable to switch pretrained language model
PRETRAINED_MODEL = AVAILABLE_PRETRAINED_MODELS[2]

# if e1 is not related to e2, should "e2 not related to e1" be added to the training set
ADD_REVERSE_RELATIONSHIP = True

# --- Neptune logger ---
# Create a free account at https://neptune.ai/,
# then get the API token and create a project.
# Prefer exporting NEPTUNE_API_TOKEN / NEPTUNE_PROJECT in the environment so that
# no secret has to be saved inside the notebook; the placeholders below are only
# used when those variables are not set.
NEPTUNE_API_TOKEN = os.getenv("NEPTUNE_API_TOKEN", " INSERT YOUR API TOKEN HERE ")
NEPTUNE_PROJECT_NAME = os.getenv("NEPTUNE_PROJECT", " INSERT YOUR PROJECT NAME HERE ")

# Preprocess

In [5]:
class OrdinalLabelEncoder:
    """
    Encode labels as consecutive integers in the order they are first seen.

    Labels given to the constructor receive the lowest ids; labels discovered by
    later calls to ``fit`` are appended after them, so an id that has been handed
    out is never reassigned.
    """

    def __init__(self, init_labels=None):
        seed_labels = [] if init_labels is None else init_labels
        self.mapping = OrderedDict((label, idx) for idx, label in enumerate(seed_labels))

    @property
    def classes_(self):
        """Known labels, ordered by their integer id."""
        return list(self.mapping)

    def fit_transform(self, y):
        """Learn any new labels in ``y`` and return ``y`` encoded as ids."""
        self.fit(y)
        return self.transform(y)

    def fit(self, y):
        """Register every label of ``y`` that is not known yet; returns ``self``."""
        values = column_or_1d(y, warn=True)
        for candidate in pd.Series(values).unique():
            if candidate in self.mapping:
                continue
            # next free id == number of labels registered so far
            self.mapping[candidate] = len(self.mapping)
        return self

    def transform(self, y):
        """Return the list of ids for ``y``; an unknown label raises ``KeyError``."""
        values = column_or_1d(y, warn=True)
        encoded = []
        for value in values:
            encoded.append(self.mapping[value])
        return encoded

Seed set to 2020


## Abstract preprocessor class:

In [6]:
class AbstractPreprocessor(ABC):
    """
    Common preprocessing pipeline shared by every dataset.

    A concrete subclass sets ``DATASET_NAME`` and ``NO_RELATION_LABEL`` and implements
    ``_preprocess_data``. This base class tokenizes the sentences, locates the
    subject/object markers, writes the train/val/test JSON-lines files, derives the
    "binary" and "related_only" variants of those files and records sizes and label
    mappings in ``metadata.json``.
    """

    DATASET_NAME = ""
    VAL_DATA_PROPORTION = 0.2
    NO_RELATION_LABEL = ""

    def __init__(self, tokenizer: "PreTrainedTokenizer"):
        """
        :param tokenizer: tokenizer used to encode the sentences; the four marker
            characters are looked up in its vocabulary
        """
        self.tokenizer = tokenizer
        # token ids of the subject/object marker characters
        (
            self.SUB_START_ID,
            self.SUB_END_ID,
            self.OBJ_START_ID,
            self.OBJ_END_ID,
        ) = tokenizer.convert_tokens_to_ids([SUB_START_CHAR, SUB_END_CHAR, OBJ_START_CHAR, OBJ_END_CHAR])
        # the "no relation" label is registered first so that it always gets id 0
        self.label_encoder = OrdinalLabelEncoder([self.NO_RELATION_LABEL])

    def preprocess_data(self, reprocess: bool):
        """
        Run the whole pipeline and write its outputs into PROCESSED_DATA_DIR.

        :param reprocess: when False and at least one of the train/val/test files
            already exists, nothing is done; when True existing files are overwritten
        """
        print(f"\n---> Preprocessing {self.DATASET_NAME} dataset <---")

        # create processed data dir
        if not os.path.exists(PROCESSED_DATA_DIR):
            print("Creating processed data directory " + PROCESSED_DATA_DIR)
            os.makedirs(PROCESSED_DATA_DIR)

        # stop preprocessing if file existed
        json_file_names = [self.get_dataset_file_name(k) for k in ("train", "val", "test")]
        existed_files = [fn for fn in json_file_names if os.path.exists(fn)]
        if existed_files:
            file_text = "- " + "\n- ".join(existed_files)
            if not reprocess:
                print("The following files already exist:")
                print(file_text)
                print("Preprocessing is skipped. See option --reprocess.")
                return
            print("The following files will be overwritten:")
            print(file_text)

        train_data, val_data, test_data = self._preprocess_data()

        print("Saving to json files")
        self._write_data_to_file(train_data, "train")
        self._write_data_to_file(val_data, "val")
        self._write_data_to_file(test_data, "test")

        self._save_metadata({
            "train_size": len(train_data),
            "val_size": len(val_data),
            "test_size": len(test_data),
            "no_relation_label": self.NO_RELATION_LABEL,
            **self._get_label_mapping()
        })

        # must run after the metadata is saved: it reads the sizes back from it
        self._create_secondary_data_files()

        print("---> Done ! <---")

    @abstractmethod
    def _preprocess_data(self) -> Tuple[DataFrame, DataFrame, DataFrame]:
        """Return the cleaned (train, val, test) dataframes of the dataset."""
        pass

    def _create_secondary_data_files(self):
        """
        From the primary data file, create a data file with binary labels
        and a data file with only sentences classified as "related"
        """

        with open(METADATA_FILE_NAME) as f:
            root_metadata = json.load(f)
            metadata = root_metadata[self.DATASET_NAME]

        related_only_count = {
            "train": 0,
            "val": 0,
            "test": 0,
        }

        for key in ["train", "test", "val"]:
            print(f"Creating secondary files for {key} data")

            total = metadata[f"{key}_size"]

            # context managers make sure all three files are closed even when a
            # line cannot be parsed or written
            with open(self.get_dataset_file_name(key)) as origin_file, \
                    open(self.get_dataset_file_name(f"{key}_binary"), "w") as bin_file, \
                    open(self.get_dataset_file_name(f"{key}_related_only"), "w") as related_file:
                for line in tqdm(origin_file, total=total):
                    data = json.loads(line)
                    if data["label"] != 0:
                        related_only_count[key] += 1
                        data["label"] -= 1 # label in "related_only" files is 1 less than the original label
                        related_file.write(json.dumps(data) + "\n")
                        data["label"] = 1 # in binary dataset, all "related" classes have label 1
                    # label 0 ("no relation") is written to the binary file unchanged
                    bin_file.write(json.dumps(data) + "\n")

        print("Updating metadata.json")
        for key in ["train", "test", "val"]:
            metadata[f"{key}_related_only_size"] = related_only_count[key]
        root_metadata[self.DATASET_NAME] = metadata
        with open(METADATA_FILE_NAME, "w") as f:
            json.dump(root_metadata, f, indent=4)

    def _find_sub_obj_pos(self, input_ids_list: Iterable) -> DataFrame:
        """
        Find subject and object position in a sentence

        For every sequence of token ids the start position is the index right after
        the opening marker (0 when the opening marker is missing) and the end
        position is the index of the closing marker searched from the start
        position onwards (-1 when it is missing).
        """
        sub_start_pos = [self._index(s, self.SUB_START_ID) + 1 for s in input_ids_list]
        sub_end_pos = [self._index(s, self.SUB_END_ID, sub_start_pos[i]) for i, s in enumerate(input_ids_list)]
        obj_start_pos = [self._index(s, self.OBJ_START_ID) + 1 for s in input_ids_list]
        obj_end_pos = [self._index(s, self.OBJ_END_ID, obj_start_pos[i]) for i, s in enumerate(input_ids_list)]
        return DataFrame({
            "sub_start_pos": sub_start_pos,
            "sub_end_pos": sub_end_pos,
            "obj_start_pos": obj_start_pos,
            "obj_end_pos": obj_end_pos,
        })

    @staticmethod
    def _index(lst: list, ele: int, start: int = 0) -> int:
        """
        Find an element in a list. Returns -1 if not found instead of raising an exception.
        """
        try:
            return lst.index(ele, start)
        except ValueError:
            return -1

    def _clean_data(self, raw_sentences: list, labels: list) -> DataFrame:
        """
        Tokenize the marked-up sentences, encode their labels and drop invalid rows.

        :param raw_sentences: sentences that already contain the entity markers
        :param labels: text label of each sentence, in the same order
        :return: one row per kept sentence with the tokenizer outputs, the numeric
            ``label`` and the four marker positions; an empty dataframe when
            ``raw_sentences`` is empty
        """
        if not raw_sentences:
            return DataFrame()

        tokens = self.tokenizer(raw_sentences, truncation=True, padding="max_length")
        data = DataFrame(tokens.data)
        data["label"] = self.label_encoder.fit_transform(labels)
        sub_obj_position = self._find_sub_obj_pos(data["input_ids"])
        data = pd.concat([data, sub_obj_position], axis=1)
        data = self._remove_invalid_sentences(data)
        return data

    def _remove_invalid_sentences(self, data: DataFrame) -> DataFrame:
        """
        Remove sentences without subject/object or whose subject/object
        is beyond the maximum length the model supports
        """
        seq_max_len = self.tokenizer.model_max_length
        return data.loc[
            # a start position of 0 means the opening marker was not found
            (data["sub_start_pos"] > 0)
            & (data["obj_start_pos"] > 0)
            # an end position of -1 means the closing marker was not found
            & (data["sub_end_pos"] > -1)
            & (data["obj_end_pos"] > -1)
            & (data["sub_end_pos"] < seq_max_len)
            & (data["obj_end_pos"] < seq_max_len)
        ]

    def _get_label_mapping(self):
        """
        Returns a mapping from id to label and vice versa from the label encoder
        """
        # all labels
        id_to_label = dict(enumerate(self.label_encoder.classes_))
        label_to_id = {v: k for k, v in id_to_label.items()}

        # for the related_only dataset
        # ignore id 0, which represents no relation, and shift the other ids down by 1
        id_to_label_related_only = {k - 1: v for k, v in id_to_label.items() if k != 0}
        label_to_id_related_only = {v: k for k, v in id_to_label_related_only.items()}

        return {
            "id_to_label": id_to_label,
            "label_to_id": label_to_id,
            "id_to_label_related_only": id_to_label_related_only,
            "label_to_id_related_only": label_to_id_related_only,
        }

    def _write_data_to_file(self, dataframe: DataFrame, subset: str):
        """Write data in a dataframe to train/val/test file, one JSON object per line"""
        with open(self.get_dataset_file_name(subset), "w") as file:
            # rows are streamed to the file instead of being concatenated into
            # one big string first
            file.writelines(row.to_json() + "\n" for _, row in dataframe.iterrows())

    def _save_metadata(self, metadata: dict):
        """Save metadata to metadata.json under the key of this dataset"""
        # create metadata file
        if not os.path.exists(METADATA_FILE_NAME):
            print(f"Create metadata file at {METADATA_FILE_NAME}")
            with open(METADATA_FILE_NAME, "w") as f:
                f.write("{}\n")

        # add metadata; entries of other datasets are kept as they are
        print("Saving metadata")
        with open(METADATA_FILE_NAME) as f:
            root_metadata = json.load(f)
        root_metadata[self.DATASET_NAME] = metadata
        with open(METADATA_FILE_NAME, "w") as f:
            json.dump(root_metadata, f, indent=4)

    @classmethod
    def get_dataset_file_name(cls, key: str) -> str:
        """Path of the processed file ``<dataset name in lower case>_<key>.json``"""
        return os.path.join(PROCESSED_DATA_DIR, f"{cls.DATASET_NAME.lower()}_{key}.json")

## Concrete preprocessor for each dataset:

In [7]:
class SemEval2010Task8Preprocessor(AbstractPreprocessor):
    """
    Preprocessor for the SemEval-2010 Task 8 dataset.

    The raw files are read as fixed-size groups of four lines: the sentence, its
    relation label and two lines that are skipped. Only a train and a test file
    are read; the validation set is split off the training data.
    """

    DATASET_NAME = "SemEval2010Task8"
    NO_RELATION_LABEL = "Other"
    RAW_TRAIN_FILE_NAME = os.path.join(DATASET_MAPPING["SemEval2010Task8"]["dir"],
                                       "SemEval2010_task8_training/TRAIN_FILE.TXT")
    RAW_TEST_FILE_NAME = os.path.join(DATASET_MAPPING["SemEval2010Task8"]["dir"],
                                      "SemEval2010_task8_testing_keys/TEST_FILE_FULL.TXT")
    # number of records read from each raw file
    RAW_TRAIN_DATA_SIZE = 8000
    RAW_TEST_DATA_SIZE = 2717

    def _preprocess_data(self):
        """Return the (train, val, test) dataframes; val is a share of the train file."""
        print("Processing training data")
        train_data = self._process_file(
            self.RAW_TRAIN_FILE_NAME,
            self.RAW_TRAIN_DATA_SIZE,
            ADD_REVERSE_RELATIONSHIP,
        )

        print("Processing test data")
        test_data = self._process_file(
            self.RAW_TEST_FILE_NAME,
            self.RAW_TEST_DATA_SIZE,
            False,
        )

        print("Splitting train & validate data")
        # honour the proportion declared on the class instead of silently falling
        # back to scikit-learn's default test size
        train_data, val_data = train_test_split(
            train_data,
            test_size=self.VAL_DATA_PROPORTION,
            shuffle=True,
            random_state=SEED,
        )

        return train_data, val_data, test_data

    def _process_file(self, file_name: str, dataset_size: int, add_reverse: bool) -> DataFrame:
        """
        Read ``dataset_size`` records from a raw file and clean them.

        :param file_name: path of the raw file
        :param dataset_size: number of 4-line records to read
        :param add_reverse: when True every "no relation" sentence is added a second
            time with the subject and object roles swapped
        :return: dataframe produced by ``_clean_data``
        """
        raw_sentences = []
        labels = []
        with open(file_name) as f:
            for _ in tqdm(range(dataset_size)):
                sent = f.readline()
                label, sub, obj = self._process_label(f.readline())
                labels.append(label)
                raw_sentences.append(self._process_sentence(sent, sub, obj))
                if add_reverse and label == self.NO_RELATION_LABEL:
                    labels.append(label)
                    raw_sentences.append(self._process_sentence(sent, obj, sub))
                # the last two lines of a record are not used
                # (presumably the comment line and a blank separator - verify against the raw file)
                f.readline()
                f.readline()

        return self._clean_data(raw_sentences, labels)

    @staticmethod
    def _process_sentence(sentence: str, sub: int, obj: int) -> str:
        """
        Replace the <eN>...</eN> tags of a raw line with the subject/object markers.

        :param sentence: raw line; the text is taken from its second tab-separated
            field, without that field's first character and last two characters
            (presumably the surrounding quotes and the newline - verify against the raw file)
        :param sub: number of the entity tag that plays the subject role
        :param obj: number of the entity tag that plays the object role
        """
        return sentence.split("\t")[1][1:-2] \
            .replace(f"<e{sub}>", SUB_START_CHAR) \
            .replace(f"</e{sub}>", SUB_END_CHAR) \
            .replace(f"<e{obj}>", OBJ_START_CHAR) \
            .replace(f"</e{obj}>", OBJ_END_CHAR)

    @staticmethod
    def _process_label(label: str) -> Tuple[str, int, int]:
        """
        Parse a raw label line.

        :return: (label text, subject entity number, object entity number); the two
            numbers are the first two digits found in the label, in order of
            appearance, and default to 1 and 2 for the "no relation" label
        """
        label = label.strip()
        if label == SemEval2010Task8Preprocessor.NO_RELATION_LABEL:
            return label, 1, 2
        nums = list(filter(str.isdigit, label))
        return label, int(nums[0]), int(nums[1])


class GIDSPreprocessor(AbstractPreprocessor):
    """
    Preprocessor for the GIDS dataset, which ships separate train/val/test TSV files.
    """

    DATASET_NAME = "GIDS"
    RAW_TRAIN_FILE_NAME = os.path.join(DATASET_MAPPING["GIDS"]["dir"], "train.tsv")
    RAW_VAL_FILE_NAME = os.path.join(DATASET_MAPPING["GIDS"]["dir"], "val.tsv")
    RAW_TEST_FILE_NAME = os.path.join(DATASET_MAPPING["GIDS"]["dir"], "test.tsv")
    TRAIN_SIZE = 11297
    VAL_SIZE = 1864
    TEST_SIZE = 5663
    NO_RELATION_LABEL = "NA"

    @staticmethod
    def _mark_entities(sentence: str, sub: str, obj: str,
                       sub_chars: Tuple[str, str], obj_chars: Tuple[str, str]) -> str:
        """
        Wrap the entity ``sub`` with ``sub_chars`` and the entity ``obj`` with ``obj_chars``.

        Underscores inside an entity are replaced by spaces in the marked-up text.
        """
        new_sub = sub_chars[0] + " " + sub.replace("_", " ") + " " + sub_chars[1]
        new_obj = obj_chars[0] + " " + obj.replace("_", " ") + " " + obj_chars[1]
        return sentence.replace(sub, new_sub).replace(obj, new_obj)

    def _process_file(self, file_name: str, add_reverse: bool) -> DataFrame:
        """
        Read a raw TSV file, add the entity markers to every sentence and clean the result.

        :param file_name: path of the raw file; each line holds six tab-separated
            fields of which the last four are subject, object, label and sentence
        :param add_reverse: when True every "no relation" sentence is added a second
            time with the subject and object roles swapped
        :return: dataframe produced by ``_clean_data``
        """
        with open(file_name) as in_file:
            lines = in_file.readlines()

        raw_sentences = []
        labels = []

        sub_chars = (SUB_START_CHAR, SUB_END_CHAR)
        obj_chars = (OBJ_START_CHAR, OBJ_END_CHAR)

        for line in tqdm(lines):
            _, _, sub, obj, label, sent = line.split("\t")
            sent = sent.replace("###END###", "")

            # add subject markup
            raw_sentences.append(self._mark_entities(sent, sub, obj, sub_chars, obj_chars))
            labels.append(label)

            if add_reverse and label == self.NO_RELATION_LABEL:
                # build the reversed sample from the unmarked sentence; marking the
                # already marked sentence again would nest or lose the markers
                raw_sentences.append(self._mark_entities(sent, sub, obj, obj_chars, sub_chars))
                labels.append(label)

        return self._clean_data(raw_sentences, labels)

    def _preprocess_data(self):
        """Return the (train, val, test) dataframes, each read from its own raw file."""
        print("Process train dataset")
        train_data = self._process_file(
            self.RAW_TRAIN_FILE_NAME,
            ADD_REVERSE_RELATIONSHIP,
        )

        print("Process val dataset")
        val_data = self._process_file(
            self.RAW_VAL_FILE_NAME,
            False,
        )

        print("Process test dataset")
        test_data = self._process_file(
            self.RAW_TEST_FILE_NAME,
            False,
        )

        return train_data, val_data, test_data

## Factory method to create preprocessors:

In [14]:
def get_preprocessor_class(dataset_name: str = DATASET_NAME):
    """
    Look up the preprocessor class of a dataset by naming convention.

    The class must be defined at module level under the name
    ``<dataset_name>Preprocessor``; an unknown name raises ``KeyError``.
    """
    class_name = f"{dataset_name}Preprocessor"
    module_namespace = globals()
    return module_namespace[class_name]
        
def get_preprocessor(dataset_name: str = DATASET_NAME)-> AbstractPreprocessor:
    """
    Create the preprocessor of ``dataset_name`` with the tokenizer of PRETRAINED_MODEL.
    """
    pretrained_tokenizer = AutoTokenizer.from_pretrained(PRETRAINED_MODEL, use_fast=True)

    # Some tokenizers (GPT-style ones, for example) come without a pad token;
    # the end-of-sequence token is used for padding in that case.
    if pretrained_tokenizer.pad_token is None:
        pretrained_tokenizer.pad_token = pretrained_tokenizer.eos_token

    return get_preprocessor_class(dataset_name)(pretrained_tokenizer)

Downloading GIDS train set from https://raw.githubusercontent.com/SharmisthaJat/RE-DS-Word-Attention-Models/master/Data/GIDS/train.tsv...
train.tsv downloaded successfully.
Downloading GIDS validation set from https://raw.githubusercontent.com/SharmisthaJat/RE-DS-Word-Attention-Models/master/Data/GIDS/dev.tsv...
val.tsv downloaded successfully.
Downloading GIDS test set from https://raw.githubusercontent.com/SharmisthaJat/RE-DS-Word-Attention-Models/master/Data/GIDS/test.tsv...
test.tsv downloaded successfully.
Downloading SemEval2010 Task 8 dataset from Hugging Face...
Dataset structure: DatasetDict({
    train: Dataset({
        features: ['sentence', 'relation'],
        num_rows: 8000
    })
    test: Dataset({
        features: ['sentence', 'relation'],
        num_rows: 2717
    })
})
Train set size: 8000
Test set size: 2717
DEBUG: Example: sentence
DEBUG: Example: relation
Training set saved to datasets/semeval2010\SemEval2010_task8_training\TRAIN_FILE.TXT
Test set saved to data

## Preprocess data:

In [16]:
# Build the preprocessor for the configured DATASET_NAME / PRETRAINED_MODEL and
# regenerate the processed files. reprocess=True overwrites files left by an
# earlier run; pass False to skip the work when they already exist.
preprocessor = get_preprocessor()
preprocessor.preprocess_data(reprocess=True)

# Dataset

In [17]:
class GenericDataset(IterableDataset):
    """
    A generic dataset for train/val/test data for both SemEval and GIDS dataset

    Samples are read from the JSON-lines file written by the preprocessor and are
    yielded one by one as ``(input_data, label)``, grouped so that consecutive runs
    of ``batch_size`` samples are truncated to the same length.
    Note that ``len()`` is the number of batches, not the number of samples.
    """

    def __init__(self, dataset_name: str, subset: str, batch_size: int, label_transform: str):
        """
        :param dataset_name: key of the dataset in DATASET_MAPPING and metadata.json
        :param subset: "train", "val" or "test"
        :param batch_size: number of samples that share one truncation length
        :param label_transform: "none" for the original labels, "binary" or
            "related_only" for the corresponding secondary file
        """
        assert subset in ["train", "val", "test"]
        assert label_transform in ["none", "binary", "related_only"]

        # defined first so that __del__ stays safe when the constructor fails below
        self.file = None

        file_name = subset if label_transform == "none" \
            else f"{subset}_{label_transform}"

        # resolve the class of the requested dataset, not of the notebook default
        preprocessor_class = get_preprocessor_class(dataset_name)
        with open(METADATA_FILE_NAME) as f:
            metadata = json.load(f)[dataset_name]

        # only the "related_only" file has fewer lines than the primary file
        size = metadata[f"{subset}_related_only_size"] \
            if label_transform == "related_only" \
            else metadata[f"{subset}_size"]

        self.subset = subset
        self.batch_size = batch_size
        self.length = math.ceil(size / batch_size)
        self.file = open(preprocessor_class.get_dataset_file_name(file_name))

        self.keep_test_order = self.subset == "test" and DATASET_MAPPING[dataset_name]["keep_test_order"]

    def __del__(self):
        file = getattr(self, "file", None)
        if file:
            file.close()

    def __iter__(self):
        """
        Implement "smart batching"

        Unless the test order has to be kept, samples are sorted by their number of
        attended tokens and batches are drawn as random contiguous slices of that
        sorted list, so the samples of a batch have similar lengths. List values of
        every sample are cut to the longest attended length within its batch.
        """

        # rewind so that every pass (e.g. every epoch) sees the whole file again
        self.file.seek(0)
        data = [json.loads(line) for line in self.file]
        if not self.keep_test_order:
            data = sorted(data, key=lambda x: sum(x["attention_mask"]))

        new_data = []

        while len(data) > 0:
            if self.keep_test_order or len(data) < self.batch_size:
                idx = 0
            else:
                idx = randint(0, len(data) - self.batch_size)
            batch = data[idx:idx + self.batch_size]
            max_len = max([sum(b["attention_mask"]) for b in batch])

            for b in batch:
                input_data = {}
                for k, v in b.items():
                    if k != "label":
                        if isinstance(v, list):
                            # drop the padding beyond the longest sample of the batch
                            input_data[k] = torch.tensor(v[:max_len])
                        else:
                            input_data[k] = torch.tensor(v)
                label = torch.tensor(b["label"])
                new_data.append((input_data, label))

            del data[idx:idx + self.batch_size]

        yield from new_data

    def __len__(self):
        """Number of batches (rounded up), as computed from the metadata sizes"""
        return self.length

    def as_batches(self):
        """
        Yield ``(input_data, label)`` batches of stacked tensors moved to the GPU.

        The last batch may be smaller than ``batch_size``; nothing is yielded for
        an empty remainder.
        """
        input_data = []
        label = []

        def create_batch():
            return (
                {k: torch.stack([x[k] for x in input_data]).cuda() for k in input_data[0].keys()},
                torch.tensor(label).cuda()
            )

        for ip, l in self:
            input_data.append(ip)
            label.append(l)
            if len(input_data) == self.batch_size:
                yield create_batch()
                input_data.clear()
                label.clear()

        # only emit the remainder when there is one
        if input_data:
            yield create_batch()

# Classifiers

In [18]:
class BaseClassifier(LightningModule, ABC):
    """
    Base class of all classifiers

    It wraps a pretrained language model followed by two linear layers, provides
    the dataloaders, optimizer and train/val/test steps, and leaves the loss and
    the metrics to the subclasses. A subclass must make ``num_classes`` available
    before this constructor runs and must set ``dataset_label_transform``.
    """

    # variant of the processed files to load: "none", "binary" or "related_only"
    dataset_label_transform = None

    @abstractmethod
    def loss_function(self, logits: Tensor, label: Tensor) -> Tensor:
        """
        Calculate the loss of the model
        It MUST take care of the last activation layer
        """
        pass

    @abstractmethod
    def log_metrics(self, epoch_type: str, logits: Tensor, label: Tensor) -> dict:
        """
        Compute and log the metrics of a finished "val" or "test" epoch.

        :return: the logged values; they are shown in the progress bar
        """
        pass

    def __init__(self, pretrained_language_model, dataset_name, batch_size, learning_rate, decay_lr_speed,
                 dropout_p, activation_function, weight_decay, linear_size):
        """
        :param pretrained_language_model: name passed to ``AutoModel.from_pretrained``
        :param dataset_name: key of the dataset in DATASET_MAPPING and metadata.json
        :param batch_size: batch size of every dataloader
        :param learning_rate: base learning rate of the optimizer
        :param decay_lr_speed: sequence of learning-rate multipliers, indexed by the
            value the scheduler passes to its lambda
        :param dropout_p: dropout probability
        :param activation_function: name of an activation class of ``torch.nn``
        :param weight_decay: weight decay of the optimizer
        :param linear_size: output size of the hidden linear layer
        """
        super().__init__()
        self.save_hyperparameters()
        self.test_proposed_answer = None

        self.language_model = AutoModel.from_pretrained(pretrained_language_model)
        config = self.language_model.config
        self.max_seq_len = config.max_position_embeddings
        self.hidden_size = config.hidden_size

        self.linear = nn.Linear(self.hidden_size, linear_size)
        # self.num_classes is provided by the subclass
        self.linear_output = nn.Linear(linear_size, self.num_classes)

        self.dropout = nn.Dropout(p=dropout_p)
        self.activation_function = getattr(nn, activation_function)()

    def forward(self, sub_start_pos, sub_end_pos,
                obj_start_pos, obj_end_pos, *args, **kwargs) -> Tensor:
        """
        Return the logits of a batch.

        The four position arguments are accepted so that a whole sample dictionary
        can be passed as keyword arguments, but they are not used here; every other
        argument is forwarded to the language model.
        """
        language_model_output = self.language_model(*args, **kwargs)
        if isinstance(language_model_output, tuple):
            language_model_output = language_model_output[0]

        # average the language model output over dim 1
        # (assumed to be the token dimension - TODO confirm)
        x = torch.mean(language_model_output, dim=1)
        x = self.dropout(x)
        x = self.linear(x)
        x = self.activation_function(x)
        x = self.dropout(x)
        logits = self.linear_output(x)

        return logits

    def train_dataloader(self) -> DataLoader:
        return self.__get_dataloader("train")

    def val_dataloader(self) -> DataLoader:
        return self.__get_dataloader("val")

    def test_dataloader(self) -> DataLoader:
        return self.__get_dataloader("test")

    def __get_dataloader(self, subset: str) -> DataLoader:
        """Create the dataloader of a subset with the label variant of this classifier"""
        batch_size = self.hparams.batch_size
        dataset = GenericDataset(
            self.hparams.dataset_name,
            subset,
            batch_size,
            self.dataset_label_transform
        )
        return DataLoader(
            dataset,
            batch_size=batch_size,
            num_workers=1
        )

    def configure_optimizers(self):
        """AdamW over the trainable parameters plus a multiplier-based LR schedule"""
        optimizer = AdamW(
            [p for p in self.parameters() if p.requires_grad],
            lr=self.hparams.learning_rate,
            weight_decay=self.hparams.weight_decay
        )
        # NOTE(review): decay_lr_speed needs one entry per value the scheduler
        # passes in, otherwise the lookup raises IndexError - confirm against
        # the number of training epochs
        scheduler = LambdaLR(optimizer, lambda epoch: self.hparams.decay_lr_speed[epoch])
        return [optimizer], [scheduler]

    def training_step(self, batch: Tuple[dict, Tensor], batch_nb: int) -> dict:
        input_data, label = batch
        logits = self(**input_data)

        loss = self.loss_function(logits, label)
        log = {"train_loss": loss}

        return {"loss": loss, "log": log}

    def __eval_step(self, batch:  Tuple[dict, Tensor]) -> dict:
        """Shared body of the validation and test steps: keep logits and labels"""
        input_data, label = batch
        logits = self(**input_data)

        return {
            "logits": logits,
            "label": label,
        }

    def validation_step(self, batch: Tuple[dict, Tensor], batch_nb: int) -> dict:
        return self.__eval_step(batch)

    def test_step(self, batch: Tuple[dict, Tensor], batch_nb: int) -> dict:
        return self.__eval_step(batch)

    def __eval_epoch_end(self, epoch_type: str, outputs: Iterable[dict]) -> dict:
        """Gather the outputs of all steps on the CPU and log the epoch metrics"""
        assert epoch_type in ["test", "val"]

        logits = torch.cat([x["logits"] for x in outputs]).cpu()
        label = torch.cat([x["label"] for x in outputs]).cpu()

        logs = self.log_metrics(epoch_type, logits, label)

        return {"progress_bar": logs}

    def validation_epoch_end(self, outputs: Iterable[dict]) -> dict:
        return self.__eval_epoch_end("val", outputs)

    def test_epoch_end(self, outputs: Iterable[dict]) -> dict:
        return self.__eval_epoch_end("test", outputs)

    def numeric_labels_to_text(self, label):
        """Revert labels from number to text"""
        if self.dataset_label_transform == "binary":
            label = ["Positive" if x else "Negative" for x in label]
        else:
            with open(METADATA_FILE_NAME) as f:
                meta = json.load(f)[self.hparams.dataset_name]
            if self.dataset_label_transform == "none":
                mapping = meta["id_to_label"]
            else:
                mapping = meta["id_to_label_related_only"]
            # the ids are strings here because the mapping was loaded from JSON
            label = [mapping[str(int(x))] for x in label]
        return label

    @staticmethod
    def plot_confusion_matrix(predicted_label, label) -> Figure:
        """
        Draw the confusion matrix of the given labels.

        :return: the figure; the caller is responsible for closing it
        """
        # sorted union of the labels that occur, so that rows/columns and the tick
        # labels refer to the same classes
        class_names = np.unique(np.concatenate([np.asarray(label), np.asarray(predicted_label)]))
        result = confusion_matrix(label, predicted_label, labels=class_names)
        display = ConfusionMatrixDisplay(result, display_labels=class_names)
        fig, ax = plt.subplots(figsize=(16, 12))
        display.plot(cmap=plt.cm.get_cmap("Blues"), ax=ax, xticks_rotation='vertical')
        return fig

    def log_confusion_matrix(self, prefix: str, predicted_label: Tensor, label: Tensor):
        """Log the confusion matrix as an image named ``<prefix>_confusion_matrix``"""
        predicted_label = self.numeric_labels_to_text(predicted_label)
        label = self.numeric_labels_to_text(label)
        fig = self.plot_confusion_matrix(predicted_label, label)
        try:
            self.logger.experiment.log_image(f"{prefix}_confusion_matrix", fig)
        finally:
            # one figure is created per call; close it so they do not pile up
            plt.close(fig)


class MulticlassClassifier(BaseClassifier, ABC):
    """
    Base class for multiclass classifiers

    Uses cross entropy on the raw logits and picks the class with the highest logit.
    """

    def loss_function(self, logits: Tensor, label: Tensor)-> Tensor:
        """Cross-entropy loss computed directly from the logits"""
        loss = F.cross_entropy(logits, label)
        return loss

    @staticmethod
    def logits_to_label(logits: Tensor) -> Tensor:
        """Index of the largest logit along the last dimension"""
        return logits.argmax(dim=-1)

    def log_metrics(self, epoch_type: str, logits: Tensor, label: Tensor) -> dict:
        """
        Log the confusion matrix, the loss, the accuracy and the weighted/macro
        precision, recall and F1 of an epoch; returns the logged scalar metrics.
        """
        predicted_label = self.logits_to_label(logits)
        self.log_confusion_matrix(epoch_type, predicted_label, label)

        logs = {
            f"{epoch_type}_avg_loss": float(self.loss_function(logits, label)),
            f"{epoch_type}_acc": accuracy_score(label, predicted_label),
        }

        # same key order as before: all weighted scores first, then all macro scores
        scorers = (
            ("pre", precision_score),
            ("rec", recall_score),
            ("f1", f1_score),
        )
        for averaging in ("weighted", "macro"):
            for short_name, scorer in scorers:
                metric_name = f"{epoch_type}_{short_name}_{averaging}"
                logs[metric_name] = scorer(label, predicted_label, average=averaging)

        for metric_name, metric_value in logs.items():
            self.logger.experiment.log_metric(metric_name, metric_value)

        return logs


class StandardClassifier(MulticlassClassifier):
    """
    A classifier that can recognize the "not related" as well as other relations
    """

    dataset_label_transform = "none"

    def __init__(self, dataset_name, **kwargs):
        """
        :param dataset_name: key of the dataset in metadata.json; the number of
            classes is the number of labels recorded there
        :param kwargs: remaining arguments of ``BaseClassifier.__init__``
        """
        # num_classes has to exist before the base constructor builds the output layer
        with open(METADATA_FILE_NAME) as f:
            self.num_classes = len(json.load(f)[dataset_name]["label_to_id"])
        self.test_proposed_answer = None
        super().__init__(dataset_name=dataset_name, **kwargs)

    def log_metrics(self, epoch_type: str, logits: Tensor, label: Tensor)-> dict:
        """
        Log the precision-recall curve in addition to the multiclass metrics.
        For a test epoch the predicted labels are also kept in ``test_proposed_answer``.
        """
        if epoch_type == "test":
            self.test_proposed_answer = self.logits_to_label(logits).tolist()
        self.__log_precision_recall_curve(epoch_type, logits, label)
        return super().log_metrics(epoch_type, logits, label)

    def __log_precision_recall_curve(self, epoch_type: str, logits: Tensor, label: Tensor):
        """
        Log the micro-averaged precision recall curve
        Ref: https://scikit-learn.org/stable/auto_examples/model_selection/plot_precision_recall.html
        """

        # one-hot encode the labels and flatten both sides so that every
        # (sample, class) pair becomes one binary decision
        # NOTE(review): label_binarize returns a single column for 2 classes, which
        # would not line up with the flattened logits - confirm num_classes > 2
        label = torch.tensor(label_binarize(label, classes=list(range(self.num_classes)))).flatten()
        logits = logits.flatten()

        pre, rec, _ = precision_recall_curve(label, logits)

        fig, ax = plt.subplots(figsize=(10, 10))

        # render the baseline curves as background for comparison
        background = DATASET_MAPPING[self.hparams.dataset_name]["precision_recall_curve_baseline_img"]
        if background:
            img = plt.imread(background)
            ax.imshow(img, extent=[0, 1, 0, 1])

        ax.plot(rec, pre, label="Our proposed model", color="blue")
        ax.set_xlabel("Recall")
        ax.set_ylabel("Precision")
        ax.legend()

        try:
            self.logger.experiment.log_image(f"{epoch_type}_pre_rec_curve", fig)
        finally:
            # one figure is created per epoch; close it so they do not pile up
            plt.close(fig)

        self.logger.experiment.log_metric(
            f"{epoch_type}_average_precision_score_micro",
            average_precision_score(label, logits, average="micro")
        )


class BinaryClassifier(BaseClassifier):
    """
    A binary classifier that picks out "not-related" sentences
    """

    dataset_label_transform = "binary"
    num_classes = 1

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # criteria name ("best_acc", "best_roc", "best_f1") -> classification threshold,
        # (re)computed by __find_thresholds every time a "val" epoch is logged
        self.thresholds = {}

    def forward(self, *args, **kwargs):
        """
        Run the parent forward pass and flatten its output to one dimension
        """
        return super().forward(*args, **kwargs).flatten()

    @staticmethod
    def yhat_to_label(y_hat: Tensor, threshold: float) -> Tensor:
        """
        Turn the model outputs into 0/1 labels: 1 where y_hat is strictly above the threshold
        """
        return (y_hat > threshold).long()

    def loss_function(self, logits: Tensor, label: Tensor) -> Tensor:
        """
        Binary cross entropy computed directly on the logits
        """
        return F.binary_cross_entropy_with_logits(logits, label.float())

    def log_metrics(self, epoch_type: str, logits: Tensor, label: Tensor) -> dict:
        """
        Log and return the metrics of one epoch: the loss, the ROC AUC and, for
        every threshold in self.thresholds, the accuracy, precision, recall and f1
        (a confusion matrix is logged per threshold as well).
        The thresholds are re-computed first when epoch_type is "val"
        """
        y_hat = torch.sigmoid(logits)

        if epoch_type == "val":
            self.__find_thresholds(y_hat, label)

        self.__log_output_distribution(epoch_type, y_hat, label)

        logs = {
            f"{epoch_type}_avg_loss": float(self.loss_function(logits, label)),
            f"{epoch_type}_roc_auc": self.__roc_auc_score(label, y_hat),
        }

        for criteria, threshold in self.thresholds.items():
            prefix = f"{epoch_type}_{criteria}"
            predicted_label = self.yhat_to_label(y_hat, threshold)
            self.log_confusion_matrix(prefix, predicted_label, label)

            logs[f"{prefix}_acc"] = accuracy_score(label, predicted_label)
            logs[f"{prefix}_pre"] = precision_score(label, predicted_label, average="binary")
            logs[f"{prefix}_rec"] = recall_score(label, predicted_label, average="binary")
            logs[f"{prefix}_f1"] = f1_score(label, predicted_label, average="binary")

        for k, v in logs.items():
            self.logger.experiment.log_metric(k, v)

        return logs

    @staticmethod
    def __roc_auc_score(label: Tensor, y_hat: Tensor) -> float:
        """
        ROC AUC of the outputs, or 0 when roc_auc_score rejects the input with a ValueError
        """
        try:
            return roc_auc_score(label, y_hat)
        except ValueError:
            # deliberate best effort: the metric is reported as 0 instead of aborting the epoch
            return 0.0

    def __log_figure(self, name: str, fig):
        """
        Log the figure as an image, then close it so that it does not stay
        registered with pyplot (several figures are created per logged epoch)
        """
        try:
            self.logger.experiment.log_image(name, fig)
        finally:
            plt.close(fig)

    def __find_thresholds(self, y_hat: Tensor, label: Tensor):
        """
        Find 3 classification thresholds based on 3 criteria:
        - The one that yields highest accuracy
        - The "best point" in the ROC curve
        - The one that yields highest f1
        The results are logged and stored in self.thresholds as plain floats
        """
        # best accuracy: every model output is tried as a candidate threshold.
        # Starting below 0 guarantees that a threshold is picked even when
        # no candidate reaches an accuracy above 0
        best_acc = -1.0
        best_acc_threshold = None
        for y in y_hat:
            y_predicted = self.yhat_to_label(y_hat, threshold=y)
            acc = accuracy_score(label, y_predicted)
            if best_acc < acc:
                best_acc = acc
                best_acc_threshold = float(y)
        self.thresholds["best_acc"] = best_acc_threshold

        # ROC curve
        # https://machinelearningmastery.com/threshold-moving-for-imbalanced-classification/
        fpr, tpr, thresholds = roc_curve(label, y_hat)
        # the square root of the geometric mean is skipped: it does not move the argmax
        gmeans = tpr * (1 - fpr)
        ix = np.argmax(gmeans)
        self.thresholds["best_roc"] = float(thresholds[ix])
        fig, ax = plt.subplots(figsize=(16, 12))
        ax.plot([0,1], [0,1], linestyle="--", label="No Skill")
        ax.plot(fpr, tpr, marker=".", label="Logistic")
        ax.scatter(fpr[ix], tpr[ix], marker="o", color="black", label="Best")
        ax.set_xlabel("False Positive Rate")
        ax.set_ylabel("True Positive Rate")
        ax.legend()
        self.__log_figure("roc_curve", fig)

        # precision recall curve
        # https://machinelearningmastery.com/roc-curves-and-precision-recall-curves-for-classification-in-python/
        pre, rec, thresholds = precision_recall_curve(label, y_hat)
        # f1 is defined as 0 where precision + recall is 0 (or undefined): the plain
        # division would give NaN there, and np.argmax would then select the NaN entry
        denominator = pre + rec
        f1s = np.divide(2 * pre * rec, denominator, out=np.zeros_like(denominator), where=denominator > 0)
        # precision / recall hold one more point than thresholds; the final point
        # has no threshold, so it is left out of the search
        ix = np.argmax(f1s[:-1])
        self.thresholds["best_f1"] = float(thresholds[ix])

        fig, ax = plt.subplots(figsize=(16, 12))
        no_skill = len(label[label == 1]) / len(label)
        ax.plot([0, 1], [no_skill, no_skill], linestyle="--", label="No Skill")
        ax.plot(rec, pre, marker=".", label="Logistic")
        ax.scatter(rec[ix], pre[ix], marker="o", color="black", label="Best F1")
        ax.set_xlabel("Recall")
        ax.set_ylabel("Precision")
        ax.legend()
        self.__log_figure("pre_rec_curve", fig)

        # log thresholds
        for k, v in self.thresholds.items():
            self.logger.experiment.log_metric(f"threshold_{k}", v)

    def __log_output_distribution(self, epoch_type: str, y_hat: Tensor, label: Tensor):
        """
        Log the distribution of the model output and 3 thresholds with log scale and linear scale
        """
        y_neg = y_hat[label == 0].numpy()
        y_pos = y_hat[label == 1].numpy()

        for scale in ["linear", "log"]:
            fig, ax = plt.subplots(figsize=(16, 12))
            ax.set_yscale(scale)
            ax.hist([y_neg, y_pos], stacked=True, bins=50, label=["No relation", "Related"])
            # each threshold is drawn as a vertical line spanning the current y range
            ylim = ax.get_ylim()
            for k, v in self.thresholds.items():
                ax.plot([v, v], ylim, linestyle="--", label=f"{k} threshold")
            ax.legend()
            self.__log_figure(f"{epoch_type}_distribution_{scale}_scale", fig)


class RelationClassifier(MulticlassClassifier):
    """
    A classifier for every relation but "not-related"
    """

    dataset_label_transform = "related_only"

    def __init__(self, dataset_name, **kwargs):
        """
        Take the number of classes from the "label_to_id_related_only" mapping of
        `dataset_name` in the metadata file, then initialize the parent class
        """
        with open(METADATA_FILE_NAME) as metadata_file:
            dataset_metadata = json.load(metadata_file)[dataset_name]
        related_label_to_id = dataset_metadata["label_to_id_related_only"]

        # assigned before the parent initializer runs, as in the original order
        self.num_classes = len(related_label_to_id)
        super().__init__(dataset_name=dataset_name, **kwargs)

loading configuration file config.json from cache at C:\Users\pc\.cache\huggingface\hub\models--bert-base-uncased\snapshots\86b5e0934494bd15c9632b12f734a8a67f723594\config.json
Model config BertConfig {
  "_name_or_path": "bert-base-uncased",
  "architectures": [
    "BertForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.39.1",
  "type_vocab_size": 2,
  "use_cache": true,
  "vocab_size": 30522
}

loading file vocab.txt from cache at C:\Users\pc\.cache\huggingface\hub\models--bert-base-uncased\snapshots\86b5e0934494bd15c9632b12f734a8a67f723594\vocab.txt
loa


---> Preprocessing GIDS dataset <---
Process train dataset


  0%|          | 0/11297 [00:00<?, ?it/s]

Process val dataset


  0%|          | 0/1864 [00:00<?, ?it/s]

Process test dataset


  0%|          | 0/5663 [00:00<?, ?it/s]

Saving to json files
Create metadata file at E:\Python\TextMining\BERT\data/processed\metadata.json
Saving metadata
Creating secondary files for train data


  0%|          | 0/14051 [00:00<?, ?it/s]

Creating secondary files for test data


  0%|          | 0/5653 [00:00<?, ?it/s]

Creating secondary files for val data


  0%|          | 0/1864 [00:00<?, ?it/s]

Updating metadata.json
---> Done ! <---


# The official scorer
Some datasets come with official scorers. We will define them in this section.

In [20]:
class AbstractScorer(ABC):
    """
    Base class of the official scorers: it only stores the experiment number and the logger
    """

    def __init__(self, experiment_no: int, logger):
        self.logger = logger
        self.experiment_no = experiment_no

    @abstractmethod
    def score(self, proposed_answer: dict):
        """
        Score the proposed answers; every concrete scorer has to implement this
        """

class SemEval2010Task8Scorer(AbstractScorer):
    """
    Score the proposed answers with the official perl scorer of SemEval 2010 task 8
    """

    # The scorer always works on this dataset, whatever DATASET_NAME currently is.
    # NOTE(review): assumes the metadata file uses the same dataset key as DATASET_MAPPING - confirm
    DATASET = "SemEval2010Task8"
    # number written in front of the first proposed answer; the following answers count up from it
    FIRST_ANSWER_ID = 8001
    RESULT_FILE = "semeval2010_task8_official_score_{}_{}.txt"
    PROPOSED_ANSWER_FILE = "semeval2010_task8_proposed_answer.txt"
    SCORER = os.path.join(DATASET_MAPPING["SemEval2010Task8"]["dir"], "SemEval2010_task8_scorer-v1.2/semeval2010_task8_scorer-v1.2.pl")
    FORMAT_CHECKER = os.path.join(DATASET_MAPPING["SemEval2010Task8"]["dir"], "SemEval2010_task8_scorer-v1.2/semeval2010_task8_format_checker.pl")
    ANSWER_KEY = os.path.join(DATASET_MAPPING["SemEval2010Task8"]["dir"], "SemEval2010_task8_testing_keys/TEST_FILE_KEY.TXT")

    @staticmethod
    def _run_perl(script: str, *script_args: str, stdout=None):
        """
        Run a perl script without going through a shell, so that paths containing
        spaces or shell meta characters are passed on untouched.
        Failures are reported with a message instead of being raised.
        Returns the exit code, or None when perl could not be started
        """
        # local import: the import cell of the notebook is not part of this block
        import subprocess

        try:
            return_code = subprocess.run(["perl", script, *script_args], stdout=stdout).returncode
        except OSError as e:
            print(f"Could not run perl: {e}")
            return None

        if return_code != 0:
            print(f"perl {script} exited with code {return_code}")
        return return_code

    def score(self, proposed_answer: dict):
        """
        For every criteria: write the proposed answer to PROPOSED_ANSWER_FILE, run the
        official format checker and scorer on it, print the score and log the
        result file as an artifact

        proposed_answer maps a criteria name to an iterable of label ids
        """
        with open(METADATA_FILE_NAME) as f:
            metadata = json.load(f)
        # json object keys are strings, the proposed answers hold integer ids
        id_to_label = {int(k): v for k, v in metadata[self.DATASET]["id_to_label"].items()}

        for criteria, answer in proposed_answer.items():
            result_file = self.RESULT_FILE.format(self.experiment_no, criteria)

            # write test_result to file
            with open(self.PROPOSED_ANSWER_FILE, "w") as f:
                for answer_id, label_id in enumerate(answer, start=self.FIRST_ANSWER_ID):
                    f.write(f"{answer_id}\t{id_to_label[label_id]}\n")

            # call the official scorer; its standard output becomes the result file
            self._run_perl(self.FORMAT_CHECKER, self.PROPOSED_ANSWER_FILE)
            with open(result_file, "w") as f:
                self._run_perl(self.SCORER, self.PROPOSED_ANSWER_FILE, self.ANSWER_KEY, stdout=f)

            # log the official score
            with open(result_file) as f:
                result = f.read()
            print(f">>> Classifier with criteria: {criteria} <<<")
            print(result)
            print("\n\n")
            self.logger.experiment.log_artifact(result_file)

def get_official_scorer(experiment_no: int, logger, dataset_name: str = DATASET_NAME) -> AbstractScorer:
    """
    Look up the module-level name "<dataset_name>Scorer" and instantiate it with
    the experiment number and the logger. Returns None when no such name exists
    """
    scorer_cls = globals().get(dataset_name + "Scorer")
    if not scorer_cls:
        return None
    return scorer_cls(experiment_no, logger)

# Claiming back memory & disk space

In [21]:
# NOTE(review): this cell always raises ZeroDivisionError. Presumably it is a deliberate
# stop so that "Run All" does not continue into the training cells below - confirm
1 / 0

In [18]:
# Drop the references to every trainer / classifier created by the training cells,
# then run the garbage collector and empty the CUDA cache
trainer = None
classifier = None
rel_trainer = None
rel_classifier = None
bin_trainer = None
bin_classifier = None

gc.collect()
torch.cuda.empty_cache()

ZeroDivisionError: division by zero

# Training standard classifier

In [None]:
# --- trainer options (passed to LightningTrainer) ---
GPUS = 1
MIN_EPOCHS = 3
MAX_EPOCHS = 3  # same as MIN_EPOCHS

# --- optimisation options (passed to StandardClassifier) ---
BATCH_SIZE = 16
LEARNING_RATE = 2e-5
# passed as `decay_lr_speed`
LEARNING_RATE_DECAY_SPEED = [
    1, 1, 0.75, 0.5, 0.25,
    0.1, 0.075, 0.05, 0.025, 0.01,
]
WEIGHT_DECAY = 0.01  # default = 0.01

# --- model options (passed to StandardClassifier) ---
LINEAR_SIZE = 1024
DROPOUT_P = 0.2
ACTIVATION_FUNCTION = "PReLU"

In [24]:
# One Neptune logger is shared by every run of this cell; it is stopped
# explicitly at the end (close_after_fit=False keeps it open in between)
logger = NeptuneLogger(
    api_key=NEPTUNE_API_TOKEN,
    project_name=NEPTUNE_PROJECT_NAME,
    close_after_fit=False,
)

try:
    for i in range(4):
        print(f"--------- EXPERIMENT {i} ---------")

        # release the trainer and model of the previous run before creating new ones
        trainer = None
        classifier = None
        gc.collect()
        torch.cuda.empty_cache()

        trainer = LightningTrainer(
            gpus=GPUS,
            min_epochs=MIN_EPOCHS,
            max_epochs=MAX_EPOCHS,
            default_root_dir=CHECKPOINT_DIR,
            reload_dataloaders_every_epoch=True,  # needed as we loop over a file
            deterministic=False,
            checkpoint_callback=False,
            logger=logger,
        )

        classifier = StandardClassifier(
            pretrained_language_model=PRETRAINED_MODEL,
            dataset_name=DATASET_NAME,
            batch_size=BATCH_SIZE,
            learning_rate=LEARNING_RATE,
            decay_lr_speed=LEARNING_RATE_DECAY_SPEED,
            dropout_p=DROPOUT_P,
            activation_function=ACTIVATION_FUNCTION,
            weight_decay=WEIGHT_DECAY,
            linear_size=LINEAR_SIZE,
        )

        trainer.fit(classifier)
        trainer.test(classifier)

        # hand the test predictions over to the official scorer, when the dataset has one
        scorer = get_official_scorer(i, logger)
        if not scorer:
            print("No official scorer found")
        else:
            scorer.score({"standard": classifier.test_proposed_answer})

except Exception as e:
    # stop the experiment with the error message, then let the error surface in the notebook
    logger.experiment.stop(str(e))
    raise

else:
    logger.experiment.stop()

ZeroDivisionError: division by zero

# Training binary classifier

In [None]:
# --- trainer options (passed to LightningTrainer) ---
GPUS = 1
BIN_MIN_EPOCHS = 2
BIN_MAX_EPOCHS = 2  # same as BIN_MIN_EPOCHS

# --- optimisation options (passed to BinaryClassifier) ---
BIN_BATCH_SIZE = 32
BIN_LEARNING_RATE = 2e-5
# passed as `decay_lr_speed`
BIN_LEARNING_RATE_DECAY_SPEED = [
    1, 1, 0.75, 0.5, 0.5, 0.25,
    0.25, 0.1, 0.075, 0.05, 0.025, 0.01,
]
BIN_WEIGHT_DECAY = 0.01  # default = 0.01

# --- model options (passed to BinaryClassifier) ---
BIN_LINEAR_SIZE = 256
BIN_DROPOUT_P = 0.2
BIN_ACTIVATION_FUNCTION = "PReLU"

In [26]:
# One Neptune logger is shared by every run of this cell; it is stopped
# explicitly at the end (close_after_fit=False keeps it open in between)
bin_logger = NeptuneLogger(
    api_key=NEPTUNE_API_TOKEN,
    project_name=NEPTUNE_PROJECT_NAME,
    close_after_fit=False,
)

try:
    for i in range(4):
        print(f"--------- EXPERIMENT {i} ---------")

        # release the trainer and model of the previous run before creating new ones
        bin_trainer = None
        bin_classifier = None
        gc.collect()
        torch.cuda.empty_cache()

        bin_trainer = LightningTrainer(
            gpus=GPUS,
            min_epochs=BIN_MIN_EPOCHS,
            max_epochs=BIN_MAX_EPOCHS,
            default_root_dir=CHECKPOINT_DIR,
            reload_dataloaders_every_epoch=True,  # needed as we loop over a file
            deterministic=False,
            checkpoint_callback=False,
            logger=bin_logger,
        )

        bin_classifier = BinaryClassifier(
            pretrained_language_model=PRETRAINED_MODEL,
            dataset_name=DATASET_NAME,
            batch_size=BIN_BATCH_SIZE,
            learning_rate=BIN_LEARNING_RATE,
            decay_lr_speed=BIN_LEARNING_RATE_DECAY_SPEED,
            linear_size=BIN_LINEAR_SIZE,
            dropout_p=BIN_DROPOUT_P,
            activation_function=BIN_ACTIVATION_FUNCTION,
            weight_decay=BIN_WEIGHT_DECAY,
        )

        bin_trainer.fit(bin_classifier)
        bin_trainer.test(bin_classifier)

except Exception as e:
    # stop the experiment with the error message, then let the error surface in the notebook
    bin_logger.experiment.stop(str(e))
    raise

else:
    bin_logger.experiment.stop()

# Train relation classifier

In [None]:
# --- trainer options (passed to LightningTrainer) ---
GPUS = 1
REL_MIN_EPOCHS = 4
REL_MAX_EPOCHS = 4  # same as REL_MIN_EPOCHS

# --- optimisation options (passed to RelationClassifier) ---
REL_BATCH_SIZE = 32
REL_LEARNING_RATE = 2e-5
# passed as `decay_lr_speed`
REL_LEARNING_RATE_DECAY_SPEED = [
    1, 1, 0.75, 0.5, 0.25,
    0.1, 0.075, 0.05, 0.025, 0.01,
]
REL_WEIGHT_DECAY = 0.01  # default = 0.01

# --- model options (passed to RelationClassifier) ---
REL_LINEAR_SIZE = 512
REL_DROPOUT_P = 0.1
REL_ACTIVATION_FUNCTION = "PReLU"

In [28]:
# One Neptune logger is shared by every run of this cell; it is stopped
# explicitly at the end (close_after_fit=False keeps it open in between)
rel_logger = NeptuneLogger(
    api_key=NEPTUNE_API_TOKEN,
    project_name=NEPTUNE_PROJECT_NAME,
    close_after_fit=False,
)

try:
    for i in range(4):
        print(f"--------- EXPERIMENT {i} ---------")

        # release the trainer and model of the previous run before creating new ones
        rel_trainer = None
        rel_classifier = None
        gc.collect()
        torch.cuda.empty_cache()

        rel_trainer = LightningTrainer(
            gpus=GPUS,
            min_epochs=REL_MIN_EPOCHS,
            max_epochs=REL_MAX_EPOCHS,
            default_root_dir=CHECKPOINT_DIR,
            reload_dataloaders_every_epoch=True,  # needed as we loop over a file
            deterministic=False,
            checkpoint_callback=False,
            logger=rel_logger,
        )

        rel_classifier = RelationClassifier(
            pretrained_language_model=PRETRAINED_MODEL,
            dataset_name=DATASET_NAME,
            batch_size=REL_BATCH_SIZE,
            learning_rate=REL_LEARNING_RATE,
            decay_lr_speed=REL_LEARNING_RATE_DECAY_SPEED,
            dropout_p=REL_DROPOUT_P,
            activation_function=REL_ACTIVATION_FUNCTION,
            weight_decay=REL_WEIGHT_DECAY,
            linear_size=REL_LINEAR_SIZE,
        )

        rel_trainer.fit(rel_classifier)
        rel_trainer.test(rel_classifier)

except Exception as e:
    # stop the experiment with the error message, then let the error surface in the notebook
    rel_logger.experiment.stop(str(e))
    raise

else:
    rel_logger.experiment.stop()

# Train 2 classifiers independently then test together

In [None]:
def test_together(experiment_no: int, logger, b_classifier: BinaryClassifier, r_classifier: RelationClassifier, dataset_name: str = DATASET_NAME,
                  bin_batch_size: int = BIN_BATCH_SIZE, batch_size: int = REL_BATCH_SIZE):
    """
    Test a binary classifier and a relation classifier as one pipeline on the "test" subset

    Every sample is labelled by both classifiers. The final label is the one of
    the relation classifier when the binary classifier outputs 1, and 0 otherwise.
    This is repeated for every threshold stored in b_classifier.thresholds; the
    metrics and confusion matrix of each combination are logged, and the
    proposed answers are finally handed over to the official scorer of
    `dataset_name`, if there is one

    experiment_no: number of the experiment, forwarded to the official scorer
    logger: logger whose experiment receives the images and metrics
    bin_batch_size: batch size used when running the binary classifier
    batch_size: batch size used when running the relation classifier
    """
    b_classifier.freeze()
    r_classifier.freeze()

    true_answer = []

    # run binary classifier
    print("Running binary classifier")
    dataset = GenericDataset(dataset_name, subset="test", batch_size=bin_batch_size, label_transform="none")
    binary_classify_results = {criteria: [] for criteria in b_classifier.thresholds}

    for input_data, true_label in tqdm(dataset.as_batches(), total=len(dataset)):
        # append true answers
        true_answer += true_label.tolist()

        # run bin classifier, once per batch; only the thresholding differs per criteria
        logits = b_classifier(**input_data)
        y_hat = torch.sigmoid(logits)
        for criteria, threshold in b_classifier.thresholds.items():
            label = b_classifier.yhat_to_label(y_hat, threshold)
            binary_classify_results[criteria] += label.tolist()

    # run relation classifier
    # NOTE(review): the results are paired by position with the ones above, which
    # assumes the dataset yields the samples in the same order on both passes - confirm
    print("Running relation classifier")
    dataset = GenericDataset(dataset_name, subset="test", batch_size=batch_size, label_transform="none")
    relation_classify_result = []

    for input_data, _ in tqdm(dataset.as_batches(), total=len(dataset)):
        logits = r_classifier(**input_data)
        # shifted by one: label 0 is what the combined answer uses when the binary classifier says no
        label = r_classifier.logits_to_label(logits) + 1
        relation_classify_result += label.tolist()

    # combine results
    print("Combining results")
    proposed_answer = {}
    for criteria, bin_results in binary_classify_results.items():
        proposed_answer[criteria] = [
            relation_result if bin_result else 0
            for relation_result, bin_result in zip(relation_classify_result, bin_results)
        ]

    # log metric
    final_metrics = {}
    for criteria, pa in proposed_answer.items():
        final_metrics.update({
            f"test_combined_{criteria}_acc": accuracy_score(true_answer, pa),
            f"test_combined_{criteria}_pre_micro": precision_score(true_answer, pa, average="micro"),
            f"test_combined_{criteria}_rec_micro": recall_score(true_answer, pa, average="micro"),
            f"test_combined_{criteria}_f1_micro": f1_score(true_answer, pa, average="micro"),
            f"test_combined_{criteria}_pre_macro": precision_score(true_answer, pa, average="macro"),
            f"test_combined_{criteria}_rec_macro": recall_score(true_answer, pa, average="macro"),
            f"test_combined_{criteria}_f1_macro": f1_score(true_answer, pa, average="macro"),
        })

        fig = BaseClassifier.plot_confusion_matrix(pa, true_answer)
        logger.experiment.log_image(f"test_combined_{criteria}_confusion_matrix", fig)

    for k, v in final_metrics.items():
        # printed as a percentage, logged as the raw value
        print(f"{k}: {v * 100}")
        logger.experiment.log_metric(k, v)

    # run the official scorer of the dataset that was actually tested
    scorer = get_official_scorer(experiment_no, logger, dataset_name)
    if scorer:
        scorer.score(proposed_answer)
    else:
        print("No official scorer found")

In [30]:
# One Neptune logger is shared by both classifiers and by every run of this cell;
# it is stopped explicitly at the end (close_after_fit=False keeps it open in between)
combine_logger = NeptuneLogger(
    api_key=NEPTUNE_API_TOKEN,
    project_name=NEPTUNE_PROJECT_NAME,
    close_after_fit=False,
)

try:
    for i in range(4):
        print(f"--------- EXPERIMENT {i} ---------")

        # clean up: release the trainers and models of the previous run
        bin_trainer = None
        bin_classifier = None
        rel_trainer = None
        rel_classifier = None
        gc.collect()
        torch.cuda.empty_cache()

        # relation classifier: trained only, it is tested together with the binary one
        rel_trainer = LightningTrainer(
            gpus=GPUS,
            min_epochs=REL_MIN_EPOCHS,
            max_epochs=REL_MAX_EPOCHS,
            default_root_dir=CHECKPOINT_DIR,
            reload_dataloaders_every_epoch=True,  # needed as we loop over a file
            deterministic=False,
            checkpoint_callback=False,
            logger=combine_logger,
        )
        rel_classifier = RelationClassifier(
            pretrained_language_model=PRETRAINED_MODEL,
            dataset_name=DATASET_NAME,
            batch_size=REL_BATCH_SIZE,
            learning_rate=REL_LEARNING_RATE,
            decay_lr_speed=REL_LEARNING_RATE_DECAY_SPEED,
            dropout_p=REL_DROPOUT_P,
            activation_function=REL_ACTIVATION_FUNCTION,
            weight_decay=REL_WEIGHT_DECAY,
            linear_size=REL_LINEAR_SIZE,
        )
        rel_trainer.fit(rel_classifier)

        # binary classifier: trained only as well
        bin_trainer = LightningTrainer(
            gpus=GPUS,
            min_epochs=BIN_MIN_EPOCHS,
            max_epochs=BIN_MAX_EPOCHS,
            default_root_dir=CHECKPOINT_DIR,
            reload_dataloaders_every_epoch=True,  # needed as we loop over a file
            deterministic=False,
            checkpoint_callback=False,
            logger=combine_logger,
        )
        bin_classifier = BinaryClassifier(
            pretrained_language_model=PRETRAINED_MODEL,
            dataset_name=DATASET_NAME,
            batch_size=BIN_BATCH_SIZE,
            learning_rate=BIN_LEARNING_RATE,
            decay_lr_speed=BIN_LEARNING_RATE_DECAY_SPEED,
            dropout_p=BIN_DROPOUT_P,
            activation_function=BIN_ACTIVATION_FUNCTION,
            weight_decay=BIN_WEIGHT_DECAY,
            linear_size=BIN_LINEAR_SIZE,
        )
        bin_trainer.fit(bin_classifier)

        # test together
        test_together(i, combine_logger, bin_classifier, rel_classifier)

except Exception as e:
    # stop the experiment with the error message, then let the error surface in the notebook
    combine_logger.experiment.stop(str(e))
    raise

else:
    combine_logger.experiment.stop()