In [None]:
import os
import math
import numpy as np
import pandas as pd

In [None]:
import sys
# Make the sibling "parser" directory importable. The path is relative to the
# current working directory. os.path.join picks the separator of the host OS;
# a hard-coded backslash ("..\\parser") only resolves on Windows.
sys.path.append(os.path.join("..", "parser"))
import conll04_parser
import model

In [None]:
import torch
from torch.nn import functional as F

In [None]:
from sklearn.metrics import precision_recall_fscore_support

In [None]:
# Constants
NUM_CLASSES = 8 # Number of relation classes; passed to model.BertForMre when the model is built
NUM_EPOCH = 100 # Number of passes over the "train" split made by train_model()

In [None]:
# TODO: Run on GPU (move mre_model and the tensors yielded by data_generator() to a CUDA device)

In [None]:
def data_generator(group):
    """Yield one dict of model inputs per sentence that has at least two entities.

    Args:
        group: Name of the data split, forwarded to conll04_parser.extract_data()
            (this notebook uses "train", "val" and "test").

    Yields:
        dict with keys "input_ids", "attention_mask", "token_type_ids" (each a
        long tensor of shape (1, sequence_length)), plus "e1_mask", "e2_mask"
        and "labels" exactly as returned by model.generate_entity_mask().
    """
    for document in conll04_parser.extract_data(group):
        positions = document["entity_position"]
        # A relation needs two entities; skip sentences that cannot have one
        if len(positions) < 2:
            continue

        # Shift every entity span right by one: index 0 is taken by the CLS token
        shifted_positions = {
            entity: (positions[entity][0] + 1, positions[entity][1] + 1)
            for entity in positions
        }

        # Wrap the sentence as CLS ... SEP
        sentence_ids = document["data_frame"]["token_ids"].tolist()
        input_ids = [conll04_parser.CLS_TOKEN, *sentence_ids, conll04_parser.SEP_TOKEN]
        sequence_length = len(input_ids)

        e1_mask, e2_mask, labels = model.generate_entity_mask(
            sequence_length, shifted_positions, document["relations"]
        )
        # One mask row and one label per candidate entity pair ...
        assert e1_mask.shape[0] == e2_mask.shape[0] == labels.shape[0]
        # ... and one mask column per token
        assert sequence_length == e1_mask.shape[1] == e2_mask.shape[1]

        batch = {
            "input_ids": torch.tensor([input_ids]).long(),
            "attention_mask": torch.ones((1, sequence_length), dtype=torch.long),
            "token_type_ids": torch.zeros((1, sequence_length), dtype=torch.long),
            "e1_mask": e1_mask,
            "e2_mask": e2_mask,
            "labels": labels,
        }
        yield batch

        # Drop our references as soon as the consumer asks for the next item
        del batch, e1_mask, e2_mask, labels

In [None]:
# Test data_generator()
generator = data_generator("train")
# Test on the first document ("1024")
test_inputs = next(generator)
# The sequence must start with CLS and end with SEP; 2200 and 1012 are the
# expected ids of the first and last tokens of this document
assert test_inputs["input_ids"][0, 0] == conll04_parser.CLS_TOKEN
assert test_inputs["input_ids"][0, 1] == 2200
assert test_inputs["input_ids"][0, -2] == 1012
assert test_inputs["input_ids"][0, -1] == conll04_parser.SEP_TOKEN
# Rows 0, 2 and 4 of e1_mask must be set over the expected (CLS-shifted) token spans
assert torch.equal(test_inputs["e1_mask"][0, 22:24], torch.tensor([1, 1]))
assert torch.equal(test_inputs["e1_mask"][2, 25:28], torch.tensor([1, 1, 1]))
assert torch.equal(test_inputs["e1_mask"][4, 29:31], torch.tensor([1, 1]))
# Six label entries are expected for this document
assert torch.equal(test_inputs["labels"], torch.tensor([0, 2, 0, 2, 0, 0]))

In [None]:
# Build the relation-extraction model; NUM_CLASSES is its only constructor argument
# (presumably the size of the classifier output -- confirm in model.py)
mre_model = model.BertForMre(NUM_CLASSES)

In [None]:
# Train only the classifier layer on top: switch gradients off for every
# parameter of the model, then back on for the classifier's weight and bias
mre_model.requires_grad_(False)
mre_model.classifier.weight.requires_grad_(True)
mre_model.classifier.bias.requires_grad_(True)

In [None]:
# Sanity check: print the shape of every parameter and whether it will receive gradients
for param in mre_model.parameters():
    print("size:", param.shape)
    print(param.requires_grad)

In [None]:
# Use PyTorch's AdamW: the AdamW class shipped with transformers is deprecated
# upstream in favour of torch.optim.AdamW and may be missing from newer releases.
# eps and weight_decay are pinned to the defaults of the transformers class,
# because torch.optim.AdamW defaults to eps=1e-8 and weight_decay=0.01.
optimizer = torch.optim.AdamW(mre_model.parameters(), lr=1e-5, eps=1e-6, weight_decay=0.0)

In [None]:
def validate_model():
    """Run ``mre_model`` over the "val" split and print macro/micro metrics.

    Inference runs in evaluation mode with gradient tracking disabled. The
    train/eval flag of every sub-module is restored afterwards, so calling this
    between training epochs does not change how the next epoch trains.
    """
    # Remember each sub-module's mode so it can be restored exactly
    module_modes = [(module, module.training) for module in mre_model.modules()]
    true_labels = []
    predicted_labels = []

    mre_model.eval()  # layers that behave differently in training (e.g. dropout) switch to inference
    try:
        with torch.no_grad():  # nothing is back-propagated here, so skip autograd bookkeeping
            for inputs in data_generator("val"):
                outputs = mre_model(**inputs)
                true_labels += inputs["labels"].tolist()
                # softmax is monotonic, so argmax over the raw logits picks the same class
                predicted_labels += outputs.logits.argmax(dim=1).tolist()
                assert len(predicted_labels) == len(true_labels)
    finally:
        for module, was_training in module_modes:
            module.training = was_training

    print("[validation]")
    result = pd.DataFrame(columns=["precision", "recall", "fbeta_score", "support"])
    for average in ("macro", "micro"):
        result.loc[average] = list(
            precision_recall_fscore_support(true_labels, predicted_labels, average=average)
        )
    print(result)

In [None]:
def _print_train_metrics(epoch_number, step_number, true_labels, predicted_labels):
    """Print macro- and micro-averaged precision/recall/F-score for one logging window."""
    print("[%d, %5d]" % (epoch_number, step_number))
    result = pd.DataFrame(columns=["precision", "recall", "fbeta_score", "support"])
    for average in ("macro", "micro"):
        result.loc[average] = list(
            precision_recall_fscore_support(true_labels, predicted_labels, average=average)
        )
    print(result)


def train_model(num_epochs=None, log_every=1000):
    """Train ``mre_model`` with ``optimizer`` and validate after every epoch.

    Each item yielded by data_generator("train") is used as one optimisation
    step. Training metrics are printed every ``log_every`` steps and once more
    at the end of the epoch for the steps since the last report, so an epoch
    shorter than ``log_every`` steps still reports.

    Args:
        num_epochs: Number of passes over the "train" split. ``None`` (the
            default) uses the notebook constant ``NUM_EPOCH``, read at call time.
        log_every: Number of steps between two training-metric reports.

    Raises:
        ValueError: If ``log_every`` is smaller than 1.
    """
    if log_every < 1:
        raise ValueError("log_every must be a positive integer")
    if num_epochs is None:
        num_epochs = NUM_EPOCH

    for epoch in range(num_epochs):  # loop over the dataset multiple times
        true_labels = []
        predicted_labels = []
        step = 0  # stays 0 when the split yields nothing

        for step, inputs in enumerate(data_generator("train"), 1):
            # zero the parameter gradients
            optimizer.zero_grad()

            # forward + backward + optimize
            outputs = mre_model(**inputs)
            outputs.loss.backward()
            optimizer.step()

            # collect statistics for the current logging window
            true_labels += inputs["labels"].tolist()
            # softmax is monotonic, so argmax over the raw logits picks the same class
            predicted_labels += outputs.logits.argmax(dim=1).tolist()
            assert len(predicted_labels) == len(true_labels)

            if step % log_every == 0:
                _print_train_metrics(epoch + 1, step, true_labels, predicted_labels)
                true_labels = []
                predicted_labels = []

        # Report whatever accumulated since the last full window instead of dropping it
        if true_labels:
            _print_train_metrics(epoch + 1, step, true_labels, predicted_labels)

        validate_model()

    print('Finished Training')

In [None]:
# Run the full training loop; validation metrics are printed after every epoch
train_model()

In [None]:
def test_model():
    """Evaluate ``mre_model`` on the "test" split and report per-class metrics.

    Inference runs in evaluation mode with gradient tracking disabled; the
    train/eval flag of every sub-module is restored afterwards.

    Returns:
        pandas.DataFrame with columns "precision", "recall", "fbeta_score" and
        "support": one row per entry of conll04_parser.relation_encode, followed
        by the rows "macro" and "micro" holding the averaged scores.
    """
    # Remember each sub-module's mode so it can be restored exactly
    module_modes = [(module, module.training) for module in mre_model.modules()]
    true_labels = []
    predicted_labels = []

    mre_model.eval()  # layers that behave differently in training (e.g. dropout) switch to inference
    try:
        with torch.no_grad():  # nothing is back-propagated here, so skip autograd bookkeeping
            for inputs in data_generator("test"):
                outputs = mre_model(**inputs)
                true_labels += inputs["labels"].tolist()
                # softmax is monotonic, so argmax over the raw logits picks the same class
                predicted_labels += outputs.logits.argmax(dim=1).tolist()
                assert len(predicted_labels) == len(true_labels)
    finally:
        for module, was_training in module_modes:
            module.training = was_training

    # Invert relation_encode: its values are used as class ids, its keys as row labels
    label_map = {v: k for k, v in conll04_parser.relation_encode.items()}
    classes = list(label_map.keys())
    precision, recall, fbeta_score, support = precision_recall_fscore_support(
        true_labels, predicted_labels, average=None, labels=classes
    )
    result = pd.DataFrame(index=[label_map[c] for c in classes])
    result["precision"] = precision
    result["recall"] = recall
    result["fbeta_score"] = fbeta_score
    result["support"] = support
    # Averaged rows go below the per-class rows
    for average in ("macro", "micro"):
        result.loc[average] = list(
            precision_recall_fscore_support(true_labels, predicted_labels, average=average)
        )

    print(result)
    return result

In [None]:
# Evaluate on the test split; `result` keeps the metrics table that test_model() returns
result = test_model()