### A Dataset for Hyper-Relational Extraction and a Cube-Filling Approach

GitHub: https://github.com/declare-lab/HyperRED

In [None]:
!git clone https://github.com/declare-lab/HyperRED.git
!cd HyperRED && git checkout ef3a847
!cp -a HyperRED/* .

# Install requirements but use the existing torch (remove if not in Colab)
!sed -i '/torch/d' requirements.txt
!pip install -q -r requirements.txt

In [None]:
from data_process import download_data, process_many

def colab_demo_truncate_data(path: str, limit: int):
    """Truncate the text file at ``path`` in place to its first ``limit`` lines.

    Used to reduce the data size for faster training in the demo.

    Args:
        path: File to shrink; it is overwritten with the kept lines.
        limit: Number of leading lines to keep. Follows slice semantics, so a
            value larger than the file's line count leaves the content unchanged.
    """
    # Explicit UTF-8 so the result does not depend on the platform's locale encoding.
    with open(path, encoding="utf-8") as f:
        lines = f.readlines()
    with open(path, "w", encoding="utf-8") as f:
        f.writelines(lines[:limit])

# Data preparation: download into the raw directory, shrink the training file
# for the demo, then run the repo's processing step into the processed directory.
raw_dir = "data/hyperred/"
train_file = "data/hyperred/train.json"
processed_dir = "data/processed"
demo_train_limit = 5000  # number of leading training lines kept for the demo

download_data(raw_dir)
colab_demo_truncate_data(train_file, limit=demo_train_limit)
process_many(raw_dir, processed_dir)

In [None]:
# Data Exploration

from data_process import Data

def explore_data(path: str):
    """Load the dataset at ``path``, run its ``analyze()`` step and print the first three sentences.

    For each of those sentences the text and tokens are printed, followed by
    every relation (head, label, tail) and each of its qualifiers. Spans are
    rendered by joining ``tokens[start:end]`` with single spaces.

    Args:
        path: File to load with ``Data.load``.
    """
    dataset = Data.load(path)
    dataset.analyze()

    for sent in dataset.sents[:3]:

        def span_text(span):
            # A span is indexed as (start, end) into the sentence tokens, end exclusive.
            start, end = span[0], span[1]
            return " ".join(sent.tokens[start:end])

        print(f"\nText: {sent.text}")
        print(f"Tokens: {sent.tokens}")
        for rel in sent.relations:
            head_text = span_text(rel.head)
            tail_text = span_text(rel.tail)
            print(f"\tRelation: {rel}")
            print(f"\tHead: {head_text}, Relation: {rel.label}, Tail: {tail_text}")
            for qual in rel.qualifiers:
                print(f"\t\tQualifier: {qual.label}, Value: {span_text(qual.span)}")
        print()

# Inspect the training file: dataset analysis plus the first three annotated sentences.
explore_data("data/hyperred/train.json")

In [None]:
# Download Pretrained Model
!wget https://github.com/declare-lab/HyperRED/releases/download/v1.0.0/cube_model.zip
!unzip cube_model.zip

In [None]:
# Use Pretrained Model for Generation

from prediction import run_predict

# Two sample sentences to run through the pretrained model.
texts = [
    "Leonard Parker received his PhD from Harvard University in 1967 .",
    "Szewczyk played 37 times for Poland, scoring 3 goals .",
]
checkpoint_dir = "cube_model"
demo_pred_path = "preds.json"

# Predict, save the predictions to disk, then print them with explore_data.
preds = run_predict(texts, path_checkpoint=checkpoint_dir)
preds.save(demo_pred_path)
explore_data(demo_pred_path)

In [None]:
# Evaluation Scoring

from prediction import run_predict, score_preds

# Gold annotations to score against, and the file the predictions are written to.
# NOTE: "preds.json" is the same file the demo prediction cell saved; it is overwritten here.
path_gold = "data/hyperred/test.json"
path_pred = "preds.json"

# Predict on the text of every gold sentence, save the predictions, then score
# the saved prediction file against the gold file.
# NOTE: `Data` is imported from data_process in the Data Exploration cell, so that cell must run first.
data = Data.load(path_gold)
texts = [s.text for s in data.sents]
preds = run_predict(texts, path_checkpoint="cube_model")
preds.save(path_pred)
score_preds(path_pred, path_gold)

In [None]:
# Train the CubeRE model from scratch.
# Note: the --train_batch_size and --gradient_accumulation_steps arguments can be removed if your GPU has enough memory (e.g. 32 GB).
# The checkpoint is saved under ckpt/cube_prune_20; --data_dir points at the data/processed directory written by process_many.
!python training.py --save_dir ckpt/cube_prune_20 --data_dir data/processed --prune_topk 20 --config_file config.yml --train_batch_size 16 --gradient_accumulation_steps 2