In [None]:
# Reference — KnowGL paper (arXiv:2210.13952): https://arxiv.org/pdf/2210.13952

In [None]:
import re
import torch
from transformers import pipeline

# One KnowGL triplet: [(subject)|relation|(object)].
# - Whitespace around the pipes is optional, so both the compact and the
#   spaced layout are accepted.
# - The entity groups are lazy and end at the ")" that is followed by the
#   next delimiter, so a mention or label may contain its own parentheses
#   (e.g. "Apple (company)").
# - DOTALL keeps newlines inside an entity matchable.
_KNOWGL_TRIPLET_RE = re.compile(
    r'\[\((.+?)\)\s*\|\s*([^|]+?)\s*\|\s*\((.+?)\)\]',
    re.DOTALL,
)


def _split_knowgl_entity(entity_part):
    """Return ``(mention, type)`` parsed from a ``mention#label#type`` string.

    The type falls back to ``"Unknown"`` when fewer than three ``#``-separated
    components are present. The middle (label) component is not returned.
    """
    components = [component.strip() for component in entity_part.split('#')]
    entity_type = components[2] if len(components) >= 3 else "Unknown"
    return components[0], entity_type


def extract_triplets_knowgl(text):
    """Extract triplets from KnowGL model output.

    The input is expected to be a ``$``-separated sequence of triplets, each
    shaped like ``[(mention#label#type)|relation|(mention#label#type)]``,
    with optional whitespace around the ``|`` and ``#`` separators.

    Args:
        text: Generated string to parse. ``None`` or an empty string yields
            an empty list.

    Returns:
        A list of dicts with the keys ``head``, ``head_type``, ``relation``,
        ``tail`` and ``tail_type``, in the order the triplets appear.
        Segments that do not match the triplet pattern, or whose head,
        relation or tail is empty after stripping, are skipped.
    """
    if not text:
        return []

    triplets = []

    # KnowGL separates multiple triplets with $
    for triplet_str in text.split('$'):
        match = _KNOWGL_TRIPLET_RE.match(triplet_str.strip())
        if not match:
            continue

        subject_part, relation, object_part = match.groups()
        subject, subject_type = _split_knowgl_entity(subject_part)
        object_, object_type = _split_knowgl_entity(object_part)
        relation = relation.strip()

        # Drop degenerate triplets where any of the three core parts is blank.
        if subject and relation and object_:
            triplets.append({
                'head': subject,
                'head_type': subject_type,
                'relation': relation,
                'tail': object_,
                'tail_type': object_type,
            })

    return triplets

# Use GPU index 0 when torch reports CUDA support; otherwise fall back to -1,
# which the message below reports as 'cpu'.
device_to_use = -1 if not torch.cuda.is_available() else 0
print(f"Using device: {'cuda:' + str(device_to_use) if device_to_use != -1 else 'cpu'}")

# Build the KnowGL text2text pipeline; model and tokenizer come from the same
# checkpoint id.
_knowgl_model_id = 'ibm-research/knowgl-large'
print("Loading KnowGL model...")
knowgl_extractor = pipeline(
    'text2text-generation',
    model=_knowgl_model_id,
    tokenizer=_knowgl_model_id,
    device=device_to_use,
)
print("KnowGL model loaded successfully!")
