In [1]:
from datasets import load_dataset

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
dataset = load_dataset("DFKI-SLT/nyt-multi") 
train = dataset["train"]
test = dataset["test"]
rel_set=set()
for i in range(len(test)):
    s=test[i]['relations'][0]['type']
    result = s.split('/')[3]  # Index 3 gives you the part after the second slash
    rel_set.add(result)

In [3]:
rel_set

{'administrative_divisions',
 'advisors',
 'capital',
 'children',
 'company',
 'contains',
 'country',
 'founders',
 'location',
 'major_shareholder_of',
 'major_shareholders',
 'nationality',
 'neighborhood_of',
 'people',
 'place_founded',
 'place_lived',
 'place_of_birth',
 'place_of_death',
 'religion',
 'teams'}

In [4]:
test[0].keys()  # Display the keys of the first test example

dict_keys(['tokens', 'spo_list', 'pos_tags', 'relations'])

In [5]:
test[2]['spo_list']

[['Iowa', '/location/location/contains', 'Des Moines']]

In [6]:
test[0]['relations']

[{'h': {'text': 'Bobby Fischer', 'start': 14, 'end': 16, 'type': 'PERSON'},
  't': {'text': 'Iceland', 'start': 35, 'end': 36, 'type': 'LOCATION'},
  'type': '/people/person/nationality'},
 {'h': {'text': 'Iceland', 'start': 35, 'end': 36, 'type': 'LOCATION'},
  't': {'text': 'Reykjavik', 'start': 33, 'end': 34, 'type': 'LOCATION'},
  'type': '/location/country/capital'},
 {'h': {'text': 'Iceland', 'start': 35, 'end': 36, 'type': 'LOCATION'},
  't': {'text': 'Reykjavik', 'start': 33, 'end': 34, 'type': 'LOCATION'},
  'type': '/location/location/contains'},
 {'h': {'text': 'Bobby Fischer', 'start': 14, 'end': 16, 'type': 'PERSON'},
  't': {'text': 'Reykjavik', 'start': 33, 'end': 34, 'type': 'LOCATION'},
  'type': '/people/deceased_person/place_of_death'}]

In [7]:
s=test[0]['spo_list'][0][1]
s.split('/')

['', 'people', 'person', 'nationality']

In [8]:
ent_labels=set()
true=[]
entity_labels_true_count={}
for i in range(len(test)):
    temp=[]
    for j in test[i]['spo_list']:
        s=j[1].split('/')
        if s[1] not in entity_labels_true_count:
            entity_labels_true_count[s[1]]=0
        if s[2] not in entity_labels_true_count:
            entity_labels_true_count[s[2]]=0
        entity_labels_true_count[s[1]]+=1
        entity_labels_true_count[s[2]]+=1
        ent_labels.add(s[1])
        ent_labels.add(s[2]) 
        temp.append(j[0])
        temp.append(j[2])
    true.append(temp) 
        

In [9]:
entity_labels_true_count

{'people': 1657,
 'person': 1955,
 'location': 10380,
 'country': 1288,
 'deceased_person': 136,
 'administrative_division': 580,
 'business': 604,
 'neighborhood': 394,
 'company': 135,
 'sports': 34,
 'sports_team_location': 17,
 'sports_team': 17,
 'company_shareholder': 33,
 'ethnicity': 2}

In [10]:
rel_set=list(rel_set)
rel_set

['location',
 'place_of_death',
 'place_lived',
 'nationality',
 'advisors',
 'neighborhood_of',
 'founders',
 'major_shareholder_of',
 'place_founded',
 'people',
 'children',
 'religion',
 'teams',
 'capital',
 'country',
 'contains',
 'administrative_divisions',
 'place_of_birth',
 'company',
 'major_shareholders']

In [None]:
from openai import OpenAI
import json
import time
from tqdm import tqdm
import pandas as pd  # make sure df is defined with 'Content' column

# Set OpenAI API key
client = OpenAI(api_key = "")  # Replace with your actual API key

# Function: extract named_entities given allowed entity labels
def extract_named_entities_by_labels(paragraph, allowed_labels):
    allowed_labels_str = ", ".join(allowed_labels)

    system_prompt = (
        "You are a named entity recognition (NER) assistant. "
        "Your task is to extract named entities from a given paragraph, "
        "but only include entities whose type is one of the following: "
        f"{allowed_labels_str}. "
        "Return ONLY a JSON list of entity strings that match the allowed labels. "
        "Do not include the labels in the output. Keep it JSON parsable."
    )

    user_prompt = f"""
Paragraph:

{paragraph}
"""
    try:
        response = client.chat.completions.create(
            model="gpt-4o-mini",  # or "gpt-4o-mini"
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_prompt}
            ],
            temperature=0.2,
        )

        reply = response.choices[0].message.content.strip()

        if reply.startswith("```json"):
            reply = reply[len("```json"):].strip()
        if reply.endswith("```"):
            reply = reply[:-len("```")].strip()

        return json.loads(reply)

    except Exception as e:
        print("Error with paragraph:", paragraph)
        print("Exception:", e)
        return []

    # Example: allowed labels
allowed_entity_labels = ['company', 'country', 'location', 'neighborhood', 'people', 'person', 'sports']

# Loop through DataFrame and extract entities
gpt_named_entities = []

for i in tqdm(range(len(test))):
    paragraph = ' '.join(test[i]['tokens'])
    entities = extract_named_entities_by_labels(paragraph, allowed_entity_labels)
    gpt_named_entities.append(entities)

  0%|          | 0/5000 [00:00<?, ?it/s]

100%|██████████| 5000/5000 [1:09:59<00:00,  1.19it/s]


In [14]:
with open("nyt_gpt_named_entities_output.json", "w") as f:
    json.dump(gpt_named_entities, f, indent=2)

In [15]:
gpt_named_entities[0]

['Bobby Fischer', 'Reykjavik', 'Iceland']

In [16]:
recall_avg=[]
precision_avg=[]
common2=0
for i in range(len(gpt_named_entities)):
    true_set=set(true[i])
    pred_set=set(gpt_named_entities[i])
    
    if len(true_set) == 0 and len(pred_set) == 0:
        recall_avg.append(1.0)
        precision_avg.append(1.0)
    else:
        count=0
        for m in true_set:
            for n in pred_set:
                if m in n or n==m:
                    count+= 1
        recall = count / len(true_set) if len(true_set) > 0 else 0
        precision = count / len(pred_set) if len(pred_set) > 0 else 0
        common2 += count
        recall_avg.append(recall)
        precision_avg.append(precision)

In [17]:
sum(recall_avg) / len(recall_avg), sum(precision_avg) / len(precision_avg)

(0.8971235714285717, 0.5872980432312767)

In [18]:
def process(example):
    ner=[]
    for ent in example['relations']:
        head=ent['h']
        tail=ent['t']    
        text=head['text']
        start=head['start']
        end=head['end']
        ent_type=head['type']
        ner.append([start, end-1, ent_type, text])
        text=tail['text']
        start=tail['start']
        end=tail['end']
        ent_type=tail['type']
        ner.append([start, end-1, ent_type, text])

    example['ner'] = ner  # Add ner to the original example
    return example
processed_data = [process(example) for example in test]

In [19]:
processed_data[0]['relations']

[{'h': {'text': 'Bobby Fischer', 'start': 14, 'end': 16, 'type': 'PERSON'},
  't': {'text': 'Iceland', 'start': 35, 'end': 36, 'type': 'LOCATION'},
  'type': '/people/person/nationality'},
 {'h': {'text': 'Iceland', 'start': 35, 'end': 36, 'type': 'LOCATION'},
  't': {'text': 'Reykjavik', 'start': 33, 'end': 34, 'type': 'LOCATION'},
  'type': '/location/country/capital'},
 {'h': {'text': 'Iceland', 'start': 35, 'end': 36, 'type': 'LOCATION'},
  't': {'text': 'Reykjavik', 'start': 33, 'end': 34, 'type': 'LOCATION'},
  'type': '/location/location/contains'},
 {'h': {'text': 'Bobby Fischer', 'start': 14, 'end': 16, 'type': 'PERSON'},
  't': {'text': 'Reykjavik', 'start': 33, 'end': 34, 'type': 'LOCATION'},
  'type': '/people/deceased_person/place_of_death'}]

In [20]:
relations_true=[]
relation_labels_true_count={}
for i in range(len(processed_data)):
    temp=[]
    for j in processed_data[i]['relations']:
        s=j['type']
        rel_type = s.split('/')[3]
        temp.append([j['h']['text'], j['t']['text'], rel_type])
        if rel_type not in relation_labels_true_count:
            relation_labels_true_count[rel_type]=0
        relation_labels_true_count[rel_type]+=1
    relations_true.append(temp)  

In [21]:
relation_labels_true_count

{'nationality': 589,
 'capital': 708,
 'contains': 4059,
 'place_of_death': 136,
 'children': 42,
 'place_of_birth': 270,
 'place_lived': 612,
 'administrative_divisions': 580,
 'country': 580,
 'company': 436,
 'neighborhood_of': 394,
 'place_founded': 36,
 'founders': 63,
 'teams': 17,
 'location': 17,
 'major_shareholder_of': 33,
 'major_shareholders': 33,
 'people': 1,
 'ethnicity': 1,
 'advisors': 3,
 'religion': 5,
 'geographic_distribution': 1}

In [22]:
relations_true[0]

[['Bobby Fischer', 'Iceland', 'nationality'],
 ['Iceland', 'Reykjavik', 'capital'],
 ['Iceland', 'Reykjavik', 'contains'],
 ['Bobby Fischer', 'Reykjavik', 'place_of_death']]

In [None]:
from openai import OpenAI
import json
import re

# Initialize OpenAI client
client = OpenAI(api_key = "")  # Replace with your actual API key

def extract_relation_labels_with_gpt_entities(paragraph, entities, relation_labels):
    """
    Extract RDF triples using GPT-4o/mini with entity spans.

    Args:
        paragraph (str): Full input paragraph
        entities (list): Each entity as [start, end, entity_type, entity_text]
        relation_labels (list): Allowed relation labels

    Returns:
        dict: {"relation_triples": [[head, relation, tail], ...]}
    """
    # Convert entity structure into readable text
    entity_descs = [
        f"[{start}, {end}, {etype}, {text}]"
        for start, end, etype, text in entities
    ]
    entity_str = "\n".join(entity_descs)
    relation_str = ", ".join(relation_labels)

    system_prompt = (
        "You are an expert in information extraction. "
        "Given a paragraph, a list of named entities with character spans and types, and a list of allowed relation labels, "
        "extract RDF relation triples in the format [head, relation, tail].\n\n"
        "- Head and tail must be from the provided entity list.\n"
        "- The relation must be from the relation_labels list.\n"
        "- The output must be ONLY a JSON object like:\n"
        '{ "relation_triples": [ ["Entity1", "Relation", "Entity2"], ... ] }\n'
        "- Do NOT include any explanation or extra text."
    )

    user_prompt = f"""
Paragraph:
\"\"\"
{paragraph}
\"\"\"

Entities (format: [start, end, type, entity]):
{entity_str}

Relation labels:
{relation_str}
"""

    try:
        response = client.chat.completions.create(
            model="gpt-4o-mini",  # or "gpt-4o-mini"
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_prompt}
            ],
            temperature=0.2,
        )

        reply = response.choices[0].message.content.strip()

        # Extract JSON block
        json_match = re.search(r'\{.*\}', reply, re.DOTALL)
        if json_match:
            json_text = json_match.group(0).strip()
            return json.loads(json_text)
        else:
            print("No JSON found.")
            print("Reply:", reply)
            return {"relation_triples": []}

    except Exception as e:
        print("Exception:", e)
        return {"relation_triples": []}
gpt_relation_triples = []
relation_labels = ['location',
 'place_of_death',
 'place_lived',
 'nationality',
 'advisors',
 'neighborhood_of',
 'founders',
 'major_shareholder_of',
 'place_founded',
 'people',
 'children',
 'religion',
 'teams',
 'capital',
 'country',
 'contains',
 'administrative_divisions',
 'place_of_birth',
 'company',
 'major_shareholders']


for i in tqdm(range(len(processed_data))):
    paragraph = ' '.join(processed_data[i]['tokens'])
    entities = processed_data[i]['ner']
    output= extract_relation_labels_with_gpt_entities(paragraph, entities, relation_labels)
    gpt_relation_triples.append(output['relation_triples'])

100%|██████████| 5000/5000 [1:08:47<00:00,  1.21it/s]


In [43]:
with open("nyt_gpt_relation_triples_output.json", "w") as f:
    json.dump(gpt_relation_triples, f, indent=2)

In [44]:
output

{'relation_triples': [['Jersey City', 'neighborhood_of', 'New Jersey']]}

In [None]:
recall_avg=[]
precision_avg=[]
common=0
ours=0

for i in range(len(relations_true)):
    true_set=relations_true[i]
    pred_set=gpt_relation_triples[i]
    d={}
    for item in pred_set:
        if (item[0],item[1]) not in d and (item[1],item[0]) not in d:
            d[(item[0],item[1])]=item[2]
    
    ours+=len(d)
    if len(true_set) == 0 and len(pred_set) == 0:
        recall_avg.append(1.0)
        precision_avg.append(1.0)
    else:
        count=0
        for m in true_set:
            for n in pred_set:
                if m[0]==n[0] and m[1]==n[2] and (m[2]==n[1]):
                    count+= 1
        recall = count/ len(true_set) if len(true_set) > 0 else 0
        precision = count / len(pred_set) if len(d) > 0 else 0
        common += count
        recall_avg.append(recall)
        precision_avg.append(precision)

In [46]:
sum(recall_avg) / len(recall_avg), sum(precision_avg) / len(precision_avg)

(0.16340497835497841, 0.20436055555555557)

65.43

In [44]:
from pair2rel import Pair2Rel

from tqdm import tqdm
model = Pair2Rel.from_pretrained("chapalavamshi022/pair2rel")
import torch

# Force usage of GPU 1
device = torch.device("cuda:7" if torch.cuda.is_available() else "cpu")
model = model.to(device)
model.device = device 
relations_all=[]
labels = ['contains','includes','place_lived','nationality','company','capital','neighborhood_of','place_of_birth','country',
            'place_of_death','place_founded']
relation_labels_predicted_count={}
for i in tqdm(range(len(processed_data))):
    # required_labels = []
    # for token in processed_data[i]['tokens']:
    #     if token in rel_set:
    #         required_labels.append(token)
    try:
    
        relations = model.predict_relations(processed_data[i]['tokens'], labels, threshold=0.0, ner=pred_ner[i], top_k=1)

        sorted_data_desc = sorted(relations, key=lambda x: x['score'], reverse=True)
        temp=[]
        for item in sorted_data_desc:
            head=' '.join(item['head_text'])
            tail=' '.join(item['tail_text'])
            if head == tail:
                continue
            if item['label'] not in relation_labels_predicted_count:
                relation_labels_predicted_count[item['label']]=0
            relation_labels_predicted_count[item['label']]+=1
            temp.append([head,tail,item['label']])

        relations_all.append(temp)
    except:
        relations_all.append([])

print("Success! ✅")


100%|██████████| 5000/5000 [07:04<00:00, 11.77it/s]

Success! ✅





In [52]:
recall_avg=[]
precision_avg=[]
common=0
ours=0

for i in range(len(relations_all)):
    true_set=relations_true[i]
    pred_set=relations_all[i]
    d={}
    for item in pred_set:
        if (item[0],item[1]) not in d and (item[1],item[0]) not in d:
            d[(item[0],item[1])]=item[2]
    
    ours+=len(d)
    if len(true_set) == 0 and len(pred_set) == 0:
        recall_avg.append(1.0)
        precision_avg.append(1.0)
    else:
        count=0
        for m in true_set:
            for n in pred_set:
                if (m[0] in n[0] or n[0] in m[0]) and (m[1] in n[1] or n[1] in m[1]) and (m[2]==n[2] or (m[2]=='administrative_divisions' and n[2]=='country') or (m[2]=='contains' and n[2]=='includes') or (m[2]=='country' and n[2]=='capital')):
                    count+= 1
        recall = count/ len(true_set) if len(true_set) > 0 else 0
        precision = count / len(d) if len(d) > 0 else 0
        common += count
        recall_avg.append(recall)
        precision_avg.append(precision)

In [53]:
sum(recall_avg) / len(recall_avg), sum(precision_avg) / len(precision_avg)

(0.3301430772005775, 0.19016944166944294)

In [None]:
from pair2rel import Pair2Rel

from tqdm import tqdm
model = Pair2Rel.from_pretrained("chapalavamshi022/pair2rel")
import torch

# Force usage of GPU 1
device = torch.device("cuda:7" if torch.cuda.is_available() else "cpu")
model = model.to(device)
model.device = device 
relations_all=[]
labels = ['contains','includes','place_lived','nationality','company','capital','neighborhood_of','place_of_birth','country',
            'place_of_death','place_founded']
relation_labels_predicted_count={}
for i in tqdm(range(len(processed_data))):
    # required_labels = []
    # for token in processed_data[i]['tokens']:
    #     if token in rel_set:
    #         required_labels.append(token)
    
    relations = model.predict_relations(processed_data[i]['tokens'], labels, threshold=0.0, ner=processed_data[i]['ner'], top_k=2)

    sorted_data_desc = sorted(relations, key=lambda x: x['score'], reverse=True)
    temp=[]
    for item in sorted_data_desc:
        head=' '.join(item['head_text'])
        tail=' '.join(item['tail_text'])
        if head == tail:
            continue
        if item['label'] not in relation_labels_predicted_count:
            relation_labels_predicted_count[item['label']]=0
        relation_labels_predicted_count[item['label']]+=1
        temp.append([head,tail,item['label']])

    relations_all.append(temp)
        
print("Success! ✅")


100%|██████████| 5000/5000 [05:23<00:00, 15.46it/s]

Success! ✅





In [54]:
recall_avg=[]
precision_avg=[]
common=0
ours=0

for i in range(len(relations_all)):
    true_set=relations_true[i]
    pred_set=relations_all[i]
    d={}
    for item in pred_set:
        if (item[0],item[1]) not in d and (item[1],item[0]) not in d:
            d[(item[0],item[1])]=item[2]
    
    ours+=len(d)
    if len(true_set) == 0 and len(pred_set) == 0:
        recall_avg.append(1.0)
        precision_avg.append(1.0)
    else:
        count=0
        for m in true_set:
            for n in pred_set:
                if m[0]==n[0] and m[1]==n[1] and (m[2]==n[2] or (m[2]=='administrative_divisions' and n[2]=='country') or (m[2]=='contains' and n[2]=='includes') or (m[2]=='country' and n[2]=='capital')):
                    count+= 1
        recall = count/ len(true_set) if len(true_set) > 0 else 0
        precision = count / len(pred_set) if len(d) > 0 else 0
        common += count
        recall_avg.append(recall)
        precision_avg.append(precision)

In [68]:
sum(recall_avg) / len(recall_avg), sum(precision_avg) / len(precision_avg)

(0.6480175036075041, 0.2525411111111112)

In [69]:
from pair2rel import Pair2Rel

from tqdm import tqdm
model = Pair2Rel.from_pretrained("chapalavamshi022/pair2rel")
import torch

# Force usage of GPU 1
device = torch.device("cuda:7" if torch.cuda.is_available() else "cpu")
model = model.to(device)
model.device = device 
relations_all=[]
labels = ['contains','includes','place_lived','nationality','company','capital','neighborhood_of','place_of_birth','country',
            'place_of_death','place_founded']
relation_labels_predicted_count={}
for i in tqdm(range(len(processed_data))):
    # required_labels = []
    # for token in processed_data[i]['tokens']:
    #     if token in rel_set:
    #         required_labels.append(token)
    
    relations = model.predict_relations(processed_data[i]['tokens'], labels, threshold=0.0, ner=processed_data[i]['ner'], top_k=3)

    sorted_data_desc = sorted(relations, key=lambda x: x['score'], reverse=True)
    temp=[]
    for item in sorted_data_desc:
        head=' '.join(item['head_text'])
        tail=' '.join(item['tail_text'])
        if head == tail:
            continue
        if item['label'] not in relation_labels_predicted_count:
            relation_labels_predicted_count[item['label']]=0
        relation_labels_predicted_count[item['label']]+=1
        temp.append([head,tail,item['label']])

    relations_all.append(temp)
        

print("Success! ✅")


100%|██████████| 5000/5000 [05:14<00:00, 15.92it/s]

Success! ✅





In [70]:
recall_avg=[]
precision_avg=[]
common=0
ours=0

for i in range(len(relations_all)):
    true_set=relations_true[i]
    pred_set=relations_all[i]
    d={}
    for item in pred_set:
        if (item[0],item[1]) not in d and (item[1],item[0]) not in d:
            d[(item[0],item[1])]=item[2]
    
    ours+=len(d)
    if len(true_set) == 0 and len(pred_set) == 0:
        recall_avg.append(1.0)
        precision_avg.append(1.0)
    else:
        count=0
        for m in true_set:
            for n in pred_set:
                if m[0]==n[0] and m[1]==n[1] and (m[2]==n[2] or (m[2]=='administrative_divisions' and n[2]=='country') or (m[2]=='contains' and n[2]=='includes') or (m[2]=='country' and n[2]=='capital')):
                    count+= 1
        recall = count/ len(true_set) if len(true_set) > 0 else 0
        precision = count / len(pred_set) if len(d) > 0 else 0
        common += count
        recall_avg.append(recall)
        precision_avg.append(precision)

In [71]:
sum(recall_avg) / len(recall_avg), sum(precision_avg) / len(precision_avg)

(0.8932503030303031, 0.23985740740740738)