In [6]:


from tqdm import tqdm
from dotenv import load_dotenv
import os
import openai

# Enable tqdm's pandas integration and load variables from a local .env file
# into the process environment.
tqdm.pandas()
load_dotenv()

# API key for the university inference endpoint; it is read from the
# environment so that it never has to be hard-coded in the notebook.
uni_api_key = os.getenv('UNI_API_KEY')
if not uni_api_key:
    # With api_key=None the OpenAI client falls back to OPENAI_API_KEY (or
    # raises an error that mentions that variable), which would either send an
    # unrelated credential to this endpoint or produce a misleading message.
    raise RuntimeError(
        "UNI_API_KEY is not set; define it in the environment or in a .env file."
    )

# OpenAI-compatible client pointed at the university's inference server.
client = openai.OpenAI(
    api_key=uni_api_key,
    base_url="https://llms-inference.innkube.fim.uni-passau.de",
)


In [2]:
import pandas as pd


def entity2id_codex(codex_dir='codex'):
    """Load the Codex id -> label lookups for entities and relations.

    Reads ``entities.json`` and ``relations.json`` from ``codex_dir``. Each
    file is parsed with ``orient='index'``, i.e. the top-level JSON keys become
    the lookup keys, and the ``label`` field of every record becomes the value.

    Args:
        codex_dir: Directory containing the two JSON files. Defaults to
            ``'codex'`` (relative to the working directory).

    Returns:
        A tuple ``(entities_dict, relations_dict)`` of ``{id: label}`` dicts.

    Raises:
        FileNotFoundError: If either JSON file is missing.
        KeyError: If the records have no ``label`` field.
    """
    def _load_labels(filename):
        # Explicit UTF-8: the platform default encoding is locale dependent and
        # would corrupt or reject non-ASCII labels on some systems.
        with open(os.path.join(codex_dir, filename), 'r', encoding='utf-8') as file:
            frame = pd.read_json(file, orient='index')
        return frame['label'].to_dict()

    return _load_labels('entities.json'), _load_labels('relations.json')

In [3]:
# Collect the distinct relation ids referenced in the type->relation->type file.
input_file = "humans_wikidata/humans_wikidata/type2relation2type_ttv.txt"

unique_relations = set()

with open(input_file, "r", encoding="utf-8") as triples:
    for raw_line in triples:
        columns = raw_line.strip().split("\t")
        # Lines without a second tab-separated column carry no relation.
        if len(columns) < 2:
            continue
        relation_uri = columns[1].strip()
        # Only values containing a "/relation/" segment are kept.
        if "/relation/" not in relation_uri:
            continue
        # The id is whatever follows the last "/" of the value.
        unique_relations.add(relation_uri.rsplit("/", 1)[-1])

# Sorted list so that later cells iterate in a stable order.
sorted_relations = sorted(unique_relations)

In [4]:
# Load the Codex {id: label} lookups for entities and relations.
# NOTE: `relations_dict` is re-bound further down to hold the static/dynamic
# labels, so re-run this cell before re-running the id -> label mapping below.
entities_dict, relations_dict = entity2id_codex()

In [None]:
# Translate every relation id into its label; ids that are missing from
# `relations_dict` (or whose label is empty) are left out.
candidate_labels = (relations_dict.get(rel_id) for rel_id in sorted_relations)
human_wikidata_relations = [label for label in candidate_labels if label]

In [2]:
# Read one relation label per line from relations.txt.
# NOTE(review): no visible cell writes relations.txt; presumably it was exported
# from `human_wikidata_relations` - confirm.
# Explicit UTF-8 avoids locale-dependent decoding, and blank lines are skipped
# so that no empty relation is passed on for classification.
with open('relations.txt', 'r', encoding='utf-8') as f:
    relations = [line.strip() for line in f if line.strip()]

In [3]:
# Display the relation labels loaded from relations.txt.
relations

['cause of death',
 'unmarried partner',
 'place of birth',
 'place of death',
 'named after',
 'cast member',
 'drug used for treatment',
 'member of sports team',
 'medical condition',
 'headquarters location',
 'capital',
 'religion',
 'chairperson',
 'occupation',
 'sibling',
 'residence',
 'spouse',
 'languages spoken, written, or signed',
 'country of origin',
 'member of political party',
 'educated at',
 'ethnic group',
 'chief executive officer',
 'head of state',
 'country',
 'creator',
 'director',
 'child',
 'movement',
 'author',
 'instrument',
 'member of',
 'continent',
 'practiced by',
 'place of burial',
 'diplomatic relation',
 'founded by',
 'employer',
 'influenced by',
 'field of work']

In [7]:
def classify_relation(prompt: str, model='llama3.1'):
    """Ask the chat model whether a relation is static or dynamic.

    Sends a fixed system instruction plus ``"relation: " + prompt`` as the user
    message through the module-level ``client`` and returns the model's answer.

    The model does not always follow the requested format exactly (answers such
    as ``'Dynamic'`` occur), so a recognised answer is normalised by stripping
    surrounding whitespace, quotes and punctuation and lower-casing it.

    Args:
        prompt: The relation label to classify, e.g. ``'place of birth'``.
        model: Name of the model to query on the inference endpoint.

    Returns:
        ``'static'`` or ``'dynamic'`` when the answer is one of these labels
        after normalisation. Any other answer is returned with only its
        surrounding whitespace removed, so unexpected replies stay visible to
        the caller. ``None`` if the response carries no content.
    """
    instruction = """
    You are a helpful assistant. Determine whether the given relation is a static or a dynamic relation.
    A static relation is a relation that doesn't change over time, means that the associated facts remain the same over time.
    While a dynamic relation changes over time, means that the associated facts change over time and have to be updated. 
    example: Cristiano Ronaldo is Born in Funchal, Portugal is a static fact, while Cristiano Ronaldo plays for Al-Nassr is a dynamic fact.
    Answer with "static" or "dynamic" only, no explanation is required.
    """
    response = client.chat.completions.create(
        model=model,
        messages=[
            {"role": "system", "content": instruction},
            {"role": "user", "content": "relation: " + prompt},
        ],
    )

    answer = response.choices[0].message.content
    if answer is None:
        return None

    answer = answer.strip()
    # Tolerate decorations such as quotes, markdown emphasis or a final period.
    label = answer.strip(" \t\r\n\"'`*.!").lower()
    return label if label in ("static", "dynamic") else answer


In [13]:
# Classify every relation label, one request per label. The mapping is filled
# incrementally so that results gathered so far survive a failure midway.
relations_dict = dict()
progress = tqdm(relations, desc="Classifying relations")
for rel in progress:
    label = classify_relation(rel)
    relations_dict[rel] = label

print(relations_dict)

Classifying relations:   0%|          | 0/40 [00:00<?, ?it/s]

Classifying relations: 100%|██████████| 40/40 [00:18<00:00,  2.14it/s]

{'cause of death': 'static', 'unmarried partner': 'dynamic', 'place of birth': 'static', 'place of death': 'static', 'named after': 'static', 'cast member': 'dynamic', 'drug used for treatment': 'static', 'member of sports team': 'dynamic', 'medical condition': 'dynamic', 'headquarters location': 'dynamic', 'capital': 'static', 'religion': 'static', 'chairperson': 'dynamic', 'occupation': 'dynamic', 'sibling': 'static', 'residence': 'dynamic', 'spouse': 'dynamic', 'languages spoken, written, or signed': 'dynamic', 'country of origin': 'static', 'member of political party': 'dynamic', 'educated at': 'static', 'ethnic group': 'static', 'chief executive officer': 'Dynamic', 'head of state': 'dynamic', 'country': 'static', 'creator': 'dynamic', 'director': 'dynamic', 'child': 'dynamic', 'movement': 'dynamic', 'author': 'dynamic', 'instrument': 'static', 'member of': 'dynamic', 'continent': 'static', 'practiced by': 'dynamic', 'place of burial': 'static', 'diplomatic relation': 'dynamic', '




In [14]:
import json
# Persist the relation -> label mapping as indented JSON.
serialized = json.dumps(relations_dict, indent=4)
with open("relations.dict", "w") as out_file:
    out_file.write(serialized)