In [10]:
import os
import json
import logging
from pathlib import Path
from dotenv import load_dotenv
from datasets import load_dataset
from tqdm import tqdm
import pandas as pd
from utils.openai_endpoints import query_openai_model


from utils.prompts import gen_qa_prompt

from azure.ai.inference import ChatCompletionsClient
from azure.core.credentials import AzureKeyCredential



# Configure notebook-wide logging: INFO and above, bare message text only.
# force=True (Python 3.8+) replaces any handlers already attached to the root
# logger. Without it, basicConfig() silently does nothing when the root logger
# is already configured -- e.g. by an imported library or by an earlier run of
# this cell with different settings -- and the settings below would be ignored.
logging.basicConfig(level=logging.INFO, format='%(message)s', force=True)
logger = logging.getLogger(__name__)

In [2]:
# Connectivity smoke test: send a trivial prompt through query_openai_model and
# print whatever it returns. The recorded output for this cell shows a
# (response text, usage object) tuple.
resp = query_openai_model("Are you up? reply in json")
print(resp)

('{\n  "response": "Yes, I am here and ready to assist you!"\n}', CompletionUsage(completion_tokens=19, prompt_tokens=24, total_tokens=43, completion_tokens_details=CompletionTokensDetails(accepted_prediction_tokens=0, audio_tokens=0, reasoning_tokens=0, rejected_prediction_tokens=0), prompt_tokens_details=PromptTokensDetails(audio_tokens=0, cached_tokens=0)))


In [2]:
# Fetch the "preetam7/dynamic_kgqa" dataset via the `datasets` library,
# announcing the step through the notebook logger first.
logger.info("Loading dataset...")
dataset = load_dataset("preetam7/dynamic_kgqa")

# Materialise the 'test' split as a plain Python list so that individual
# examples can be indexed directly (test_data[0], ...).
test_data = [example for example in dataset["test"]]

# Empty accumulator; presumably filled by later cells -- not used in this one.
results = []

In [6]:
# Display the length of the first test example's 'subgraph' field.
# NOTE(review): whether this counts characters or elements depends on the
# field's type, which is not visible here -- confirm against the dataset schema.
len(test_data[0]['subgraph'])

2029

In [12]:
# Build the Q&A-generation prompt from the first test example's 'subgraph'
# field and print it for manual inspection.
prompt = gen_qa_prompt(test_data[0]['subgraph'])

print(prompt)

You are an AI assistant tasked with generating question-answer pairs from knowledge graph triples. Your goal is to create natural, human-like questions and their corresponding answers based on the provided graph data.

Task Overview:
Generate **multi-hop, complex Q&A pairs** where the questions appear simple and natural but require reasoning across multiple connected relationships within the graph to infer the answer.

Guidelines for Generating Q&A Pairs:
1. **Question Design**:
- Questions should utilize multiple connected relationships in the graph, requiring multi-hop reasoning.
- Avoid single-hop or trivial questions directly derived from a single triple.
- The answer should be an entity or node in the graph.

2. **Multi-Hop Reasoning**:
- Use paths connecting entities indirectly through multiple relationships to infer answers.
- Questions should reflect meaningful and interesting connections within the graph.
- Aim for question with at least 4 hops or higher whenever possible.

3.

In [13]:
# Send the generated prompt to the model and print the raw return value.
# NOTE(review): the recorded output starts like a tuple whose first item is a
# JSON string, so print() shows it with escaped newlines -- consider printing
# the text element on its own if readability matters.
res = query_openai_model(prompt)
print(res)

('{\n  "valid_qa_pairs": true,\n  "number_of_qa_pairs": 5,\n  "qa_pairs": [\n    {\n      "question": "Which taxon does a llama ultimately belong to?",\n      "answer": "http://yago-knowledge.org/resource/Even-toed_ungulate",\n      "supporting_path": [\n        {\n          "subject": "http://yago-knowledge.org/resource/Llama",\n          "predicate": "http://schema.org/parentTaxon",\n          "object": "http://yago-knowledge.org/resource/Lama__u0028_genus_u0029_"\n        },\n        {\n          "subject": "http://yago-knowledge.org/resource/Lama__u0028_genus_u0029_",\n          "predicate": "http://schema.org/parentTaxon",\n          "object": "http://yago-knowledge.org/resource/Camelidae"\n        },\n        {\n          "subject": "http://yago-knowledge.org/resource/Camelidae",\n          "predicate": "http://schema.org/parentTaxon",\n          "object": "http://yago-knowledge.org/resource/Even-toed_ungulate"\n        }\n      ]\n    },\n    {\n      "question": "What is the pa