In [None]:
import itertools
import json
from pathlib import Path

import pandas as pd
from openai import OpenAI
from tqdm.auto import tqdm

In [None]:
# Work directly on the chunk records; no need to convert to pandas.
# NOTE(review): `session_state` is not defined in any cell shown here — it must
# already exist in the kernel before this cell runs, so a fresh
# "Restart & Run All" will presumably fail at this line. Confirm where it is loaded.
documents = session_state['episode_details']['chunks']

In [None]:
# Prompt sent to the model for each document. `{text}` is filled in by
# `str.format`; the doubled braces around the JSON example are escapes that
# render as literal `{` and `}` in the final prompt.
prompt_template = """
You emulate a user of our deep-pod application.
Formulate 5 questions this user might ask based on a provided text.
Make the questions specific to this text.
The record should contain the answer to the questions, and the questions should
be complete and not too short. Use as few words as possible from the record. 

The record:

text: {text}

Provide the output in parsable JSON without using code blocks:

{{"questions": ["question1", "question2", ..., "question5"]}}
""".strip()

In [None]:
# Shared API client used by the request helpers in this notebook. No key is
# passed explicitly, so credentials presumably come from the environment — TODO confirm.
client = OpenAI()

In [None]:
# Render the prompt for the first document as a quick sanity check.
prompt = prompt_template.format(**documents[0])

In [None]:
# Display the rendered prompt for inspection.
prompt

In [None]:
def llm(prompt, model='gpt-4o-mini'):
    """Send a single user message to the chat model and return its reply text.

    Args:
        prompt: Text sent as the content of one ``user`` message.
        model: Name of the chat model to query. Defaults to ``'gpt-4o-mini'``,
            the value that was previously hard-coded here.

    Returns:
        The ``content`` of the first choice in the response.
    """
    response = client.chat.completions.create(
        model=model,
        messages=[{"role": "user", "content": prompt}]
    )

    return response.choices[0].message.content

In [None]:
# Try a single request with the sample prompt; the result is the raw reply text from `llm`.
questions = llm(prompt)

In [None]:
# Check that the sample reply parses as JSON, as the prompt requests.
json.loads(questions)

In [None]:
def generate_questions(doc):
    """Ask the model for questions about one document.

    Args:
        doc: Mapping whose keys fill the placeholders of ``prompt_template``
            (it is unpacked into ``str.format``).

    Returns:
        The model's reply as a raw string; it is not parsed here.
    """
    prompt = prompt_template.format(**doc)
    # Reuse the shared helper instead of duplicating the request code.
    return llm(prompt)

In [None]:
# Generated questions keyed by document id. Kept in its own cell so the
# generation loop below can be re-run and skip ids that are already present.
results = {}

In [None]:
# Generate questions for every document. Ids are 1-based positions in
# `documents`, stored as strings. Ids already in `results` are skipped, so the
# cell can be re-run after a failure without repeating finished requests.
for position, doc in enumerate(tqdm(documents), start=1):
    doc_id = str(position)
    if doc_id in results:
        continue

    questions_raw = generate_questions(doc)
    # The prompt asks for {"questions": [...]}; a malformed reply raises here.
    questions = json.loads(questions_raw)
    results[doc_id] = questions['questions']

In [None]:
# Flatten the per-document question lists into (document id, question) pairs.
final_results = [
    (doc_id, question)
    for doc_id, doc_questions in results.items()
    for question in doc_questions
]

In [None]:
# Write one row per (document id, question) pair to the ground-truth CSV.
output_path = Path('sample/ground-truth-retrieval.csv')
# `to_csv` does not create missing directories, so make sure the target
# folder exists before writing.
output_path.parent.mkdir(parents=True, exist_ok=True)

ground_truth = pd.DataFrame(final_results, columns=['id', 'question'])
ground_truth.to_csv(output_path, index=False)