In [1]:
import openai
import pandas as pd
from concurrent.futures import ThreadPoolExecutor
from dotenv import load_dotenv
import os

In [2]:
# Load variables from the local dotenv file into the process environment,
# then hand the key to the openai module.
load_dotenv("/Users/priyal/Documents/honours/.env/key.env")
openai.api_key = os.getenv("OPENAI_API_KEY")

# Fail fast: the generation helpers below catch and print request errors, so a
# missing key would otherwise only show up as one error line per request.
if not openai.api_key:
    raise RuntimeError(
        "OPENAI_API_KEY is not set; check that the dotenv file exists and defines it."
    )

In [3]:
def generate_question(abstract, client=None):
    """Ask the chat model for a casual health question based on a paper abstract.

    The prompt asks for a natural-sounding question a 20-year-old might ask
    and includes three example questions for style.

    Args:
        abstract: Abstract text interpolated into the prompt.
        client: Optional object exposing ``chat.completions.create`` (normally
            an ``openai.OpenAI`` instance). Pass one to reuse a single client
            across many calls; when omitted, a new ``openai.OpenAI()`` is
            created for this call.

    Returns:
        The generated question with surrounding whitespace stripped, or
        ``None`` when the abstract is blank, the request raises, or the
        response carries no text. Failures are printed, not raised.
    """
    # A blank abstract gives the model nothing to work from; skip the request.
    if abstract is None or not str(abstract).strip():
        print("Error generating question: abstract is empty")
        return None

    prompt = (
        "Imagine you are a 20-year-old looking for simple, practical health advice. "
        "Based on the following abstract, generate a natural-sounding question that a young adult might ask. "
        "Use casual and relatable language. Avoid technical or research-heavy phrasing.\n\n"
        f"Abstract: {abstract}\n\n"
        "Example questions:\n"
        "- I always feel tired even after sleeping. What could be causing this?\n"
        "- My skin breaks out a lot. Are there foods or habits that can help?\n"
        "- I want to focus better in class. Any natural ways to improve concentration?\n\n"
        "Generate a similar question based on the abstract above."
    )

    try:
        if client is None:
            client = openai.OpenAI()
        response = client.chat.completions.create(
            model="gpt-4",
            messages=[{"role": "user", "content": prompt}],
            temperature=0.7,
            max_tokens=50,
        )
        content = response.choices[0].message.content
        # Report a missing content field explicitly instead of relying on the
        # AttributeError that calling .strip() on None would produce.
        if content is None:
            print("Error generating question: response contained no text")
            return None
        return content.strip()
    except Exception as e:
        # Best-effort by design: callers skip the row when None is returned.
        print(f"Error generating question: {e}")
        return None


In [4]:
def generate_answer(abstract, title, client=None):
    """Ask the chat model for a casual, cited answer based on a paper abstract.

    The prompt asks for plain, everyday language aimed at a 20-year-old and
    for the paper title to be cited as 'According to [Paper Title], ...'.

    Args:
        abstract: Abstract text interpolated into the prompt.
        title: Paper title interpolated into the prompt for the citation.
        client: Optional object exposing ``chat.completions.create`` (normally
            an ``openai.OpenAI`` instance). Pass one to reuse a single client
            across many calls; when omitted, a new ``openai.OpenAI()`` is
            created for this call.

    Returns:
        The generated answer with surrounding whitespace stripped, or ``None``
        when the abstract is blank, the request raises, or the response
        carries no text. Failures are printed, not raised.
    """
    # A blank abstract gives the model nothing to work from; skip the request.
    if abstract is None or not str(abstract).strip():
        print("Error generating answer: abstract is empty")
        return None

    prompt = (
        "Provide a simple, easy-to-understand answer to a 20-year-old based on the following abstract. "
        "Use clear, everyday language and make it relatable to a young adult. "
        "Avoid complex medical terms and keep it casual. "
        "Mention the research paper title as a citation in this format: 'According to [Paper Title], ...'.\n\n"
        f"Abstract: {abstract}\n\n"
        f"Paper Title: {title}\n\n"
        "Example answers:\n"
        "- According to 'The Science of Sleep and Energy', not getting enough sunlight during the day can mess up your sleep cycle. "
        "Try spending at least 15 minutes outside in the morning to reset your body clock.\n"
        "- 'Diet and Acne' suggests that eating too much sugar can make breakouts worse. Swapping soda for water and cutting back on processed snacks might help your skin.\n\n"
        "Now, generate a similar answer based on the abstract above."
    )

    try:
        if client is None:
            client = openai.OpenAI()
        response = client.chat.completions.create(
            model="gpt-4",
            messages=[{"role": "user", "content": prompt}],
            temperature=0.7,
            max_tokens=150,
        )
        content = response.choices[0].message.content
        # Report a missing content field explicitly instead of relying on the
        # AttributeError that calling .strip() on None would produce.
        if content is None:
            print("Error generating answer: response contained no text")
            return None
        return content.strip()
    except Exception as e:
        # Best-effort by design: callers skip the row when None is returned.
        print(f"Error generating answer: {e}")
        return None


In [5]:
def main(input_csv, output_csv, max_rows=1000):
    """Build a question/answer dataset from a CSV of paper titles and abstracts.

    For each usable row, a question and an answer are generated from the
    abstract; rows where either generation returns nothing are skipped. The
    collected pairs are written to ``output_csv`` with the columns
    ``instruction`` (question), ``input`` (abstract) and ``output`` (answer).

    Args:
        input_csv: Path to a CSV containing ``Title`` and ``Abstract`` columns.
        output_csv: Path the generated dataset is written to.
        max_rows: Maximum number of usable rows to process (default 1000, the
            previously hard-coded limit). Pass ``None`` to process every row.
    """
    columns = ["instruction", "input", "output"]

    # Title is interpolated into the answer prompt as the citation, so a
    # missing title would be rendered as "nan"; require both fields.
    papers = pd.read_csv(input_csv).dropna(subset=["Abstract", "Title"])
    if max_rows is not None:
        papers = papers.head(max_rows)

    dataset = []

    for _, row in papers.iterrows():
        title = row["Title"]
        abstract = row["Abstract"]

        question = generate_question(abstract)
        if not question:
            continue

        answer = generate_answer(abstract, title)
        if not answer:
            continue

        dataset.append({"instruction": question, "input": abstract, "output": answer})

        # Number by pairs actually produced: the DataFrame index label is the
        # original CSV row label, so it drifts once rows are dropped or skipped.
        print(f"✅ Generated Q&A pair {len(dataset)}: {question}")

    print(f"\nProcessed {len(dataset)} QA pairs")

    # Passing explicit columns keeps the header row even when no pairs were
    # generated, so the output file stays readable as a CSV.
    dataset_df = pd.DataFrame(dataset, columns=columns)
    dataset_df.to_csv(output_csv, index=False)
    print(f"📂 QA dataset saved to {output_csv}")

In [6]:
# Input CSV (main reads its Title and Abstract columns) and the destination
# file for the generated question/answer pairs.
input_csv = "/Users/priyal/Documents/honours/datasets/extracted_papers_info.csv"
output_csv = "/Users/priyal/Documents/honours/datasets/qa_dataset_bigger_20yo.csv"

# Run the generation pipeline; progress is printed as each pair is produced.
main(input_csv=input_csv, output_csv=output_csv)

✅ Generated Q&A pair 1: Can changing my lifestyle or diet with Ayurveda methods affect my genes and improve my overall health?
✅ Generated Q&A pair 2: I've heard that diabetes is more than just having high blood sugar levels. Can something like Ayurveda, or holistic approaches in general, help manage it better?
✅ Generated Q&A pair 3: I've heard that Ayurveda might help with managing epilepsy. Is there any truth to that?
✅ Generated Q&A pair 4: So, can technology like computers and stuff be used to understand how traditional medicine like Ayurveda works better?
✅ Generated Q&A pair 5: The abstract doesn't provide any information that I can use to formulate a question. Could you please provide a more detailed abstract?
✅ Generated Q&A pair 6: I get migraines a lot and normal treatments don't seem to work. Could something like Ayurvedic medicine help me out?
✅ Generated Q&A pair 7: I'm thinking of having a baby soon. Can Ayurvedic practices help me have a healthier pregnancy and delivery