# Generating Sample Data for DSPy

## Notebook Setup

In [2]:
# Importing the necessary Python libraries
import os

import pandas as pd

from langchain_core.prompts import ChatPromptTemplate, HumanMessagePromptTemplate
from langchain_openai import ChatOpenAI

In [5]:
# Reading the synthetic knowledge items from disk and keeping only the topic and text columns
df_kis = pd.read_csv('synthetic_knowledge_items.csv')[['ki_topic', 'ki_text']]

In [11]:
# Instantiating the chat model
# The ChatOpenAI client is pointed at the Perplexity endpoint through `base_url`, so the
# credential is read from the PERPLEXITY_API_KEY environment variable.
# Checking for the key up front so that a missing variable produces an actionable message;
# the exception type stays KeyError, which is what the bare os.environ lookup raises.
if 'PERPLEXITY_API_KEY' not in os.environ:
    raise KeyError('PERPLEXITY_API_KEY is not set. Export it in the environment that launches '
                   'the notebook kernel before running this cell.')

chat_model = ChatOpenAI(api_key = os.environ['PERPLEXITY_API_KEY'],
                        base_url = 'https://api.perplexity.ai',
                        model = 'llama-3.1-70b-instruct')

KeyError: 'PERPLEXITY_API_KEY'

In [6]:
# Defining the question-answer generation prompt
# This string is used as a format-style template: {ki_topic} and {ki_text} are the input
# variables, and the doubled braces around the example render as literal single braces.
# The doubled braces must not be preceded by a backslash: "\{" is not a valid Python escape
# sequence, so the backslash would be kept and the example shown to the model would no longer
# be valid JSON.
QA_GENERATION_PROMPT = '''Task:
Generate a single plausible question and answer pair based on the following knowledge item. Ensure that the question is one that would be commonly asked by a human.

Instructions:
    - Export the question and answer pair as a JSON object.
    - Formulate an answer solely based on the information given in the knowledge item text.
    - Do not add any external knowledge.
    - Do not return any other text, like "Here is the question answer pair."
    - Please follow the example set below.

Example:
{{
  "question": "What is the capital of France?",
  "answer": "Paris"
}}

Knowledge item topic:
{ki_topic}

Knowledge item text:
{ki_text}
'''

# Wrapping the QA generation prompt in a single human message, then building the
# chat prompt template from that one message
qa_human_message_template = HumanMessagePromptTemplate.from_template(template = QA_GENERATION_PROMPT)
qa_generation_prompt_template = ChatPromptTemplate.from_messages(messages = [qa_human_message_template])