In [None]:
import openai
import os
import pickle
import pandas as pd
from tqdm import tqdm
from dotenv import load_dotenv, find_dotenv
# Load variables from the .env file located by find_dotenv() into the process
# environment, so the os.getenv() lookups in this notebook (OPENAI_API_KEY,
# PROMPTS, EXAM_TOPICS) can resolve.
_ = load_dotenv(find_dotenv())
import json
import hashlib

from langchain.chat_models import ChatOpenAI
from langchain.prompts import ChatPromptTemplate
from langchain.output_parsers import ResponseSchema
from langchain.output_parsers import StructuredOutputParser
# os.getenv returns None when OPENAI_API_KEY is not set; no check is made here.
openai.api_key  = os.getenv('OPENAI_API_KEY')

In [None]:
def generate_exam(domain, exam_topics, num_questions, prompt_template_fname, llm):
    """
    Generate an exam of question/answer pairs for every topic and save each as JSON.

    For each row of ``exam_topics`` the prompt template is filled in, sent to
    ``llm``, parsed into a dict, and written to
    ``<EXAM_TOPICS>/topic_<index label + 1>_exam_qa.json``.

    Parameters
    ----------
    domain : str
        Value substituted for ``domain`` in the prompt template.
    exam_topics : pandas.DataFrame
        Must have an ``exam_topic`` column of strings. Each row's index label
        must support ``+ 1`` (e.g. the default integer index), because it is
        used to number the output file.
    num_questions : int
        Number of ``Question n`` / ``Teacher Answer n`` pairs requested.
    prompt_template_fname : str
        File name of the prompt template inside the ``PROMPTS`` directory.
        The template receives ``topic``, ``domain``, ``num_questions`` and
        ``format_instructions``.
    llm : callable
        Called with the formatted messages; the result must expose ``.content``.

    Returns
    -------
    None
        Results are only written to disk.

    Raises
    ------
    RuntimeError
        If the ``PROMPTS`` or ``EXAM_TOPICS`` environment variable is not set.
    """
    # Resolve both directories up front so a missing variable fails with a
    # clear message *before* any LLM call is made.
    prompts_dir = os.getenv("PROMPTS")
    output_dir = os.getenv("EXAM_TOPICS")
    missing = [name
               for name, value in (("PROMPTS", prompts_dir), ("EXAM_TOPICS", output_dir))
               if not value]
    if missing:
        raise RuntimeError(
            "Environment variable(s) not set: " + ", ".join(missing)
        )

    # One question schema and one answer schema per requested question.
    response_schemas = []
    for n in range(1, num_questions + 1):
        response_schemas.append(
            ResponseSchema(name=f"Question {n}",
                           description=f"Question number {n} of the exam")
        )
        response_schemas.append(
            ResponseSchema(name=f"Teacher Answer {n}",
                           description=f"Answer number {n} of the exam")
        )

    output_parser = StructuredOutputParser.from_response_schemas(response_schemas)
    format_instructions = output_parser.get_format_instructions()

    prompt_path = os.path.join(prompts_dir, prompt_template_fname)
    with open(prompt_path, "r", encoding="utf-8") as prompt_file:
        template_string = prompt_file.read()
    prompt_template = ChatPromptTemplate.from_template(template_string)

    # total= lets the progress bar show completion, which a bare generator cannot.
    for idx, row in tqdm(exam_topics.iterrows(), total=len(exam_topics)):
        topic = row["exam_topic"]

        messages = prompt_template.format_messages(
            topic=topic,
            domain=domain,
            num_questions=num_questions,
            format_instructions=format_instructions)

        response = llm(messages)
        exam = output_parser.parse(response.content)
        # The hash gives a stable identifier for the topic text alongside the text itself.
        exam["topic_hash"] = hashlib.md5(topic.encode()).hexdigest()
        exam["topic"] = topic

        out_path = os.path.join(output_dir, f"topic_{idx+1}_exam_qa.json")
        with open(out_path, "w", encoding="utf-8") as out_file:
            json.dump(exam, out_file)

In [None]:
def get_exam_topics():
    """
    Read every ``.txt`` file in the ``EXAM_TOPICS`` directory into a DataFrame.

    Each file's full text becomes one topic. Files are read in sorted
    file-name order so the row numbering (and therefore anything derived from
    it) is the same on every run; ``os.listdir`` alone gives arbitrary order.

    Returns
    -------
    pandas.DataFrame
        A single column ``exam_topic`` with one row per ``.txt`` file. The
        frame has zero rows (but still the column) when no ``.txt`` file exists.

    Raises
    ------
    RuntimeError
        If the ``EXAM_TOPICS`` environment variable is not set.
    """
    # Path where the exam topic txt files are stored.
    exam_topics_path = os.getenv("EXAM_TOPICS")
    if not exam_topics_path:
        # Without this check os.listdir(None) would silently list the current directory.
        raise RuntimeError("Environment variable not set: EXAM_TOPICS")

    topic_fnames = sorted(
        fname for fname in os.listdir(exam_topics_path) if fname.endswith(".txt")
    )

    topics = []
    for fname in topic_fnames:
        with open(os.path.join(exam_topics_path, fname), "r", encoding="utf-8") as topic_file:
            topics.append(topic_file.read())

    # Building from a dict keeps the column present even when there are no topics.
    return pd.DataFrame({"exam_topic": topics})
    

## Generate exam

In [None]:
# Chat model name passed to ChatOpenAI below; swap the commented line to use the alternative.
#model = "gpt-3.5-turbo"
model = "gpt-4-turbo-preview"
# One row per .txt file found in the EXAM_TOPICS directory (column "exam_topic").
exam_topics = get_exam_topics()
# Substituted into the prompt template as `domain`.
domain = "HR and workforce transformation"
# Number of question / teacher-answer pairs requested per topic.
num_questions = 4
# Template file name, resolved relative to the PROMPTS directory.
prompt_template_fname = "teacher_generate_exam_prompt.txt"
llm = ChatOpenAI(temperature=0.0, model=model)

# Makes one LLM call per topic and writes topic_<n>_exam_qa.json files into the EXAM_TOPICS directory.
generate_exam(domain, exam_topics, num_questions, prompt_template_fname, llm)