In [7]:
import openai
import os
import pickle
import pandas as pd
from tqdm import tqdm
from dotenv import load_dotenv, find_dotenv
_ = load_dotenv(find_dotenv())
import json
import hashlib

from langchain.chat_models import ChatOpenAI
from langchain.prompts import ChatPromptTemplate
from langchain.output_parsers import ResponseSchema
from langchain.output_parsers import StructuredOutputParser
openai.api_key  = os.getenv('OPENAI_API_KEY')

In [8]:
def generate_dpo_rejected(language, prompt_template_fname, llm):
    
    response_schemas = []
    answer_schema =  ResponseSchema(name=f"""Student Answer""",
                                          description=f"""Student answer"""
                                         )
    response_schemas.append(answer_schema)

    output_parser = StructuredOutputParser.from_response_schemas(response_schemas)
    format_instructions = output_parser.get_format_instructions()


    prompt_path = os.path.join(os.getenv("PROMPTS"),prompt_template_fname)
    with open(prompt_path,"r") as file:
        template_string = file.read()
    prompt_template = ChatPromptTemplate.from_template(template_string)

    qa_path = os.getenv("DPO_QA")
    qa_data = os.listdir(qa_path)

    for idx, qa in enumerate(qa_data):
        
        with open(os.path.join(qa_path,qa),'r') as f:
            try:
                data = json.loads(f.read())
                del data["Teacher Answer"]
                
                message = prompt_template.format_messages(
                    topic=data["topic"],
                    language=language,
                    question=data["Question"],
                    format_instructions=format_instructions
                    )
                response = llm(message)
                output_dict = output_parser.parse(response.content)
                output_dict["topic_hash"] = hashlib.md5(data["topic"].encode()).hexdigest()
                output_dict["topic"]=data["topic"]
                output_dict["Question"]=data["Question"]


                path = os.path.join(os.getenv("DPO_QA"),f"""topic_{idx}_qa_rejected.json""")
                with open(path, "w",  encoding='utf-8') as file:
                    json.dump(output_dict,   file, ensure_ascii=False)

            except Exception as e:
                pass



In [9]:
def generate_dpo_accepted(topics, language, prompt_template_fname, llm):

    """
    Given a topic generate num_questions question,answer pairs
    about the topic.
    """

    response_schemas = []
    
    question_schema =  ResponseSchema(name=f"""Question""",
                                          description=f"""Generated question"""
                                         )
    response_schemas.append(question_schema)
    answer_schema =  ResponseSchema(name=f"""Teacher Answer""",
                                          description=f"""Generated answer"""
                                         )
    response_schemas.append(answer_schema)

    output_parser = StructuredOutputParser.from_response_schemas(response_schemas)
    format_instructions = output_parser.get_format_instructions()


    prompt_path = os.path.join(os.getenv("PROMPTS"),prompt_template_fname)
    with open(prompt_path,"r") as file:
        template_string = file.read()
    prompt_template = ChatPromptTemplate.from_template(template_string)


    for idx, row in tqdm(topics.iterrows()):

        message = prompt_template.format_messages(
                    topic=row["topic"],
                    language=language,        
                    format_instructions=format_instructions)

        response = llm(message)
        output_dict = output_parser.parse(response.content)
        output_dict["topic_hash"] = hashlib.md5(row["topic"].encode()).hexdigest()
        output_dict["topic"]=row["topic"]


        path = os.path.join(os.getenv("DPO_QA"),f"""topic_{idx+1}_qa_accepted.json""")
        with open(path, "w",  encoding='utf-8') as file:
            json.dump(output_dict,   file, ensure_ascii=False)

In [10]:
def get_dpo_topics():
    """
    Reads topics from txt files and adds to a dataframe. Each topic can be in a separate file
    """
    topics = []
    #path where we store the exam topic txt files
    topics_path = os.getenv("DPO_TOPICS")
    print(topics_path)
    #read in the exam topic files and return as a pandas DataFrame
    for file in list(filter(lambda f: f.endswith(".txt"),os.listdir(topics_path))):
        print(file)
        with open(os.path.join(topics_path,file),'r') as file:
            topics.append(file.read())
    topics = pd.DataFrame(topics)
    topics.columns = ["topic"]
    return topics
    

## Generate DPO data

In [23]:
accepted_answer_model = "gpt-3.5-turbo"
rejected_answer_model = os.getenv("LLM_URL")
#model = "gpt-4-turbo-preview"
topics = get_dpo_topics()
language = "Greek"
temperature = 0.2
max_tokens=512
random_seed = 1

#generate accepted
prompt_template_fname = "generate_qa_from_passage_prompt.txt"
llm = ChatOpenAI(temperature=temperature, model=accepted_answer_model)
generate_dpo_accepted(topics, language, prompt_template_fname, llm)

#generate rejected
prompt_template_fname = "generate_rejected_answer_from_passage_prompt.txt"
llm = ChatOpenAI(
        base_url=os.getenv("LLM_URL"),
        api_key="not-needed",
        temperature=temperature,
        max_tokens=max_tokens,
        model_kwargs={"seed": random_seed}
        )
generate_dpo_rejected(language, prompt_template_fname, llm)



/Users/vasilishatzopoulos/repositories/DPO_generator/documents/processed/dpo_topics
topic_1.txt
topic_3.txt
topic_2.txt
topic_4.txt


0it [00:00, ?it/s]

4it [00:13,  3.32s/it]


In [25]:
#generate rejected
prompt_template_fname = "generate_rejected_answer_from_passage_prompt.txt"
llm = ChatOpenAI(
        base_url=os.getenv("LLM_URL"),
        api_key="not-needed",
        temperature=temperature,
        max_tokens=max_tokens,
        model_kwargs={"seed": random_seed}
        )
generate_dpo_rejected(language, prompt_template_fname, llm)

