In [None]:
from langchain import PromptTemplate
from langchain.schema.output_parser import StrOutputParser
from langchain_openai import ChatOpenAI
from dotenv import load_dotenv
import os
import pickle
from utils import Text, apply_history
from datetime import datetime
from tqdm import tqdm

In [None]:
import langchain
from langchain.globals import set_debug

# Switch on LangChain's global debug mode through the supported setter
# instead of assigning the legacy `langchain.debug` module attribute.
# `import langchain` is kept so the name stays available to other cells.
set_debug(True)

In [None]:
# Load the pickled text table and the pickled pairs object.
# Context managers make sure both file handles are closed after reading.
# NOTE: unpickling can execute arbitrary code -- only load files from a trusted source.
with open("/workspace/data/ARTS_only_texts_94.pkl", "rb") as texts_file:
    data = pickle.load(texts_file)
with open("/workspace/data/determined_pairs_376.pkl", "rb") as pairs_file:
    determined_pairs = pickle.load(pairs_file)

# Wrap every row in a Text object, keyed by the row's index label.
# `data` must provide iterrows() (presumably a pandas DataFrame -- confirm);
# text[0] is the entry the row returns for 0.
texts = {t_id : Text(t_id, text[0]) for t_id, text in data.iterrows()}

In [None]:
# Pull variables from the workspace .env file into the process environment,
# then look up the OpenAI key (the lookup yields None when it is not set).
load_dotenv(dotenv_path="/workspace/.env")
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")

In [None]:
def build_dyn_prompt(texts):
    """Build the per-text placeholder section of the prompt template.

    For each key of ``texts`` (in iteration order) one entry is emitted:
    the label ``text id <key>:``, an opening triple-backtick fence, the
    placeholder ``{Text_<key>}``, a closing fence and a trailing
    space-newline-space separator.

    Args:
        texts: mapping whose keys are the text ids; the values are not read.

    Returns:
        The concatenation of all entries, or an empty string when
        ``texts`` has no keys.
    """
    # Doubled braces produce literal braces, so the result still contains
    # "{Text_<key>}" placeholders for later template substitution.
    entries = (
        f"text id {key}: ```{{Text_{key!s}}} ``` \n "
        for key in texts.keys()
    )
    return "".join(entries)

In [None]:
# Ranking prompt: fixed instructions, then one "{Text_<id>}" placeholder per
# text (generated by build_dyn_prompt), then the required answer format.
# The instructions name a three-backtick delimiter so that they agree with the
# fences build_dyn_prompt puts around every text (they previously said four).
# NOTE(review): the sentence "where the task the given texts" appears to be
# missing words; it is left unchanged here -- confirm the intended wording.
prompt_template = """
I will present you with a numbered list of texts. It is important that each text has a corresponding id. 
I would like you to give me back a sorted list of these ids. The criterion for the sorting should be the simplicity of the texts. 
Please use the following guidelines to evaluate the simplicity of the texts: 
Imagine you are writing an exam where you are allowed to google and where the task the given texts.
Which of the texts: generates less cognitive load?, can you understand more quickly?, are you more confident to answer questions about?, is easier for you to reformulate without changing the meaning?
The texts will be delimited by ```


""" + build_dyn_prompt(texts) + """

       The answer should only be a list of ids sorted by simplicity. Please answer without any further text.
"""


In [None]:
# Name of the OpenAI chat model to query; it is also embedded in the path of
# the pickle file the ranking is written to.
model_name = "gpt-4-1106-preview"
# Alternative model, kept for quick switching:
#model_name = "gpt-3.5-turbo-1106"

In [None]:
# Template variable names, one per text, in the form "Text_<id>".
input_variables = [f"Text_{entry.get_t_id()}" for entry in texts.values()]

In [None]:
# Prompt object built from the template string and its variable names.
prompt = PromptTemplate(
    template=prompt_template,
    input_variables=input_variables,
)
# Chat model queried with temperature 0.
model = ChatOpenAI(
    model=model_name,
    temperature=0,
)
# Parser that turns the model reply into a plain string.
output_parser = StrOutputParser()

# Pipeline: fill the prompt -> call the model -> parse the reply.
chain = prompt | model | output_parser

In [None]:
# Values for the template variables: "Text_<id>" -> content of that text.
id_text_dict = {
    f"Text_{entry.get_t_id()}": entry.get_text()
    for entry in texts.values()
}

In [None]:
# Run the chain once, passing every text under its placeholder name.
# `res` is whatever the final output parser in the chain returns.
res = chain.invoke(id_text_dict)

In [None]:
# Parse the model reply into a list of integer ids.
# Surrounding whitespace and an optional pair of square brackets are removed
# first, so both "3, 1, 2" and "[3, 1, 2]" are accepted; int() itself ignores
# whitespace around each token. Any other reply still raises ValueError
# rather than being silently turned into a partial ranking.
autoranking = [int(id_) for id_ in res.strip().strip("[]").split(",")]

In [None]:
# Persist the ranking as a pickle file whose name contains the model name.
# The context manager closes (and thereby flushes) the file as soon as the
# dump has finished, instead of leaving the handle open.
path = f"/workspace/{model_name}_auto_ranking_history.pkl"
with open(path, "wb") as ranking_file:
    pickle.dump(autoranking, ranking_file)