In [None]:
from langchain import PromptTemplate
from langchain.schema.output_parser import StrOutputParser
from langchain_openai import ChatOpenAI
from dotenv import load_dotenv
import os
import pickle
from utils import Text, apply_history
from datetime import datetime
from tqdm import tqdm

In [None]:
# Switch on LangChain's module-level debug flag for this session.
# NOTE(review): presumably this makes every chain call log its inputs and
# outputs, which will be noisy inside the scoring loop — confirm it is still
# wanted for a full run.
import langchain
langchain.debug = True

In [None]:
# Load the text data.
# NOTE(review): unpickling can execute arbitrary code; only load pickle files
# that come from a trusted source.
# The files are opened in `with` blocks so the handles are closed right after
# loading instead of being left open for the lifetime of the kernel.
with open("/workspace/data/ARTS_only_texts_94.pkl", "rb") as texts_file:
    data = pickle.load(texts_file)
with open("/workspace/data/determined_pairs_376.pkl", "rb") as pairs_file:
    determined_pairs = pickle.load(pairs_file)

# Wrap each row in a Text object keyed by the row's index label; the row's
# entry `0` is handed to Text as its content.
# (data is iterated with .iterrows(), so it is presumably a pandas DataFrame — TODO confirm.)
texts = {t_id: Text(t_id, text[0]) for t_id, text in data.iterrows()}

In [None]:
# Read the workspace .env file into the process environment, then look up the
# OpenAI key from there (the result is None when the variable is not set).
load_dotenv("/workspace/.env")
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")

In [None]:
# Prompt sent once per text. {TEXT} is the only placeholder; the text to score
# is substituted there and fenced by triple backticks inside the prompt.
# The model is instructed to reply with a single number between 0 and 1 and
# nothing else.
# NOTE(review): the prompt asks for a "simplicity" score but defines 0.0 as
# "very easy to understand" and ~1.0 as "very challenging", i.e. a higher
# number means a harder text — confirm this orientation is intended before
# interpreting the stored scores.
prompt_template = """
I will present you a text, and I want you to score its simplicity between 0 and 1. 
A text that is very easy to understand should receive a score of 0.0; a very challenging text should have a score of approximately 1.0. The following guidelines should be taken into account for the decision: 
Imagine you are writing an exam where you are allowed to google and where the task is to understand the given text.
Keep in mind the following criteria for the simplicity of a text:
Generation of cognitive load, time to understand, confidence to answer questions about, difficulty to reformulate without changing the meaning.
The text to score is delimited by ```

       Text:
       ```
       {TEXT}
       ```

       The answer should only contain a number between 0 and 1.
       Please answer without any further text, just one number with high precision.
"""


In [None]:
# Name of the OpenAI chat model to query. The same string is also embedded in
# the name of the history file written at the end of the notebook.
model_name = "gpt-4-1106-preview"
# Alternative model; uncomment to switch.
#model_name = "gpt-3.5-turbo-1106"

In [None]:
# Pieces of the scoring pipeline:
#   model         - chat model queried with temperature 0
#   prompt        - fills the single {TEXT} placeholder of prompt_template
#   output_parser - turns the model reply into a plain string
model = ChatOpenAI(model=model_name, temperature=0)
prompt = PromptTemplate(input_variables=["TEXT"], template=prompt_template)
output_parser = StrOutputParser()

# Compose the pieces: prompt -> model -> parser.
chain = prompt | model | output_parser

In [None]:
# Collected results of the scoring run, filled by the loop cell:
# running index -> (text, model reply, time the request was started).
gpt_history = {}

In [None]:
# Score every text once and record the raw model reply.
# Each entry is (text, reply, request start time as HH:MM:SS), keyed by a
# running index that starts at 0. The text id itself is not stored.
match_id = 0
# tqdm wraps the iterable directly, so the bar advances by itself and no
# manual pbar.update() bookkeeping is needed.
for text_id, text_obj in tqdm(texts.items(), total=len(texts)):
    system_time = datetime.now().strftime("%H:%M:%S")

    # Keep the Text object and its string content under separate names
    # instead of rebinding the loop variable.
    raw_text = text_obj.get_text()

    res = chain.invoke({"TEXT": raw_text})

    gpt_history[match_id] = (raw_text, res, system_time)
    match_id += 1

In [None]:
# Persist the collected replies. The model name is part of the file name, so
# runs with different models end up in different files.
path = f"/workspace/{model_name}_individual_history.pkl"
# The context manager guarantees the file is flushed and closed as soon as the
# dump has finished, rather than whenever the handle happens to be collected.
with open(path, "wb") as history_file:
    pickle.dump(gpt_history, history_file)