In [None]:
!pip install -q openai evaluate transformers
!pip install -q sacrebleu rouge_score jiwer

In [2]:
import json
import os
import time

import evaluate
import numpy as np
import openai
import pandas as pd

In [3]:
from google.colab import files

In [4]:
# Copy the project helper module into /content so that the
# `import cf_custom_functions` in the next cell can find it
# (assumes /content is on the import path, as on Colab — TODO confirm).
!cp /utils/cf_custom_functions.py /content

In [5]:
import cf_custom_functions as cf

### ***ChatGPT - Generate Transcriptions***

In [6]:
# Set up OpenAI API credentials and parameters for GPT-3.
# The key is taken from the OPENAI_API_KEY environment variable when it is set,
# so a real key never has to be written into the notebook; "xxx" is the original
# placeholder and is kept only as a fallback.
openai.api_key = os.environ.get("OPENAI_API_KEY", "xxx")
# Alternative sampling setup left by the author (temperature 0.7):
#parameters = {"engine": "text-davinci-003", "temperature": 0.7, "max_tokens": 256}
# Keyword arguments shared by every openai.Completion.create call below.
parameters = {"engine": "text-davinci-003", "temperature": 0.0, "max_tokens": 256}

def get_transcription(formula: str) -> list:
    """Ask the completion endpoint for a spoken-text version of ``formula``.

    The request uses the module-level ``parameters`` plus a prompt built from a
    fixed instruction followed by the formula.

    Args:
        formula: The formula text appended to the instruction prompt.

    Returns:
        The first choice's text, stripped of surrounding whitespace and split
        on newlines (a list with one string per line).
    """
    prompt_text = "Transcript the formula in spoken text:" + " " + formula
    # Build a per-call request instead of writing the prompt into the shared
    # ``parameters`` dict, so one call never leaks its prompt into module state.
    request = {**parameters, "prompt": prompt_text}
    response = openai.Completion.create(**request)
    return response.choices[0].text.strip().split("\n")

In [7]:
def create_transcriptions(df):
  """Return a copy of ``df`` with a "prediction" column from ``get_transcription``.

  Every value of the "formula" column is passed to ``get_transcription``; the
  reply is printed as progress output and stored as one string per row. The
  input frame is not modified.

  ``get_transcription`` returns a list of lines. Wrapping a list of such lists
  in ``np.array`` only works when every reply has exactly one line: replies of
  differing length are rejected by NumPy >= 1.24, and a 2-D array with more
  than one column cannot be assigned to a single DataFrame column. Either
  failure would surface only after all API calls had been made, so the lines of
  each reply are joined with a newline instead. Single-line replies produce the
  same cell value as before.
  """
  df = df.copy()
  y_preds = []
  for formula in df["formula"]:
      transcription = get_transcription(formula)
      print(transcription)
      if isinstance(transcription, str):
          # Tolerate a helper that already returns plain text.
          y_preds.append(transcription)
      else:
          y_preds.append("\n".join(transcription))
  df["prediction"] = y_preds
  return df

In [None]:
# Load the test set through the project helper. Downstream code iterates the
# result with .iterrows() and reads row["formula"], so this is presumably a
# DataFrame with a "formula" column — TODO confirm in cf.load_test_data.
df_test = cf.load_test_data("/data/datafiles/test_data.json")

In [None]:
# Run get_transcription on every test row (one API request per row) and keep
# the results in a new frame with a "prediction" column.
df_preds = create_transcriptions(df_test)

In [None]:
# Persist the predictions; the evaluation cell below reads this CSV by path.
df_preds.to_csv("../predictions/ChatGPT_predictions.csv")

## ***ChatGPT Evaluation on Testset***

In [None]:
# Compute evaluation metrics from the saved prediction file via the project
# helper (which metrics are computed is not visible in this notebook).
metric_chatgpt = cf.compute_metrics_from_prediction_file("../predictions/ChatGPT_predictions.csv")

In [None]:
# Save the metrics under this run name in the metrics JSON file; the same file
# is used for the other runs below, so presumably results are stored per run
# name — verify in cf.save_evaluation_metrics.
cf.save_evaluation_metrics("ChatGPT_latex-to-text_pretrained_new", metric_chatgpt,"../metrics/NLG_metrics_new.json")

### ***GPT3.5 Turbo and GPT4 - Generate Transcriptions***

In [None]:
import openai
# Single-request check of the chat endpoint with one example formula.
# The key comes from OPENAI_API_KEY when set; "xxx" is the original placeholder.
openai.api_key = os.environ.get("OPENAI_API_KEY", "xxx")
# The prompt is a raw string: in a normal literal the "\f" of "\frac" is a
# form-feed escape, which silently corrupts the LaTeX sent to the model (and
# "\e", "\d", "\s" are invalid escapes that newer Python versions warn about).
response = openai.ChatCompletion.create(
  model="gpt-3.5-turbo",
  messages=[
        #{"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": r"Translate this LaTeX expression to English: T \equiv \frac { a ^{ \dagger } } { \sqrt { a ^{ \dagger } a } }"}]
)
full_answer = response.choices[0].message.content
#answer = full_answer.split(":")[1]
print(full_answer)

In [None]:
import openai

#model = "gpt-4", "gpt-3.5-turbo"

def get_transcription_gptx(formula, model):
  """Ask a chat model to translate a LaTeX expression into English.

  Args:
      formula: LaTeX source appended directly to the instruction prompt.
      model: Chat model id passed to the API, e.g. "gpt-4" or "gpt-3.5-turbo".
          The informal spelling "gpt4" is accepted as an alias for "gpt-4".

  Returns:
      The message content of the first choice in the response.

  The API key is not set here: it is configured once at notebook level, and
  assigning a placeholder on every call would overwrite a real key.
  """
  # The request previously hard-coded "gpt-4" and ignored `model`, so a
  # "gpt-3.5-turbo" run silently produced GPT-4 output. Callers that relied on
  # that by passing "gpt4" keep working through this alias.
  model_id = {"gpt4": "gpt-4"}.get(model, model)
  response = openai.ChatCompletion.create(
    model=model_id,
    messages=[
      {"role": "user", "content": "Translate this LaTeX expression to English:" + formula},
    ],
  )
  return response.choices[0].message.content

In [23]:
def generate_transcriptions_with_gptx(df,model):
  """Return a copy of ``df`` with a "prediction" column from a chat model.

  Args:
      df: Frame with a "formula" column; it is copied, not modified.
      model: Model argument forwarded unchanged to ``get_transcription_gptx``.

  Returns:
      The copied frame with one transcription string per row in "prediction".

  Each transcription is printed as it arrives. The API key is deliberately not
  assigned here, so a key configured at notebook level is not replaced by a
  placeholder.
  """
  df = df.copy()
  y_preds = []
  for formula in df["formula"]:
    transcription = get_transcription_gptx(formula, model)
    print(transcription)
    y_preds.append(transcription)

  # A plain list of strings is assigned directly; no NumPy round-trip is needed.
  df["prediction"] = y_preds
  return df

In [None]:
# Request predictions for the whole test set, passing the OpenAI model id
# "gpt-4" (the earlier spelling "gpt4" is not a valid model id).
df_gpt4 = generate_transcriptions_with_gptx(df_test,"gpt-4")

In [None]:
# Persist the predictions; the GPT-4 evaluation cell below reads this CSV by path.
df_gpt4.to_csv("../predictions/gpt4_predictions.csv")

In [None]:
# Request predictions for the whole test set, passing "gpt-3.5-turbo" as the
# model argument.
df_gpt35t = generate_transcriptions_with_gptx(df_test,"gpt-3.5-turbo")

In [None]:
# Persist the predictions; the evaluation cell below reads this CSV by path.
df_gpt35t.to_csv("../predictions/gpt35t_predictions.csv")

In [None]:
# Compute evaluation metrics from the saved prediction file via the project
# helper (which metrics are computed is not visible in this notebook).
metric_gpt35t = cf.compute_metrics_from_prediction_file("../predictions/gpt35t_predictions.csv")

In [None]:
# Save the metrics under this run name in the same metrics JSON file used by
# the other runs in this notebook.
cf.save_evaluation_metrics("GPT35t_latex-to-text_pretrained", metric_gpt35t,"../metrics/NLG_metrics_new.json")

In [None]:
# Compute evaluation metrics from the saved prediction file via the project
# helper (which metrics are computed is not visible in this notebook).
metric_gpt4 = cf.compute_metrics_from_prediction_file("../predictions/gpt4_predictions.csv")

In [None]:
# Save the metrics under this run name in the same metrics JSON file used by
# the other runs in this notebook.
cf.save_evaluation_metrics("GPT4_latex-to-text_pretrained", metric_gpt4,"../metrics/NLG_metrics_new.json")