# GPT-3 Question Evaluation

#### Import All Necessary Packages

In [2]:
import openai
import tensorflow as tf
import pandas as pd
import time
import os

#### Input User's OpenAI API key to enable querying fine-tuned model

In [None]:
# Paste your OpenAI API key between the quotes below.
os.environ['OPENAI_API_KEY'] = '' #API Key Here
# The legacy `openai` client reads OPENAI_API_KEY only once, at import time,
# and it has already been imported above. Set the key on the module as well
# so that it is actually used for requests.
openai.api_key = os.environ['OPENAI_API_KEY']

#### Read In CSV File to Pandas Dataframe and Prepare for Predictions

In [14]:
def prepare_data(filepath):
    """
    Load a csv file of automatically generated questions and get it ready for GPT-3 prediction.

    filepath: path to a csv file whose final column holds the generated questions.

    return: tuple(data, question_column). data is the dataframe read from the csv file
    with every row lacking a question removed. question_column is the name of the
    final column, i.e. the column holding the generated questions.
    """
    frame = pd.read_csv(filepath)
    # The questions are expected in the last column of the file.
    last_column = frame.columns[-1]
    # Discard rows whose question cell is empty (NaN after parsing).
    with_questions = frame.dropna(subset=[last_column])
    return with_questions, last_column

Example Implementation

In [None]:
# Load the csv file and unpack the cleaned dataframe together with the name of
# its question column. 'filepath.csv' is presumably a placeholder; replace it
# with the path to your own file.
data, question_column = prepare_data('filepath.csv')

#### Run predictions using fine-tuned GPT model

In [49]:
def predict(row, question_column, model, delay=False):
    """
    Takes a single row from a dataframe which contains a generated question and returns the prediction of a fine-tuned GPT-3 model

    row: row of dataframe containing generated question
    question_column (string): key name of dataframe column containing questions
    model (string): OpenAI model identifier
    delay (boolean): Enables sleep function. Only necessary if utilizing free tier of OpenAI API access as it limits requests per minute

    return: Prediction from GPT-3 Model as an int. 1 corresponds to a prediction of 'sound', 0 corresponds to a prediction of 'not sound'.
    If the completion is anything other than '0' or '1' (ignoring surrounding whitespace), the stripped completion text is
    returned instead so the caller can recognise and discard it.
    """
    question = row[question_column]
    # Request a single token with temperature 0; the label is read from that one token.
    res = openai.Completion.create(model=model, prompt=question, max_tokens=1, temperature=0, logprobs=2)
    if delay:
        # Pause for one second after the request to stay under the per-minute request limit.
        time.sleep(1)
    # The API returns text, so strip whitespace and convert to the integer
    # label that the docstring promises.
    label_text = res['choices'][0]['text'].strip()
    return int(label_text) if label_text in ('0', '1') else label_text

In [None]:
def make_predictions(data, question_column, model, delay=False):
    """
    Takes in a dataframe and adds the prediction of a fine-tuned GPT-3 model for every row

    data: dataframe containing generated questions. It is modified in place (column added, invalid rows dropped)
    question_column (string): key name of dataframe column containing questions
    model (string): OpenAI model identifier
    delay (boolean): Enables sleep function. Only necessary if utilizing free tier of OpenAI API access as it limits requests per minute

    return: The same dataframe with predictions in a new integer column named 'predicted_label'.
    Rows whose prediction is not a 1 or 0 are removed.
    """
    # Query the model once per row, forwarding `delay` so rate limiting takes effect.
    raw_labels = [predict(row, question_column, model, delay) for _, row in data.iterrows()]
    # Normalise the answers to 0/1. Both integer labels and raw completion text
    # such as ' 1' are accepted; anything else becomes NaN.
    labels = pd.Series(raw_labels, index=data.index, dtype=object)
    labels = labels.astype(str).str.strip().map({'0': 0, '1': 1})
    data['predicted_label'] = labels
    # Remove rows where the model did not answer with a 1 or 0.
    invalid_rows = data.index[labels.isna()]
    data.drop(invalid_rows, inplace=True)
    # The NaN placeholders forced a float column; restore integer labels.
    data['predicted_label'] = data['predicted_label'].astype(int)
    return data

Example usage: run the predictions and write the results to a CSV file

In [None]:
# Identifier of the fine-tuned OpenAI model to query.
# NOTE(review): this looks like a personal fine-tune id; replace with your own.
model = 'ada:ft-personal-2023-04-24-01-17-14'
# Add a 'predicted_label' column to the prepared dataframe.
df_with_preds = make_predictions(data, question_column, model)
# Save the dataframe, including predictions, to disk in csv format.
df_with_preds.to_csv('filepath_with_predictions')

#### Print Counts of Sound and Not Sound Predictions by GPT-3 Model

In [None]:
# Tally the model's verdicts: 1 means 'sound', 0 means 'not sound'.
sound_count = len(df_with_preds.query("predicted_label == 1"))
not_sound_count = len(df_with_preds.query("predicted_label == 0"))
total_count = sound_count + not_sound_count
# Scale the ratio to the 0-100 range so it matches the '%' suffix printed
# below, and report 0.0 rather than dividing by zero when there are no
# valid predictions.
percentage_sound = 100 * sound_count / total_count if total_count else 0.0
print('Count of sound predictions: ' + str(sound_count))
print('Count of not sound predictions: ' + str(not_sound_count))
print('Percentage rated sound: ' + str(percentage_sound) + '%')

#### Randomly Sample N Rows from Dataframe and Write to new CSV File for Human Evaluation

In [10]:
# Number of rows to hand to human evaluators.
N = 100
# DataFrame.sample raises ValueError when asked for more rows than exist
# (it samples without replacement), so cap the request at the number of
# available rows.
rand_samples = df_with_preds.sample(min(N, len(df_with_preds)))
rand_samples.to_csv('filepath_with_human_eval')