![Question rubric](Question_Rubric_2.jpg)

In [6]:
import openai
import os
import pandas as pd 
import numpy as np
import time
#Take the API key from the environment so the secret is never stored in the notebook itself
openai.api_key = os.environ.get("OPENAI_API_KEY")

In [7]:
#Read the evaluation file and discard the columns that are not used below
df = (
    pd.read_csv('expert_fcds_eval.csv')
    .drop(columns=['File_name', 'Unnamed: 0', 'Unnamed: 0.1'])
)

In [8]:
#Preview the first five rows to confirm that only the Concepts and Questions columns remain
display(df.head(n=5))

Unnamed: 0,Concepts,Questions
0,Active Learning: A data science pattern that ...,. Expected Model Change: How does the expected...
1,"AI consulting firm, EVP framework, Automotive...",. What are some measurable metrics used to ass...
2,"AI consulting firm, EVP framework, Automotive...",. What is the significance of completed transa...
3,Interpretability: The importance of interpret...,What are the effects of trade-offs within a m...
4,Model Assessment and Model Selection are key ...,What is the process of testing and validating...


In [9]:
#Flat accumulator: the question text followed by each API response for that question
results = []
#Plain Python lists of the two columns; concepts[k] is the concept list belonging to questions[k]
questions = df['Questions'].tolist()
concepts = df['Concepts'].tolist()
#Item-writing-flaw (IWF) criteria. Each entry supplies the criterion name and the definition
#that are interpolated into the yes/no prompt, so the wording here is part of the prompt.
iwfs = [
    dict(
        criteria="gramatical accuracy",
        definition="question text is grammatically accurate and logical to reader",
    ),
    dict(
        criteria="ambiguous or unclear information ",
        definition="questions is written in clear, unambiguous language. It is clear what is being asked and what is expected in the answer",
    ),
    dict(
        criteria="gratuious information",
        definition="avoids unnecessary information in the stem that is not required to answer the question",
    ),
    dict(
        criteria="pedagogical value",
        #update to reflect course you are interested in
        definition="question is of educational value to students in a data science course",
    ),
    dict(
        criteria="covers key concept",
        definition="question relates closely to an identified key concept for the given block of text",
    ),
]

In [None]:
#Loop over each question, then for each question, call the IWF criteria one at a time on it.
def ask_gpt4(prompt, max_tokens=100, retry_wait=15, max_retries=None):
    """Send one single-turn user prompt to GPT-4 and return the raw API response.

    Sometimes the GPT-4 API goes down and returns an error. Whenever the call
    raises an Exception, the error is counted and printed, we wait, and the
    same call is tried again.

    Parameters
    ----------
    prompt : str
        Text sent as the content of the single "user" message.
    max_tokens : int
        Passed through as the max_tokens argument of the API call.
    retry_wait : int or float
        Seconds to sleep after a failed call before trying again.
    max_retries : int or None
        Number of retries allowed after the first failure. None (the default)
        keeps retrying until a call succeeds.

    Returns
    -------
    The object returned by openai.ChatCompletion.create.

    Raises
    ------
    Exception
        The last API error, only when max_retries is set and has been used up.
    """
    global error_counter
    failures = 0
    while True:
        try:
            response = openai.ChatCompletion.create(
                model="gpt-4",
                messages=[
                    {"role": "user", "content": prompt},
                ],
                max_tokens=max_tokens
            )
        except Exception as e:
            error_counter += 1
            failures += 1
            print(f'Error: {error_counter}, Message: {str(e)}')
            if max_retries is not None and failures > max_retries:
                raise
            time.sleep(retry_wait)
        else:
            #Short pause after every successful call before the next request is sent
            time.sleep(1)
            return response


#Start from an empty list every time this cell runs: re-running it would otherwise append a
#second batch of entries to the old ones and misalign every row built from results.
results = []
error_counter = 0
for counter, q in enumerate(questions):
    print(counter)
    #Each question contributes, in order: its own text, one response per IWF criterion,
    #the most closely related concept, and the overall grade.
    results.append(q)
    for i in iwfs:
        results.append(ask_gpt4(
            f'Begin your response with yes or no, does this question satisfy the criteria relating to {i["criteria"]}: {i["definition"]}? Explain why. {q}'
        ))
    results.append(ask_gpt4(
        f'Start your answer with the concept. Given this list of concepts: {concepts[counter]}, which is most closely related, if any, to this question: {q}'
    ))
    #update with current course selection
    results.append(ask_gpt4(
        f'Begin your response with either good, fair, or poor, how well is this question written for testing a students understanding in a data science course. Explain why. {q}'
    ))
#Every question adds a fixed number of consecutive entries to results: the question text,
#one response per IWF criterion, the related concept and the overall grade.
row_width = 1 + len(iwfs) + 2
rows = []
r = []
for res in results:
    try:
        #API responses carry their text at choices[0].message.content
        r.append(res.choices[0].message.content)
    except (AttributeError, IndexError, KeyError, TypeError):
        #Not an API response (e.g. the raw question text) or a response with an
        #unexpected shape: keep the value itself rather than dropping the entry
        r.append(res)

    #Once r holds the question text and all of its responses, append it to our greater
    #rows list. A trailing group that is still incomplete is never appended.
    if len(r) == row_width:
        rows.append(r)
        r = []


#Column headers, in the order the entries of each row were collected:
#question text, the five IWF answers, the matched concept, the overall grade
columns = [
    'question',
    'gramatical_accuracy', 'ambiguous_or_unclear', 'gratuitous_information',
    'pedagogical_value', 'covers_key_concept',
    'concept_covered',
    'question_grade',
]

#Assemble the table and save it as CSV (the DataFrame index is written as the first column)
pd_results = pd.DataFrame(data=rows, columns=columns)
pd_results.to_csv(path_or_buf="gpt-4_fcds_pedagogical_results.csv")