![title](Question_Rubric_2.jpg)

In [1]:
import openai
import os
import pandas as pd 
import numpy as np
import time
# Take the OpenAI credential from the environment so it never lives in the notebook;
# the value is None when OPENAI_API_KEY is not set.
openai.api_key = os.environ.get("OPENAI_API_KEY")

In [2]:
# Load the question spreadsheet and discard the File_name column in one step;
# the 'Questions' and 'Concepts' columns are the ones read further down.
df = pd.read_excel('Chem_Questions.xls').drop(columns=['File_name'])

In [3]:
# Inputs pulled out of the spreadsheet as plain Python lists.
questions = df['Questions'].tolist()
concepts = df['Concepts'].tolist()

# Flat accumulator filled by the grading loop: each question's text followed by
# the model responses obtained for it.
results = []

# Criteria each question is checked against. Both strings of every entry are
# interpolated verbatim into the prompt, so their exact spelling is part of the
# request that is sent to the model.
iwfs = [
    dict(
        criteria="gramatical accuracy",
        definition="question text is grammatically accurate and logical to reader",
    ),
    dict(
        criteria="ambiguous or unclear information ",
        definition="questions is written in clear, unambiguous language. It is clear what is being asked and what is expected in the answer",
    ),
    dict(
        criteria="gratuious information",
        definition="avoids unnecessary information in the stem that is not required to answer the question",
    ),
    dict(
        criteria="pedagogical value",
        definition="question is of educational value to students in chemistry",
    ),
    dict(
        criteria="covers key concept",
        definition="question relates closely to an identified key concept for the given block of text",
    ),
]

In [4]:
# Grade every question with GPT-4. Per question we make one call for each IWF
# criterion, one call to pick the most closely related concept, and one call for
# an overall good/fair/poor grade. The raw responses are collected in `results`,
# then reshaped into one row per question and written to CSV.

# Seconds to wait before retrying after a failed API call.
RETRY_DELAY_SECONDS = 15

# Running total of failed API calls across the whole cell (printed on each failure).
error_counter = 0


def _ask_gpt4(prompt):
    """Send `prompt` to GPT-4 as a single user message and return the raw response.

    The API occasionally goes down and raises; in that case the failure is
    counted and reported, we sleep for RETRY_DELAY_SECONDS, and the same call is
    retried until it succeeds.

    Only `Exception` is caught (not a bare `except:`), so KeyboardInterrupt and
    SystemExit still propagate and the cell can be interrupted during an outage.
    """
    global error_counter
    while True:
        try:
            return openai.ChatCompletion.create(
                model="gpt-4",
                messages=[{"role": "user", "content": prompt}],
            )
        except Exception:
            error_counter += 1
            print(f'error: {error_counter}')
            time.sleep(RETRY_DELAY_SECONDS)


def _response_text(entry):
    """Return the message text of an API response, or `entry` itself otherwise.

    `results` interleaves the plain question values with API response objects;
    anything that does not expose `.choices[0].message.content` (i.e. the
    question itself) is passed through unchanged.
    """
    try:
        return entry.choices[0].message.content
    except (AttributeError, IndexError, KeyError, TypeError):
        return entry


# Start from an empty list so that re-running this cell (for example after an
# interrupted run) cannot leave stale partial entries that would shift every row.
results = []

counter = 0
for q in questions:
    print(counter)
    results.append(q)

    # One yes/no judgement per criterion.
    for criterion in iwfs:
        results.append(_ask_gpt4(
            f'Begin your response with yes or no, does this question satisfy the criteria relating to {criterion["criteria"]}: {criterion["definition"]}? Explain why. {q}'
        ))

    # Which of the known concepts the question is closest to.
    results.append(_ask_gpt4(
        f'Start your answer with the concept. Given this list of concepts: {concepts}, which is most closely related, if any, to this question: {q}'
    ))

    # Overall quality grade.
    results.append(_ask_gpt4(
        f'Begin your response with either good, fair, or poor, how well is this question written for testing a students understanding in a chemistry course. Explain why. {q}'
    ))
    counter += 1

columns = [
    'question',
    'gramatical_accuracy',
    'ambiguous_or_unclear',
    'gratuitous_information',
    'pedagogical_value',
    'covers_key_concept',
    'concept_covered',
    'question_grade'
]

# Each question contributes: its own text, one response per criterion, the
# concept response and the grade response. Deriving the width from `iwfs`
# (instead of a hard-coded 7) keeps rows aligned if the criteria list changes;
# a mismatch with `columns` then fails loudly in the DataFrame constructor.
row_width = 1 + len(iwfs) + 2
texts = [_response_text(entry) for entry in results]
rows = [texts[start:start + row_width] for start in range(0, len(texts), row_width)]

pd_results = pd.DataFrame(rows, columns=columns)
pd_results.to_csv("gpt-4_results.csv")

0
1
2
3
4
5
6
error: 1
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
