## Few shot STEM solution examples

In [9]:
import openai 
import pandas as pd
import time
import argparse
import os

### Configuration

In [10]:
openai.api_key = os.getenv('OpenAI_API_Key')
courses_to_few_shot = ['18.01', '18.02', '18.03', '6.042', '18.05', '18.06', 'COMS3251']
MATH_sections_to_few_shot = ['MATH_Algebra', 'MATH_Counting_&_Probability', 'MATH_Intermediate_Algebra', 
                             'MATH_Number_Theory', 'MATH_Prealgebra', 'MATH_Precalculus']
questions_per_course = 10
questions_per_MATH_section = 10

args = ['Codex_Few_Shot', 'GPT3_CoT_One_Shot', 'Do_MATH', 'Do_Courses']

#Will use this many few-shot examples if possible: (if fewer are solved, use as many as possible)
few_shot_examples_desired = 5
codex_engine = "code-davinci-002"
gpt3_engine = "text-davinci-002"
engine_temperature = 0
engine_topP = 0
few_shot_max_tokens = 256
gpt3_CoT_max_tokens = 1000
codex_time_delay = 3
gpt3_time_delay = 1
CoT = "Let's think step by step."

## Helper functions

In [37]:
def execute_few_shot(courses, questions_per):
    """
    Runs few-shot on questions_per questions for each course in courses.
    """
    for course in courses:
        course_location = course + ' results.csv'
        #initializing new columns in csv
        results = pd.read_csv(course_location)
        results['Few-Shot Input'] = ''
        results['Few-Shot Output'] = ''
        results['Few-Shot Evaluation'] = ''
        results.to_csv(course_location, index=False)

        for i in range(0, questions_per):
            k = few_shot_examples_desired

            #correct via zero-shot:
            if results.iloc[i]['Zero-Shot Evaluation'] == 1:
                print('no few shot needed for ' + course + ' question ' + str(i+1))
                few_shot_input = 'n/a'
                few_shot_output = 'n/a'

            #incorrect via zero-shot:
            elif results.iloc[i]['Zero-Shot Evaluation'] == 0:
                few_shot_input = ''
                print('doing few-shot for ' + course + ' question ' + str(i+1) + '...')
                for closest in results.iloc[i]["Most Similar Questions"].strip('][').split(', '):
                    closest_index = int(closest) - 1
                    if closest_index < 10:
                        if results.iloc[closest_index]['Zero-Shot Evaluation'] == 1 and k > 0:
                            few_shot_input += results.iloc[closest_index]['Codex Input']
                            few_shot_input += results.iloc[closest_index]['Codex Output']+'\n\n'
                            k -= 1
                few_shot_input += results.iloc[i]['Codex Input']
                start = time.time()
                time.sleep(codex_time_delay) #to avoid an openai.error.RateLimitError
                few_shot_output = openai.Completion.create(engine = codex_engine, 
                                                        prompt = few_shot_input, 
                                                        max_tokens = few_shot_max_tokens, 
                                                        temperature = engine_temperature, 
                                                        top_p = engine_topP)['choices'][0]['text']
                print('Codex API call time: ' + str(time.time()-start) + '\n')

            #columns not properly labelled with 1's and 0's:
            else:
                print(results.iloc[i]['Zero-Shot Evaluation'])
                print('''A Question not labeled 1 for correct or 0 for incorrect was detected. 
                You must go back and label all Codex Zero-Shot questions as correct or incorrect''')
                raise ValueError

            results.loc[i, 'Few-Shot Input'] = few_shot_input
            results.loc[i, 'Few-Shot Output'] = few_shot_output
            results.to_csv(course_location, index=False)

def execute_GPT3_CoT_one_shot(courses, questions_per):
    """
    Runs one-shot CoT on questions_per questions for each course in courses.
    """
    for course in courses:
        course_location = course + ' results.csv'
        #initializing new columns in csv
        results = pd.read_csv(course_location)
        results['GPT-3 CoT Few-Shot Input'] = ''
        results['GPT-3 CoT Few-Shot Output'] = ''
        results['GPT-3 CoT Few-Show Evaluation'] = ''
        results.to_csv(course_location, index=False)

        for i in range(questions_per):
            closest_index = int(results.iloc[i]["Most Similar Questions"].strip('][').split(', ')[0]) - 1
            if closest_index < 10:
                similar_question = results.iloc[closest_index]["Original Question"]
                similar_answer = results.iloc[closest_index]["Actual Solution"]
                original_question = results.iloc[i]["Original Question"]
                print("Running GPT-3 CoT one-shot on " + course + ' question ' + str(i+1) + '...')
                start = time.time()
                time.sleep(gpt3_time_delay) #to avoid an openai.error.RateLimitError
                gpt3_CoT_input = 'Q: ' + similar_question + '\nA: ' + str(similar_answer) + '\n\nQ: ' + original_question + "\nA: " + CoT
                gpt3_CoT_output = openai.Completion.create(engine = gpt3_engine,
                                                       prompt = gpt3_CoT_input,
                                                       max_tokens = gpt3_CoT_max_tokens,
                                                       temperature = engine_temperature,
                                                       top_p = engine_topP)['choices'][0]['text']
                print('GPT-3 API call time: ' + str(time.time()-start) + '\n')
                results.loc[i, 'GPT-3 CoT Few-Shot Input'] = gpt3_CoT_input
                results.loc[i, 'GPT-3 CoT Few-Shot Output'] = gpt3_CoT_output
                results.to_csv(course_location, index=False)

### Example few shot are based on GPT-3 zero shot generations that are labeled

In [38]:
# You must go back and label all Codex Zero-Shot questions as correct or incorrect
if 'Do_Courses' in args:
    if 'Codex_Few_Shot' in args:
        execute_few_shot(courses_to_few_shot, questions_per_course)
    if 'GPT3_CoT_One_Shot' in args:
        execute_GPT3_CoT_one_shot(courses_to_few_shot, questions_per_course)

Running GPT-3 CoT one-shot on 18.01 question 1...
GPT-3 API call time: 3.8204050064086914

Running GPT-3 CoT one-shot on 18.01 question 2...
GPT-3 API call time: 5.784275770187378

Running GPT-3 CoT one-shot on 18.01 question 4...
GPT-3 API call time: 2.3395957946777344

Running GPT-3 CoT one-shot on 18.01 question 7...
GPT-3 API call time: 7.7062952518463135

Running GPT-3 CoT one-shot on 18.02 question 1...
GPT-3 API call time: 7.473547697067261

Running GPT-3 CoT one-shot on 18.02 question 2...
GPT-3 API call time: 4.416622638702393

Running GPT-3 CoT one-shot on 18.02 question 3...
GPT-3 API call time: 2.9668188095092773

Running GPT-3 CoT one-shot on 18.02 question 4...
GPT-3 API call time: 28.397748708724976

Running GPT-3 CoT one-shot on 18.02 question 5...
GPT-3 API call time: 4.185836553573608

Running GPT-3 CoT one-shot on 18.02 question 6...
GPT-3 API call time: 19.588162660598755

Running GPT-3 CoT one-shot on 18.02 question 7...
GPT-3 API call time: 5.940133333206177

Runn

In [None]:
if 'Do_MATH' in args:
    if 'Codex_Few_Shot' in args:
        execute_few_shot(MATH_sections_to_few_shot, questions_per_MATH_section)
    if 'GPT3_CoT_One_Shot' in args:
        execute_GPT3_CoT_one_shot(MATH_sections_to_few_shot, questions_per_MATH_section)

no few shot needed for MATH_Algebra question 1
doing few-shot for MATH_Algebra question 2...
Codex API call time: 10.31976580619812

no few shot needed for MATH_Algebra question 3
doing few-shot for MATH_Algebra question 4...
Codex API call time: 16.428293704986572

no few shot needed for MATH_Algebra question 5
doing few-shot for MATH_Algebra question 6...
Codex API call time: 15.203335285186768

doing few-shot for MATH_Algebra question 7...
