# Individual Groups:

In [None]:
pip install -q OpenAI

In [1]:
pip install -q google-generativeai

In [None]:
pip install -q anthropic

In [8]:
import os
import csv
import time
import json
import re

# Read test parts and answer key

## Part 1:

In [6]:
# Load the text of test part 1 (the questions that will be sent to the model).
# NOTE(review): this cell originally opened "/content/USHistoryAnswerKey.txt"
# while the "Answer Key" cell opens "/content/USHistorytest.txt", so the test
# and the key looked swapped. The path now points at the test file -- confirm
# this matches the names of the uploaded files.
# The variable keeps its `physics_` prefix because the Gemini section passes
# `physics_test_part1` as the first test part.
test_path = "/content/USHistorytest.txt"
try:
    # Most uploads are UTF-8 ...
    with open(test_path, 'r', encoding='utf-8') as file:
        physics_test_part1 = file.read()
except UnicodeDecodeError:
    # ... but fall back to Windows-1252 when the file is not valid UTF-8.
    with open(test_path, 'r', encoding='cp1252') as file:
        physics_test_part1 = file.read()

## Part 2:

In [None]:
# Load the text of test part 2 into `physics_test_part2`.
test_path = "/content/Physicstestpart2.txt"
# Decode as UTF-8 first and retry as Windows-1252 if that fails; a decode
# failure on the last candidate is re-raised unchanged.
for _encoding in ("utf-8", "cp1252"):
    try:
        with open(test_path, mode="r", encoding=_encoding) as file:
            physics_test_part2 = file.read()
    except UnicodeDecodeError:
        if _encoding == "cp1252":
            raise
    else:
        break

## Part 3 (Don't run if n/a):

In [None]:
# Load the text of test part 3 into `physics_test_part3`.
test_path = "/content/Physicstestpart3.txt"
# Decode as UTF-8 first and retry as Windows-1252 if that fails; a decode
# failure on the last candidate is re-raised unchanged.
for _encoding in ("utf-8", "cp1252"):
    try:
        with open(test_path, mode="r", encoding=_encoding) as file:
            physics_test_part3 = file.read()
    except UnicodeDecodeError:
        if _encoding == "cp1252":
            raise
    else:
        break

## Part 4 (Don't run if n/a):

In [None]:
# Load the text of test part 4 into `physics_test_part4`.
test_path = "/content/Physicstestpart4.txt"
# Decode as UTF-8 first and retry as Windows-1252 if that fails; a decode
# failure on the last candidate is re-raised unchanged.
for _encoding in ("utf-8", "cp1252"):
    try:
        with open(test_path, mode="r", encoding=_encoding) as file:
            physics_test_part4 = file.read()
    except UnicodeDecodeError:
        if _encoding == "cp1252":
            raise
    else:
        break

## Answer Key:

In [7]:
# Load the answer key text that parse_answer_key() will split into
# "<question number> <answer> <skill ...>" lines.
# NOTE(review): this cell originally opened "/content/USHistorytest.txt" while
# the "Part 1" cell opens "/content/USHistoryAnswerKey.txt", so the test and
# the key looked swapped. The path now points at the answer-key file --
# confirm this matches the names of the uploaded files.
Answers_path = "/content/USHistoryAnswerKey.txt"
try:
    # Most uploads are UTF-8 ...
    with open(Answers_path, 'r', encoding='utf-8') as file:
        physics_answers = file.read()
except UnicodeDecodeError:
    # ... but fall back to Windows-1252 when the file is not valid UTF-8.
    with open(Answers_path, 'r', encoding='cp1252') as file:
        physics_answers = file.read()

# LLMs

## GPT-4

In [None]:
import os

from openai import OpenAI

# Avoid committing a real key to the notebook: leave this blank and export
# OPENAI_API_KEY instead. A value pasted here still takes precedence.
OPENAI_API_KEY = ""
client = OpenAI(api_key=OPENAI_API_KEY or os.environ.get("OPENAI_API_KEY"))

In [None]:
def initialize_csv_file(filename, subject):
    """Create (or truncate) *filename* and write the results header row.

    Every column title is prefixed with ``"<subject>_"``.
    """
    column_titles = ("Question Number", "Correctness", "Skill Assessed", "Time Taken")
    header = [f"{subject}_{title}" for title in column_titles]
    # newline='' lets the csv module control line endings itself.
    with open(filename, mode='w', newline='') as file:
        csv.writer(file).writerow(header)

def update_csv(filename, results, subject):
    """Append one CSV row per entry of *results* to *filename*.

    Each entry is indexed as (question number, correctness, skill, time);
    the question number is written with a ``"<subject>_"`` prefix.
    """
    with open(filename, mode='a', newline='') as file:
        # Rows are produced lazily and written in order, one per result.
        csv.writer(file).writerows(
            [f"{subject}_{entry[0]}", entry[1], entry[2], entry[3]]
            for entry in results
        )

def generate_combined_test_results(test_parts, answers, subject=None):
    """Send each test part to GPT-4, parse the replies and score them.

    Args:
        test_parts: Iterable of question-text strings; each one is sent as a
            separate chat-completion request.
        answers: Answer-key text, parsed with parse_answer_key().
        subject: Optional label used only in the progress printout. When
            omitted, the notebook-level ``subject`` variable is used if it
            exists, so existing two-argument calls behave as before.

    Returns:
        A list of ``(question_number, correctness, skill_assessed, run_time)``
        tuples, one per answer parsed from the model output. ``correctness``
        is 1 or 0 and ``run_time`` is the total wall-clock time in seconds for
        all requests (the same value in every tuple).

    Note:
        Relies on the module-level ``client`` being the OpenAI client. The
        Claude section of this notebook rebinds ``client``, so re-run the
        OpenAI client cell before calling this after that section.
    """
    # Previously the global `subject` was read directly, so calling this
    # before it was defined raised NameError just for a debug print.
    label = subject if subject is not None else globals().get("subject", "test")

    combined_model_answers = {}
    start_time = time.time()

    for i, part in enumerate(test_parts):
        print(f"Processing part {i+1}...")  # Debugging print
        output = client.chat.completions.create(
            model='gpt-4',
            messages=[{"role": "user", "content": f"Please answer the questions with only your letter of choice. Please remove parentheses from your letter of choice. For example, 1. A. The questions:\n{part}"}]
        )
        print(f"GPT Output for {label} Part {i+1}: \n{output.choices[0].message.content}")
        # Later parts overwrite earlier ones if a question number repeats.
        combined_model_answers.update(store_gpt_answers(output))

    print("All parts processed. Combining answers...")  # Debugging print

    run_time = round(time.time() - start_time, 2)

    answer_key = parse_answer_key(answers)
    results = []
    for question_number, model_answer in combined_model_answers.items():
        # Questions missing from the key are scored 0 with an empty skill.
        correct_answer, skill_assessed = answer_key.get(question_number, ('', ''))
        # Model answers are upper-cased by the parser, so compare the key the
        # same way; a lower-case key entry must not count as a wrong answer.
        correctness = 1 if correct_answer.upper() == model_answer else 0
        results.append((question_number, correctness, skill_assessed, run_time))

    return results

def store_gpt_answers(output):
    """Extract ``{question_number: letter}`` from a chat-completion reply.

    Reads ``output.choices[0].message.content`` and recognises "1. A",
    "1: a", "1) A" and "1. (A) extra text". Letters are returned upper-cased
    and question numbers as digit strings. If a number appears more than
    once, the last occurrence wins. Missing (None) or empty content yields an
    empty dict.
    """
    answers_text = output.choices[0].message.content or ""
    model_answers = {}
    # A bare letter must be a whole token (\b); otherwise the first letter of
    # any word after a number ("1. Because ...") would be read as an answer.
    # NOTE: a standalone word such as "a" after a number still matches.
    pattern = re.compile(r'(\d+)\s*[.:)]?\s*(?:\(([A-E])\)|([A-E])\b)', re.IGNORECASE)
    for q_num, answer_with_paren, answer_without_paren in pattern.findall(answers_text):
        answer = answer_with_paren or answer_without_paren
        model_answers[q_num.strip()] = answer.upper()
    return model_answers

def parse_answer_key(answers):
    """Parse answer-key text into ``{question_number: (answer, skill)}``.

    Each non-empty line is split on whitespace: the first token is the
    question number, the second the correct answer and the remainder (if
    any) the skill description.

    The model-answer parsers in this notebook produce bare digit strings and
    upper-case letters, so the key is normalised the same way: surrounding
    "().:" punctuation is stripped from the number and the answer, and the
    answer is upper-cased. Lines with no skill column are kept with an empty
    skill instead of being dropped, provided the first token is a number.
    """
    answer_key = {}
    for line in answers.splitlines():
        parts = line.split()
        if len(parts) < 2:
            continue
        question_number = parts[0].strip("().:")
        correct_answer = parts[1].strip("().:,").upper()
        if not question_number or not correct_answer:
            continue
        # Two-token lines are only trusted when they look like "<n> <answer>";
        # this keeps headings such as "Answer Key" out of the result.
        if len(parts) == 2 and not question_number.isdigit():
            continue
        answer_key[question_number] = (correct_answer, ' '.join(parts[2:]))
    return answer_key

# Example usage for a test with multiple parts:
subject = "Physics"
filename = f"GPT_{subject}_test_results.csv"
# The loader cells store their text in physics_test_part1..4 and
# physics_answers (there is no history_test_part1 / history_answers in this
# notebook, so the old names raised NameError). Parts 3 and 4 are optional
# ("Don't run if n/a"), so only the parts that were actually loaded are used.
_part_names = ("physics_test_part1", "physics_test_part2", "physics_test_part3", "physics_test_part4")  # Extend this tuple for more parts
test_parts = [globals()[name] for name in _part_names if name in globals()]
if not test_parts:
    raise RuntimeError("No test parts are loaded; run at least the 'Part 1' cell first.")
answers = physics_answers  # Assuming you have a combined answer key for all parts

initialize_csv_file(filename, subject)

full_test_results = generate_combined_test_results(test_parts, answers)

update_csv(filename, full_test_results, subject)
if full_test_results:
    print(f"Test completed in {full_test_results[0][3]} seconds.\nResults added to {filename}.")
else:
    # Nothing matched the answer pattern; indexing [0] would raise IndexError.
    print(f"No answers could be parsed from the model output; {filename} contains only the header row.")

## Gemini Pro

In [4]:
import os

import google.generativeai as genai

# Avoid committing a real key to the notebook: leave this blank and export
# GOOGLE_API_KEY instead. A value pasted here still takes precedence.
GOOGLE_API_KEY = ""
genai.configure(api_key=GOOGLE_API_KEY or os.environ.get("GOOGLE_API_KEY"))
model = genai.GenerativeModel('gemini-pro')

In [None]:
def initialize_csv_file(filename, subject):
    """Create (or truncate) *filename* and write the results header row.

    Every column title is prefixed with ``"<subject>_"``.
    """
    column_titles = ("Question Number", "Correctness", "Skill Assessed", "Time Taken")
    header = [f"{subject}_{title}" for title in column_titles]
    # newline='' lets the csv module control line endings itself.
    with open(filename, mode='w', newline='') as file:
        csv.writer(file).writerow(header)

def update_csv(filename, results, subject):
    """Append one CSV row per entry of *results* to *filename*.

    Each entry is indexed as (question number, correctness, skill, time);
    the question number is written with a ``"<subject>_"`` prefix.
    """
    with open(filename, mode='a', newline='') as file:
        # Rows are produced lazily and written in order, one per result.
        csv.writer(file).writerows(
            [f"{subject}_{entry[0]}", entry[1], entry[2], entry[3]]
            for entry in results
        )

def generate_combined_test_results(test_parts, answers, subject=None):
    """Send each test part to Gemini, parse the replies and score them.

    Args:
        test_parts: Iterable of question-text strings; each one is sent as a
            separate ``model.generate_content`` request.
        answers: Answer-key text, parsed with parse_answer_key().
        subject: Optional label used only in the progress printout. When
            omitted, the notebook-level ``subject`` variable is used if it
            exists, so existing two-argument calls behave as before.

    Returns:
        A list of ``(question_number, correctness, skill_assessed, run_time)``
        tuples, one per answer parsed from the model output. ``correctness``
        is 1 or 0 and ``run_time`` is the total wall-clock time in seconds for
        all requests (the same value in every tuple).

    Note:
        Relies on the module-level ``model`` created in the Gemini setup cell.
    """
    # Previously the global `subject` was read directly, so calling this
    # before it was defined raised NameError just for a debug print.
    label = subject if subject is not None else globals().get("subject", "test")

    combined_model_answers = {}
    start_time = time.time()

    for i, part in enumerate(test_parts):
        print(f"Processing part {i+1}...")  # Debugging print
        response = model.generate_content(f"Please answer the questions with only your letter of choice. Please remove parentheses from your letter of choice. For example, 1. A. The questions:\n{part}")
        print(f"Gemini Output for {label} Part {i+1}: \n{response.text}")
        # Later parts overwrite earlier ones if a question number repeats.
        combined_model_answers.update(store_gemini_answers(response))

    print("All parts processed. Combining answers...")  # Debugging print

    run_time = round(time.time() - start_time, 2)

    answer_key = parse_answer_key(answers)
    results = []
    for question_number, model_answer in combined_model_answers.items():
        # Questions missing from the key are scored 0 with an empty skill.
        correct_answer, skill_assessed = answer_key.get(question_number, ('', ''))
        # Model answers are upper-cased by the parser, so compare the key the
        # same way; a lower-case key entry must not count as a wrong answer.
        correctness = 1 if correct_answer.upper() == model_answer else 0
        results.append((question_number, correctness, skill_assessed, run_time))

    return results

def store_gemini_answers(response):
    """Extract ``{question_number: letter}`` from a Gemini response.

    Reads ``response.text`` and recognises "1. A", "1: a", "1) A" and
    "1. (A) extra text". Letters are returned upper-cased and question
    numbers as digit strings. If a number appears more than once, the last
    occurrence wins. Empty text yields an empty dict.
    """
    answers_text = response.text or ""
    model_answers = {}
    # A bare letter must be a whole token (\b); otherwise the first letter of
    # any word after a number ("1. Because ...") would be read as an answer.
    # NOTE: a standalone word such as "a" after a number still matches.
    pattern = re.compile(r'(\d+)\s*[.:)]?\s*(?:\(([A-E])\)|([A-E])\b)', re.IGNORECASE)
    for q_num, answer_with_paren, answer_without_paren in pattern.findall(answers_text):
        answer = answer_with_paren or answer_without_paren
        model_answers[q_num.strip()] = answer.upper()
    return model_answers

def parse_answer_key(answers):
    """Parse answer-key text into ``{question_number: (answer, skill)}``.

    Each non-empty line is split on whitespace: the first token is the
    question number, the second the correct answer and the remainder (if
    any) the skill description.

    The model-answer parsers in this notebook produce bare digit strings and
    upper-case letters, so the key is normalised the same way: surrounding
    "().:" punctuation is stripped from the number and the answer, and the
    answer is upper-cased. Lines with no skill column are kept with an empty
    skill instead of being dropped, provided the first token is a number.
    """
    answer_key = {}
    for line in answers.splitlines():
        parts = line.split()
        if len(parts) < 2:
            continue
        question_number = parts[0].strip("().:")
        correct_answer = parts[1].strip("().:,").upper()
        if not question_number or not correct_answer:
            continue
        # Two-token lines are only trusted when they look like "<n> <answer>";
        # this keeps headings such as "Answer Key" out of the result.
        if len(parts) == 2 and not question_number.isdigit():
            continue
        answer_key[question_number] = (correct_answer, ' '.join(parts[2:]))
    return answer_key

# Example usage for a test with multiple parts:
subject = "Physics"
filename = f"Gemini_{subject}_test_results.csv"
# Parts 3 and 4 are optional ("Don't run if n/a"), so only the parts that
# were actually loaded are used instead of failing with NameError.
_part_names = ("physics_test_part1", "physics_test_part2", "physics_test_part3", "physics_test_part4")  # Extend this tuple for more parts
test_parts = [globals()[name] for name in _part_names if name in globals()]
if not test_parts:
    raise RuntimeError("No test parts are loaded; run at least the 'Part 1' cell first.")
answers = physics_answers  # Assuming you have a combined answer key for all parts
initialize_csv_file(filename, subject)

full_test_results = generate_combined_test_results(test_parts, answers)

update_csv(filename, full_test_results, subject)
if full_test_results:
    print(f"Test completed in {full_test_results[0][3]} seconds. \nResults added to {filename}.")
else:
    # Nothing matched the answer pattern; indexing [0] would raise IndexError.
    print(f"No answers could be parsed from the model output; {filename} contains only the header row.")

## Claude 3 Opus

In [None]:
import os

import anthropic

# Avoid committing a real key to the notebook: leave this blank and export
# ANTHROPIC_API_KEY instead. A value pasted here still takes precedence.
ANTHROPIC_API_KEY = ""
# NOTE: this rebinds `client`, the same name the GPT-4 section uses for its
# OpenAI client; re-run that section's client cell before using it again.
client = anthropic.Anthropic(api_key=ANTHROPIC_API_KEY or os.environ.get("ANTHROPIC_API_KEY"))

In [None]:
def initialize_csv_file(filename, subject):
    """Create (or truncate) *filename* and write the results header row.

    Every column title is prefixed with ``"<subject>_"``.
    """
    column_titles = ("Question Number", "Correctness", "Skill Assessed", "Time Taken")
    header = [f"{subject}_{title}" for title in column_titles]
    # newline='' lets the csv module control line endings itself.
    with open(filename, mode='w', newline='') as file:
        csv.writer(file).writerow(header)

def update_csv(filename, results, subject):
    """Append one CSV row per entry of *results* to *filename*.

    Each entry is indexed as (question number, correctness, skill, time);
    the question number is written with a ``"<subject>_"`` prefix.
    """
    with open(filename, mode='a', newline='') as file:
        # Rows are produced lazily and written in order, one per result.
        csv.writer(file).writerows(
            [f"{subject}_{entry[0]}", entry[1], entry[2], entry[3]]
            for entry in results
        )

def generate_combined_test_results(test_parts, answers, subject=None):
    """Send each test part to Claude 3 Opus, parse the replies and score them.

    Args:
        test_parts: Iterable of question-text strings; each one is sent as a
            separate ``client.messages.create`` request (max_tokens=1000).
        answers: Answer-key text, parsed with parse_answer_key().
        subject: Optional label used only in the progress printout. When
            omitted, the notebook-level ``subject`` variable is used if it
            exists, so existing two-argument calls behave as before.

    Returns:
        A list of ``(question_number, correctness, skill_assessed, run_time)``
        tuples, one per answer parsed from the model output. ``correctness``
        is 1 or 0 and ``run_time`` is the total wall-clock time in seconds for
        all requests (the same value in every tuple).

    Note:
        Relies on the module-level ``client`` being the Anthropic client. The
        GPT-4 section of this notebook binds the same name to an OpenAI
        client, so re-run the Anthropic client cell before calling this after
        that section.
    """
    # Previously the global `subject` was read directly, so calling this
    # before it was defined raised NameError just for a debug print.
    label = subject if subject is not None else globals().get("subject", "test")

    combined_model_answers = {}
    start_time = time.time()

    for i, part in enumerate(test_parts):
        print(f"Processing part {i+1}...")  # Debugging print
        message = client.messages.create(
            max_tokens=1000,
            messages=[{"role": "user","content": f"Please answer the questions with only your letter of choice. Please remove parentheses from your letter of choice. For example, 1. A. The questions:\n{part}"}],
            model="claude-3-opus-20240229")
        print(f"Claude Output for {label} Part {i+1}: \n{message.content[0].text}")
        # Later parts overwrite earlier ones if a question number repeats.
        combined_model_answers.update(store_claude_answers(message))

    print("All parts processed. Combining answers...")  # Debugging print

    run_time = round(time.time() - start_time, 2)

    answer_key = parse_answer_key(answers)
    results = []
    for question_number, model_answer in combined_model_answers.items():
        # Questions missing from the key are scored 0 with an empty skill.
        correct_answer, skill_assessed = answer_key.get(question_number, ('', ''))
        # Model answers are upper-cased by the parser, so compare the key the
        # same way; a lower-case key entry must not count as a wrong answer.
        correctness = 1 if correct_answer.upper() == model_answer else 0
        results.append((question_number, correctness, skill_assessed, run_time))

    return results

def store_claude_answers(message):
    """Extract ``{question_number: letter}`` from an Anthropic message.

    Reads the text of the first content block (``message.content[0].text``)
    and recognises "1. A", "1: a", "1) A" and "1. (A) extra text". Letters
    are returned upper-cased and question numbers as digit strings. If a
    number appears more than once, the last occurrence wins. A message with
    no content blocks or empty text yields an empty dict.
    """
    blocks = message.content or []
    answers_text = (blocks[0].text or "") if blocks else ""
    model_answers = {}
    # A bare letter must be a whole token (\b); otherwise the first letter of
    # any word after a number ("1. Because ...") would be read as an answer.
    # NOTE: a standalone word such as "a" after a number still matches.
    pattern = re.compile(r'(\d+)\s*[.:)]?\s*(?:\(([A-E])\)|([A-E])\b)', re.IGNORECASE)
    for q_num, answer_with_paren, answer_without_paren in pattern.findall(answers_text):
        answer = answer_with_paren or answer_without_paren
        model_answers[q_num.strip()] = answer.upper()
    return model_answers

def parse_answer_key(answers):
    """Parse answer-key text into ``{question_number: (answer, skill)}``.

    Each non-empty line is split on whitespace: the first token is the
    question number, the second the correct answer and the remainder (if
    any) the skill description.

    The model-answer parsers in this notebook produce bare digit strings and
    upper-case letters, so the key is normalised the same way: surrounding
    "().:" punctuation is stripped from the number and the answer, and the
    answer is upper-cased. Lines with no skill column are kept with an empty
    skill instead of being dropped, provided the first token is a number.
    """
    answer_key = {}
    for line in answers.splitlines():
        parts = line.split()
        if len(parts) < 2:
            continue
        question_number = parts[0].strip("().:")
        correct_answer = parts[1].strip("().:,").upper()
        if not question_number or not correct_answer:
            continue
        # Two-token lines are only trusted when they look like "<n> <answer>";
        # this keeps headings such as "Answer Key" out of the result.
        if len(parts) == 2 and not question_number.isdigit():
            continue
        answer_key[question_number] = (correct_answer, ' '.join(parts[2:]))
    return answer_key

# Example usage for a test with multiple parts:
subject = "history"
filename = f"Claude_{subject}_test_results.csv"
# The loader cells in this notebook store their text in physics_test_part1
# and physics_answers; history_test_part1 / history_answers are never
# defined, so the old names raised NameError.
test_parts = [physics_test_part1]#, physics_test_part2, physics_test_part3, physics_test_part4]  # Extend this list for more parts
answers = physics_answers  # Assuming you have a combined answer key for all parts

initialize_csv_file(filename, subject)

full_test_results = generate_combined_test_results(test_parts, answers)

update_csv(filename, full_test_results, subject)
if full_test_results:
    print(f"Test completed in {full_test_results[0][3]} seconds. \nResults added to {filename}.")
else:
    # Nothing matched the answer pattern; indexing [0] would raise IndexError.
    print(f"No answers could be parsed from the model output; {filename} contains only the header row.")