In [134]:
import boto3
import pandas as pd
import json
import time
import requests
from botocore.exceptions import ClientError
import re



# Parameters
bucket_name = "my-grader-data-007"  # Replace with your S3 bucket name
training_file = "training_data.jsonl"

# AWS clients: S3 uses the default region from the local AWS configuration,
# while the Bedrock control-plane client is pinned to us-east-1.
s3_client = boto3.client("s3")
bedrock_client = boto3.client("bedrock", region_name="us-east-1")


In [139]:

def parse_grading_response(response_text):
    """Parse the model's free-text grading into a structured dictionary.

    Args:
        response_text: The grading text returned by the model. Anything that
            is not a string (e.g. ``None``) is treated as an empty response.

    Returns:
        dict with the keys:
            "scores": dict mapping each rubric key to an int, or None when
                the score could not be found.
            "cheating_flag": bool, False when the flag is not present.
            "cheating_reason": str or None.
            "overall_comments": str, "" when not present.
            "final_grade": int or None.

    Missing scores / final grade are reported with ``print`` rather than
    raised, so one malformed response does not abort a grading run.
    """
    # (result key, heading as it appears in the response, maximum points)
    rubric = (
        ("understanding_of_concepts", "Understanding of Concepts", 30),
        ("application_and_analysis", "Application and Analysis", 25),
        ("scientific_accuracy", "Scientific Accuracy", 20),
        ("use_of_terminology", "Use of Terminology", 15),
        ("structure_and_organization", "Structure and Organization", 10),
    )

    text = response_text if isinstance(response_text, str) else ""

    parsed_data = {
        "scores": {key: None for key, _, _ in rubric},
        "cheating_flag": False,
        "cheating_reason": None,
        "overall_comments": "",
        "final_grade": None,
    }

    for key, heading, max_points in rubric:
        # (?!\d) stops "/10" from matching the "/100" of the final grade when
        # the section's own score is missing or formatted unexpectedly.
        pattern = rf"{re.escape(heading)}.*?(\d+)\s*/\s*{max_points}(?!\d)"
        score_match = re.search(pattern, text, re.DOTALL)
        if score_match:
            parsed_data["scores"][key] = int(score_match.group(1))
        else:
            print(f"Error parsing '{key}' score")

    # Extract the cheating flag (tolerant of spacing and letter case)
    flag_match = re.search(r"Cheating Flag:\s*(True|False)", text, re.IGNORECASE)
    if flag_match:
        parsed_data["cheating_flag"] = flag_match.group(1).lower() == "true"

    # Extract the cheating reason: everything up to the next known heading,
    # or to the end of the text when no heading follows.
    reason_match = re.search(
        r"Reason:\s*(.*?)(?=\n\s*Overall Comments|\n\s*Final Grade|\Z)",
        text,
        re.DOTALL,
    )
    if reason_match:
        parsed_data["cheating_reason"] = reason_match.group(1).strip()

    # Extract overall comments. The colon after the heading is optional
    # because the few-shot examples write the heading without one.
    comments_match = re.search(
        r"Overall Comments:?\s*(.*?)\s*Final Grade", text, re.DOTALL
    )
    if comments_match:
        parsed_data["overall_comments"] = comments_match.group(1).strip()

    # Extract the final grade
    grade_match = re.search(r"Final Grade:\s*(\d+)\s*/\s*100", text)
    if grade_match:
        parsed_data["final_grade"] = int(grade_match.group(1))
    else:
        print("Error parsing 'final_grade'")

    return parsed_data
import random


In [140]:
# Model to call through Bedrock (Claude 3.5 Sonnet).
model_id = "anthropic.claude-3-5-sonnet-20240620-v1:0"

# Bedrock Runtime client used for inference. No region is passed, so the
# region is taken from the default AWS configuration.
brt = boto3.client("bedrock-runtime")

# Grading rubric inserted verbatim into every grading prompt: one line per
# criterion with its maximum points, plus the cheating flag.
grading_rubric = "\n".join(
    [
        "Grading Rubric:",
        "- Understanding of Concepts (30 pts)",
        "- Application and Analysis (25 pts)",
        "- Scientific Accuracy (20 pts)",
        "- Use of Terminology (15 pts)",
        "- Structure and Organization (10 pts)",
        "- Cheating Flag",
        "",  # keeps the trailing newline of the rubric text
    ]
)



# Few-shot examples: five (student essay, reference grading) pairs that are
# replayed to the model as alternating user/assistant turns before the essay
# that actually needs grading.
#
# Every grading uses real line breaks (one heading, score or comment per line)
# and the "Overall Comments:" heading, so the format the model imitates is the
# same line-oriented format that the response parser reads.
_FEW_SHOT_PAIRS = [
    (
        "The mitochondria are the powerhouse of the cell. They are where cellular respiration happens. Glucose is broken down and energy is released. This energy is used to make ATP, which the cell uses for energy. Mitochondria have a double membrane and their own DNA. They are very important for the cell.",
        """Grading

Understanding of Concepts (30 pts)
Score: 25/30
The student shows a good basic understanding of the role of mitochondria in cellular respiration.

Application and Analysis (25 pts)
Score: 18/25
Application is present but lacks depth. More detailed analysis of the steps involved would enhance the response.

Scientific Accuracy (20 pts)
Score: 17/20
Information is mostly accurate but lacks specific details about the processes.

Use of Terminology (15 pts)
Score: 11/15
Basic terminology is used correctly; inclusion of more specific terms would improve the essay.

Structure and Organization (10 pts)
Score: 8/10
The essay is organized but could benefit from clearer structure.

Cheating Flag: False
Reason: The student has made an honest attempt to answer the question, and no evidence of cheating is detected.

Overall Comments:
Good effort. The essay covers the fundamental role of mitochondria but would be strengthened by more detailed explanations of processes like the Krebs cycle and oxidative phosphorylation.

Final Grade: 79/100""",
    ),
    (
        "Photosynthesis and cellular respiration are opposite processes. In photosynthesis, plants take in carbon dioxide and water to make glucose and oxygen using sunlight. In cellular respiration, organisms use glucose and oxygen to make carbon dioxide and water and release energy. The products of photosynthesis are the reactants of cellular respiration and vice versa. This shows how they are interconnected in the cycle of energy in living things.",
        """Grading

Understanding of Concepts (30 pts)
Score: 28/30
The student demonstrates a strong understanding of how photosynthesis and cellular respiration are interrelated.

Application and Analysis (25 pts)
Score: 22/25
Good application of concepts; the explanation effectively shows the connection between the two processes.

Scientific Accuracy (20 pts)
Score: 19/20
Information is accurate and well-presented.

Use of Terminology (15 pts)
Score: 13/15
Appropriate terminology is used; including more specific terms like 'biochemical pathways' would enhance the essay.

Structure and Organization (10 pts)
Score: 9/10
The essay is well-organized with a logical flow of ideas.

Cheating Flag: False
Reason: This essay demonstrates the student’s own understanding without any indications of academic dishonesty.

Overall Comments:
Excellent work. The essay clearly explains the interrelationship between photosynthesis and cellular respiration. Including more details on the specific steps of each process would further strengthen the response.

Final Grade: 91/100""",
    ),
    (
        "Invasive species are bad for ecosystems. They come from other places and take over. They can outcompete native species for resources. This can lead to native species dying off. Invasive species can also bring diseases. An example is the zebra mussel in the Great Lakes. It's important to control invasive species to protect ecosystems.",
        """Grading

Understanding of Concepts (30 pts)
Score: 20/30
The student shows a basic understanding of the impact of invasive species.

Application and Analysis (25 pts)
Score: 15/25
Application is limited; more in-depth analysis and additional examples would improve the essay.

Scientific Accuracy (20 pts)
Score: 16/20
Information is generally accurate but lacks detail.

Use of Terminology (15 pts)
Score: 9/15
Basic terms are used; including more scientific terminology would enhance the response.

Structure and Organization (10 pts)
Score: 7/10
The essay has a basic structure but could be better organized with clear paragraphs.

Cheating Flag: False
Reason: No signs of cheating or plagiarism detected in this response.

Overall Comments:
The essay provides a general overview but lacks depth and detailed analysis. Including more specific examples and discussing the mechanisms by which invasive species impact ecosystems would strengthen the essay.

Final Grade: 67/100""",
    ),
    (
        "Enzymes are important. They do stuff in the body. If it's too hot or cold, they don't work. pH can also affect them. Enzymes are like keys that fit into locks. They speed up reactions.",
        """Grading

Understanding of Concepts (30 pts)
Score: 15/30
The student shows limited understanding of enzymes and their functions.

Application and Analysis (25 pts)
Score: 10/25
Application is minimal; lacks analysis of how environmental factors specifically affect enzymes.

Scientific Accuracy (20 pts)
Score: 12/20
Information is somewhat accurate but very general and lacking detail.

Use of Terminology (15 pts)
Score: 6/15
Terminology is basic and lacks scientific precision.

Structure and Organization (10 pts)
Score: 5/10
The essay lacks clear organization and depth.

Cheating Flag: False
Reason: Although the essay is simplistic, there are no indications of cheating or plagiarism.

Overall Comments:
The response is superficial and lacks detail. To improve, the student should explain how enzymes function as catalysts, discuss the active site, and elaborate on how temperature and pH affect enzyme activity at a molecular level.

Final Grade: 48/100""",
    ),
    (
        "Mitosis has many stages, but they are not important. What matters is that cells divide and we get more cells. This is how life grows. So mitosis is just cell division. I already know this stuff and don't need to write it all down. Please give me an A.",
        """Grading

Understanding of Concepts (30 pts)
Score: 5/30
The student demonstrates minimal understanding of the stages of mitosis.

Application and Analysis (25 pts)
Score: 2/25
No application or analysis of the stages is provided.

Scientific Accuracy (20 pts)
Score: 5/20
Information is vague and lacks accuracy regarding mitosis.

Use of Terminology (15 pts)
Score: 2/15
Scientific terminology is absent.

Structure and Organization (10 pts)
Score: 3/10
The essay lacks structure and does not address the question.

Cheating Flag: True
Reason: The student requests a specific grade rather than answering the question, suggesting academic dishonesty or lack of effort.

Overall Comments:
The response fails to address the question asked. Additionally, requesting a specific grade is inappropriate. To improve, the student should outline each stage of mitosis—prophase, metaphase, anaphase, telophase—and describe the key events in each.

Final Grade: 17/100""",
    ),
]

# Expand each pair into a user turn (the essay) followed by an assistant turn
# (the grading), using the message structure of the Bedrock Converse API.
few_shot_examples = [
    {"role": role, "content": [{"text": text}]}
    for essay, grading in _FEW_SHOT_PAIRS
    for role, text in (("user", essay), ("assistant", grading))
]

import io
import pdfplumber
import boto3
# Build the prompt that asks the model to grade one student answer
def create_claude_input(question, answer, rubric):
    """Return the user message for grading a single answer.

    The message is made of four parts separated by blank lines: the grading
    instruction, the biology question, the rubric text, and the student's
    answer.
    """
    sections = [
        "Please grade the following student essay based on the provided biology question and grading rubric.",
        f"Biology Question:\n{question}",
        f"{rubric}",
        f"Student Answer:\n{answer}",
    ]
    return "\n\n".join(sections)

# Grade every PDF answer stored in the S3 bucket and save each parsed result
# to DynamoDB.
bucket_name = 'my-grader-data-007'
given_question = 'Explain about Darwins theory of evolution.'

# Create the AWS resources once, before first use, so the cell runs on a
# fresh kernel and does not rebuild them for every file.
s3 = boto3.resource('s3')
dynamodb = boto3.resource('dynamodb', region_name='us-east-1')
table = dynamodb.Table('test-dynamo')

my_bucket = s3.Bucket(bucket_name)
file_list = [item.key for item in my_bucket.objects.all() if item.key.endswith('.pdf')]

for itemname in file_list:
    fs = s3.Object(bucket_name, itemname).get()['Body'].read()

    # Extract the text while the PDF is still open; the page objects cannot
    # be relied on once the context manager has closed the underlying stream.
    # All pages are read so that multi-page answers are graded in full.
    with pdfplumber.open(io.BytesIO(fs)) as pdf:
        page_texts = [page.extract_text() for page in pdf.pages]
    student_answer_data = "\n".join(text for text in page_texts if text)

    if not student_answer_data:
        print("No student answer data available.")
        continue  # nothing to grade for this file

    given_answer = student_answer_data

    # Few-shot examples first, then the essay to grade as the final user turn
    user_message = create_claude_input(given_question, given_answer, grading_rubric)
    conversation = few_shot_examples + [{"role": "user", "content": [{"text": user_message}]}]

    try:
        # Call Claude model with few-shot prompts and student data
        response = brt.converse(
            modelId=model_id,
            messages=conversation,
            inferenceConfig={"maxTokens": 512, "temperature": 0.5, "topP": 0.9},
        )
        response_text = response["output"]["message"]["content"][0]["text"]
    except Exception as e:  # covers botocore ClientError and malformed responses
        print(f"ERROR: Can't invoke '{model_id}'. Reason: {e}")
        # Skip this file: without a fresh response there is nothing valid to
        # parse, and the previous file's grading must not be stored again.
        continue

    result_json = {
        "question": given_question,
        "student_answer": given_answer,
        "grading_response": response_text
    }

    parsed_output = parse_grading_response(result_json['grading_response'])
    # NOTE(review): the ID is random in [20, 50), so two files can receive the
    # same ID. If "Student ID" is the table's key, the later item replaces the
    # earlier one -- confirm the key schema and use a real student identifier.
    parsed_output["Student ID"] = random.randrange(20, 50)

    try:
        table.put_item(Item=parsed_output)
        print("Data inserted successfully into DynamoDB.")
    except ClientError as e:
        print(f"An error occurred: {e.response['Error']['Message']}")

Grading:

Understanding of Concepts (30 pts)
Score: 28/30
The student demonstrates a strong understanding of Darwin's theory of evolution, including key concepts like natural selection, survival of the fittest, and the formation of new species over time.

Application and Analysis (25 pts)
Score: 22/25
The student applies the concept well, explaining how natural selection leads to species evolution. More analysis of specific examples or mechanisms could have improved the score.

Scientific Accuracy (20 pts)
Score: 19/20
The information presented is scientifically accurate. The student correctly identifies key elements of Darwin's theory.

Use of Terminology (15 pts)
Score: 14/15
The student uses appropriate terminology, including "natural selection," "species," "traits," "biodiversity," and "survival of the fittest."

Structure and Organization (10 pts)
Score: 9/10
The essay is well-structured and organized, presenting ideas in a logical sequence.

Cheating Flag: False
Reason: The essay