In [1]:
import re
import spacy
from textblob import TextBlob

# Load the spaCy English pipeline once at module level; preprocess() reuses
# this shared `nlp` object for tokenization, stop-word flags and lemmas.
# NOTE(review): presumably the "en_core_web_sm" model package must already be
# installed in the kernel's environment — confirm in the setup instructions.
nlp = spacy.load("en_core_web_sm")

def preprocess(text, spell_correct=True):
    """
    Cleans and normalizes text for grading.

    Pipeline: lowercase -> drop digits and apostrophes -> turn every other
    non-letter into a space -> collapse whitespace -> optional TextBlob spell
    correction -> spaCy lemmatization with stop words removed.

    Args:
        text: Raw answer text. ``None`` is treated as an empty answer.
        spell_correct: When True (default) run TextBlob's spell correction.
            Pass False to keep the words as written; generic spell checkers
            can replace domain terms with unrelated dictionary words.

    Returns:
        The lemmas of all non-stop-word tokens joined by single spaces, or
        "" when no letters remain after cleaning.
    """
    if text is None:
        return ""

    # 1. lowercase
    text = text.lower()

    # 2. remove punctuation and numbers
    # Digits and apostrophes are deleted so "don't" stays one token ("dont").
    text = re.sub(r"[0-9'\u2019]", '', text)
    # Everything else that is not a letter becomes a space; deleting it
    # outright would glue neighbours together ("sunlight,water" -> one word).
    text = re.sub(r'[^a-z\s]', ' ', text)

    # 3. fix spacing
    text = re.sub(r'\s+', ' ', text).strip()

    # Nothing left to analyse: skip the (slow) spell checker and spaCy.
    if not text:
        return ""

    # 4. light spell correction
    if spell_correct:
        try:
            text = str(TextBlob(text).correct())
        except Exception as exc:
            # Best effort: keep the uncorrected text, but surface the failure
            # instead of hiding it.
            import warnings
            warnings.warn(
                f"Spell correction skipped: {exc!r}",
                RuntimeWarning,
                stacklevel=2,
            )

    # 5. lemmatization (word simplification)
    doc = nlp(text)
    lemmas = [token.lemma_ for token in doc if not token.is_stop]

    return ' '.join(lemmas)


In [2]:
# Smoke test: run preprocess() on a deliberately misspelled sentence and print
# the text before and after cleaning.
# Note: in the recorded output, spell correction turns "proses" into "prof" and
# leaves "photosyntheis" uncorrected, so it is not reliable for domain terms.
raw_text = "Photosyntheis is the proses by which plantz make there food!!"
print("Before:", raw_text)
print("After:", preprocess(raw_text))

Before: Photosyntheis is the proses by which plantz make there food!!
After: photosyntheis prof plant food


In [3]:
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity

# Load the Sentence-BERT model once at module level; grade_answer() reuses
# this shared `model` to encode both answers, so it is not reloaded per call.
model = SentenceTransformer('all-MiniLM-L6-v2')

def _round_half_up(value):
    """Round a non-negative number to the nearest int, with .5 going up."""
    return int(value + 0.5)


def _marks_for_similarity(sim, max_marks):
    """Map a cosine similarity onto the 100% / 80% / 50% / 0% mark bands."""
    if sim >= 0.9:
        return max_marks
    if sim >= 0.75:
        return _round_half_up(max_marks * 0.8)
    if sim >= 0.6:
        return _round_half_up(max_marks * 0.5)
    return 0


def grade_answer(student_answer, key_answer, max_marks=10):
    """
    Grades a student's answer by comparing it to the key answer.
    Uses preprocessing + semantic similarity.

    Args:
        student_answer: The student's free-text answer. ``None``, blank text,
            or text with nothing left after preprocessing scores 0.
        key_answer: The reference answer to compare against.
        max_marks: Marks awarded for a fully matching answer (non-negative).

    Returns:
        ``(marks, sim)``: the awarded marks and the cosine similarity as a
        plain Python float. Bands: sim >= 0.9 -> max_marks, >= 0.75 -> 80%,
        >= 0.6 -> 50%, otherwise 0. Partial marks are rounded half up.

    Raises:
        ValueError: If ``key_answer`` is blank, or becomes empty after
            preprocessing; there is nothing meaningful to grade against.
    """
    if key_answer is None or not str(key_answer).strip():
        raise ValueError("key_answer must be a non-empty string")

    # A blank submission earns nothing; skip the expensive NLP models.
    if student_answer is None or not str(student_answer).strip():
        return 0, 0.0

    # Step 1: Preprocess both answers
    student_clean = preprocess(student_answer)
    key_clean = preprocess(key_answer)

    # Two empty strings would be encoded identically and earn full marks, so
    # handle text that vanished during preprocessing explicitly.
    if not key_clean.strip():
        raise ValueError("key_answer has no gradable words after preprocessing")
    if not student_clean.strip():
        return 0, 0.0

    # Step 2: Encode both answers into vector embeddings
    embeddings = model.encode([student_clean, key_clean])

    # Step 3: Compute similarity between them
    sim = cosine_similarity([embeddings[0]], [embeddings[1]])[0][0]

    # Step 4: Convert similarity into marks
    # Built-in round() rounds halves to the nearest even number, which gives
    # uneven half-marks for odd max_marks; the helper always rounds .5 up.
    marks = _marks_for_similarity(sim, max_marks)

    return marks, float(sim)

In [4]:
# Example: grade a misspelled student answer against the reference answer.
key_answer = "Photosynthesis is the process by which plants make food using sunlight."
student_answer = "Photosyntheis is the proses by which plantz make there food."

# max_marks is left at grade_answer()'s default of 10, which is why the
# report below hard-codes "/10"; keep the two in sync if max_marks is passed.
marks, sim = grade_answer(student_answer, key_answer)
print(f"Similarity Score: {sim:.2f}")
print(f"Marks Awarded: {marks}/10")

Similarity Score: 0.76
Marks Awarded: 8/10
