In [1]:
import pickle
import pandas as pd
import torch
from sentence_transformers import SentenceTransformer
from sentence_transformers.util import cos_sim

In [2]:
import re
import spacy
from textblob import TextBlob

# Load the "en_core_web_sm" spaCy pipeline once at cell level; preprocess()
# calls it to obtain token lemmas and stop-word flags.
nlp = spacy.load("en_core_web_sm")

def preprocess(text):
    """Normalise a free-text answer into a space-joined string of lemmas.

    Pipeline:
      1. lowercase the text;
      2. delete every character that is not a-z or whitespace (digits and
         punctuation are removed, not replaced by spaces);
      3. collapse whitespace runs to a single space and trim the ends;
      4. attempt spelling correction with TextBlob (best effort);
      5. run the module-level spaCy pipeline ``nlp`` and keep the lemma of
         every token that is not flagged as a stop word.

    Args:
        text: The raw answer as a ``str``.

    Returns:
        A ``str`` of the remaining lemmas joined by single spaces (empty if
        nothing survives the filtering).
    """
    import warnings

    text = text.lower()
    text = re.sub(r'[^a-z\s]', '', text)
    text = re.sub(r'\s+', ' ', text).strip()

    try:
        text = str(TextBlob(text).correct())
    except Exception as exc:
        # Spelling correction is optional: continue with the uncorrected text,
        # but report the failure instead of hiding it. Catching Exception
        # (not a bare except) lets KeyboardInterrupt/SystemExit propagate.
        warnings.warn(f"Spelling correction skipped: {exc!r}")

    doc = nlp(text)
    lemmas = [token.lemma_ for token in doc if not token.is_stop]
    return " ".join(lemmas)

In [4]:
import os
import pickle

# Project root. Defaults to the original development machine's folder but can
# be redirected with the FYP_BASE_DIR environment variable so the notebook
# runs on other machines without editing code.
BASE_DIR = os.environ.get("FYP_BASE_DIR", r"D:\Career\FYP\Development")

MODEL_DIR = os.path.join(BASE_DIR, "Models")

# SECURITY: pickle.load can execute arbitrary code while deserialising.
# Only load these files if they were produced by a trusted pipeline.
with open(os.path.join(MODEL_DIR, "key_embeddings.pkl"), "rb") as f:
    key_embeddings = pickle.load(f)

with open(os.path.join(MODEL_DIR, "sciq_processed.pkl"), "rb") as f:
    df_keys = pickle.load(f)

# grade_answer() looks up df_keys by the position of the best-matching
# embedding, so the two artifacts must be aligned one-to-one.
if len(key_embeddings) != len(df_keys):
    raise ValueError(
        f"key_embeddings has {len(key_embeddings)} entries but df_keys has "
        f"{len(df_keys)} rows; the two files are out of sync."
    )

print("Loaded keys:", len(df_keys))

Loaded keys: 13613


In [5]:
# Sentence-embedding model used by grade_answer() to encode student answers.
# NOTE(review): presumably the same model that produced key_embeddings.pkl;
# similarities are only meaningful if both sides share one model — confirm.
model = SentenceTransformer("all-MiniLM-L6-v2")

In [11]:
import os
import pandas as pd

# Project root. Defaults to the original development machine's folder but can
# be redirected with the FYP_BASE_DIR environment variable.
BASE_DIR = os.environ.get("FYP_BASE_DIR", r"D:\Career\FYP\Development")
STUDENT_INPUT_DIR = os.path.join(BASE_DIR, "Datasets", "Students_Inputs")

# The file carries an .xls extension but is parsed as delimited text with
# pd.read_csv; a real Excel workbook here would need pd.read_excel instead.
student_df = pd.read_csv(
    os.path.join(STUDENT_INPUT_DIR, "answers.xls")
)

# Fail early with a clear message if the column the grading loop reads is absent.
if "student_answer" not in student_df.columns:
    raise KeyError(
        "answers.xls must contain a 'student_answer' column; found: "
        f"{list(student_df.columns)}"
    )

print("Student answers loaded:", student_df.shape)
student_df.head()

Student answers loaded: (3, 2)


Unnamed: 0,question_id,student_answer
0,4,Plants use sunlight to make food.
1,12,Evaporation happens when water changes into va...
2,30,The sun is the main source of energy on earth.


In [12]:
# (minimum similarity, marks) bands, ordered from the highest threshold down.
GRADE_BANDS = ((0.80, 10), (0.65, 8), (0.50, 6))
# Marks awarded when the similarity is below every band.
FALLBACK_MARKS = 3


def grade_answer(student_answer, grade_bands=GRADE_BANDS, fallback_marks=FALLBACK_MARKS):
    """Grade one answer by cosine similarity to the closest stored answer key.

    The answer is cleaned with ``preprocess``, encoded with ``model``, and
    compared against every row of ``key_embeddings``. The highest similarity
    determines the marks through ``grade_bands``.

    NOTE(review): the answer is matched against all stored keys, not only the
    key of the question that was asked, so an answer that resembles the key
    of a different question can still score highly.

    Args:
        student_answer: Raw answer text (``str``).
        grade_bands: Iterable of ``(min_similarity, marks)`` pairs sorted by
            descending ``min_similarity``; the first band whose threshold the
            score reaches wins. Defaults reproduce the original 0.80/0.65/0.50
            cut-offs.
        fallback_marks: Marks given when no band matches.

    Returns:
        Tuple ``(marks, score, correct_answer)`` where ``score`` is the best
        cosine similarity as a ``float`` and ``correct_answer`` is the
        ``"correct_answer"`` value of the matching ``df_keys`` row.
    """
    cleaned = preprocess(student_answer)
    student_emb = model.encode(cleaned, convert_to_tensor=True)

    # cos_sim returns a (1, n_keys) matrix here; take its single row.
    similarities = cos_sim(student_emb, key_embeddings)[0]
    best_idx = torch.argmax(similarities).item()
    score = float(similarities[best_idx])

    # First band whose threshold is met, else the fallback.
    marks = next(
        (band_marks for threshold, band_marks in grade_bands if score >= threshold),
        fallback_marks,
    )

    return marks, score, df_keys.iloc[best_idx]["correct_answer"]

In [13]:
# Grade each submitted answer and build one record per answer, keeping the
# original answer text next to the matched key, similarity and marks.
answers = student_df["student_answer"]

results = [
    {
        "student_answer": answer_text,
        "matched_key": best_key,
        "similarity": similarity,
        "marks": awarded,
    }
    for answer_text, (awarded, similarity, best_key) in zip(
        answers, map(grade_answer, answers)
    )
]

results_df = pd.DataFrame(results)
results_df

Unnamed: 0,student_answer,matched_key,similarity,marks
0,Plants use sunlight to make food.,eating plants or organisms,0.636108,6
1,Evaporation happens when water changes into va...,evaporation,0.783794,8
2,The sun is the main source of energy on earth.,solar energy,0.668202,8


In [15]:
# Write the graded results beside the student input file. The path is built
# with os.path.join from STUDENT_INPUT_DIR rather than a plain string literal:
# the previous literal contained backslashes such as "\C" and "\F", which are
# invalid escape sequences in a non-raw Python string.
results_df.to_csv(os.path.join(STUDENT_INPUT_DIR, "graded_results.csv"), index=False)
print("Evaluation results saved.")

Evaluation results saved.
