In [None]:
import sys
import os 
# Make the sibling `student_marks` directory importable; presumably this is
# where the `indexer`, `retriever` and `obtain_grade` modules imported by this
# notebook live (verify against the repository layout).
# The entry is stored as an absolute path so it does not depend on the kernel's
# working directory at the moment a module is imported, and it is added only
# once so re-running this cell does not pile up duplicate `sys.path` entries.
_student_marks_dir = os.path.abspath("../student_marks")
if _student_marks_dir not in sys.path:
    sys.path.append(_student_marks_dir)

In [None]:
import pandas as pd

from indexer import StudentIndexBuilder
from retriever import StudentRetriever
from obtain_grade import load_skills, generate_prompt, generate_explanation_prompt, call_llm, compute_global_mark

In [None]:
# Every location below is derived from the parent of the kernel's current
# working directory, which this notebook treats as the repository root.
repo_root = os.path.abspath("..")

# Reports directory plus the two files handed to the index builder
# (presumably the serialized index and its metadata -- confirm in `indexer`).
yaml_dir, index_file, meta_file = (
    os.path.join(repo_root, relative_path)
    for relative_path in (
        "data/student_reports",
        "student_marks/students_info.index",
        "student_marks/students_metadata.npy",
    )
)

# Configure the builder with those paths and run it.
index = StudentIndexBuilder(yaml_dir=yaml_dir, index_file=index_file, meta_file=meta_file)
index.build_index()


In [None]:
# Run configuration: which students to grade, where the skills definition
# lives, and the `top_k` value passed to the retriever for each query.
STUDENTS_IDS = [
    "student_1",
    "student_2",
    "student_3",
    "student_4",
]
SKILLS_YAML = os.path.join(repo_root, "student_marks/skills.yaml")
TOP_K = 3

In [None]:
# Load the skills definition and open a retriever on the index/metadata files.
skills = load_skills(SKILLS_YAML)
retriever = StudentRetriever(index_file=index_file, meta_file=meta_file)

# student_id -> {skill_name -> result dict}; each student's dict additionally
# receives a "global grade" entry once all of its skills have been processed.
student_grades = {}

for student_id in STUDENTS_IDS:
    skill_results = {}
    student_grades[student_id] = skill_results

    for skill_name, skill_data in skills.items():
        description = skill_data.get("description", "")
        instructions = skill_data.get("prompt_instructions", "")
        # NOTE(review): a skill without "weight" is stored with the string
        # "Error" rather than a number -- confirm how downstream code treats it.
        weight = skill_data.get("weight", "Error")

        # The skill description is used as the retrieval query for this student.
        chunks = retriever.retrieve(description, student_id=student_id, top_k=TOP_K)

        if chunks:
            # First LLM call: obtain the grade from the instructions + chunks.
            grade_output = call_llm(generate_prompt(instructions, chunks))
            print(grade_output)

            # Second LLM call: obtain an explanation for that grade.
            explanation_output = call_llm(
                generate_explanation_prompt(instructions, chunks, grade_output, skill_name)
            )
            print(explanation_output)

            skill_results[skill_name] = {
                "grade": grade_output,
                "explanation": explanation_output.strip(),
                "relevant_chunks": chunks,
                "weight": weight
            }
        else:
            # Nothing retrieved: report it and leave this skill out of the results.
            print(f"No chunks found for student {student_id} and skill '{description}'")

    # Stored next to the per-skill entries under the reserved key "global grade".
    global_mark = compute_global_mark(skill_results, skills)
    skill_results["global grade"] = global_mark

# Flatten the nested results into one row per (student, skill) pair.
#
# The outer loop target is deliberately not named `skills`: a `for` target is a
# plain assignment in the notebook's global namespace, so reusing that name
# would replace the loaded skills configuration with the last student's result
# dict and leave stale state behind for any later cell or partial re-run.
rows = []
for student_id, graded_skills in student_grades.items():
    # The per-student dict mixes skill entries with a single "global grade"
    # entry; the latter is repeated on every row instead of getting its own.
    global_grade = graded_skills.get("global grade")
    for graded_skill, result in graded_skills.items():
        if graded_skill == "global grade":
            continue
        rows.append({
            "student_id": student_id,
            "skill": graded_skill,
            "grade": result.get("grade"),
            "explanation": result.get("explanation"),
            # Keep only the text of each retrieved chunk in the tabular view.
            "relevant_chunks": [chunk['text'] for chunk in result.get("relevant_chunks")],
            "global_grade": global_grade
            })
student_grades_df = pd.DataFrame(rows)

In [None]:
import json

# Persist the results in two forms: the nested dict as JSON and the flattened
# per-skill table as an Excel sheet, both under the repository's data folder.
json_path = os.path.join(repo_root, "data/student_grades.json")
excel_path = os.path.join(repo_root, "data/student_grades.xlsx")

with open(json_path, "w") as json_file:
    json.dump(student_grades, json_file, indent=4)

student_grades_df.to_excel(excel_path, index=False)