In [14]:
import pandas as pd
import plotly.graph_objects as go

In [15]:
import sys
import os 
sys.path.append("../student_marks")

In [16]:
repo_root = os.path.abspath("..")
gt_path = os.path.join(repo_root, "data/student_grades_ground_truth.xlsx")
preds_path = os.path.join(repo_root, "data/student_grades.xlsx")
student_grades_gt = pd.read_excel(gt_path)
student_grade_predictions = pd.read_excel(preds_path)

In [17]:
student_grades_gt["student_id"] = student_grades_gt["student_id"].ffill()
student_grades_gt["report"] = student_grades_gt["report"].ffill()
student_grades_gt["global_grade"] = student_grades_gt["global_grade"].ffill()

In [18]:
student_grades_gt_preds = student_grades_gt.merge(student_grade_predictions, on=["student_id", "skill"], suffixes=('_gt', '_pred'))

In [19]:
student_grades_gt_preds["Error skill"] = student_grades_gt_preds["grade_gt"] - student_grades_gt_preds["grade_pred"]
student_grades_gt_preds["Error global grade"] = student_grades_gt_preds["global_grade_gt"] - student_grades_gt_preds["global_grade_pred"]

## Error per student

In [20]:
error_per_student = student_grades_gt_preds.groupby("student_id").first()[["global_grade_gt", "global_grade_pred", "Error global grade"]]
error_per_student

Unnamed: 0_level_0,global_grade_gt,global_grade_pred,Error global grade
student_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
student_1,10.0,8.0,2.0
student_2,6.2,6.8,-0.6
student_3,4.0,5.4,-1.4
student_4,5.2,6.4,-1.2


In [21]:
fig = go.Figure()

fig.add_trace(go.Scatter(
    x=error_per_student.index,
    y=error_per_student["global_grade_gt"],
    mode="markers",
    name="Ground truth"
))
fig.add_trace(go.Scatter(
    x=error_per_student.index,
    y=error_per_student["global_grade_pred"],
    mode="markers",
    name="IA"
))

fig.update_layout(
    xaxis_title="Student",
    yaxis_title="Global grade",
    xaxis=dict(tickangle=45)
)

fig.show()


In [22]:
fig = go.Figure()


fig.add_trace(go.Scatter(
    x=error_per_student.index,
    y=error_per_student["Error global grade"],
    mode="markers"
))

fig.add_hline(y=1.5, line_dash="dash", line_color="red")
fig.add_hline(y=-1.5, line_dash="dash", line_color="red")

fig.update_layout(
    xaxis_title="Student",
    yaxis_title="Error global grade (GT-IA)",
    xaxis=dict(tickangle=45)
)

fig.show()


## Error per skill

In [23]:
error_per_skill = student_grades_gt_preds.groupby("skill")["Error skill"].agg(
    mean_error=lambda x: x.abs().mean(),
    std_error=lambda x: x.abs().std().round(2)
).reset_index()
error_per_skill = error_per_skill.rename(columns={
    "mean_error": "Error skill",
    "std_error": "Standard deviation skill"
})

In [24]:
for skill in error_per_skill["skill"].unique():
    error_per_skill.loc[error_per_skill["skill"]==skill, "Number students error other than 0"] = len(student_grades_gt_preds.loc[(student_grades_gt_preds["skill"]==skill)&(student_grades_gt_preds["Error skill"]!=0)])
    error_per_skill.loc[error_per_skill["skill"]==skill, "Number students absoluto error greater than 1"] = len(student_grades_gt_preds.loc[(student_grades_gt_preds["skill"]==skill)&(abs(student_grades_gt_preds["Error skill"])>1)])
    error_per_skill.loc[error_per_skill["skill"]==skill, "Number students absolute error greater than 2"] = len(student_grades_gt_preds.loc[(student_grades_gt_preds["skill"]==skill)&(abs(student_grades_gt_preds["Error skill"])>2)])
    error_per_skill.loc[error_per_skill["skill"]==skill, "Number students absolute error equal to 10"] = len(student_grades_gt_preds.loc[(student_grades_gt_preds["skill"]==skill)&(abs(student_grades_gt_preds["Error skill"])==10)])
    error_per_skill.loc[error_per_skill["skill"]==skill, "Number students IA less restrictive"] = len(student_grades_gt_preds.loc[(student_grades_gt_preds["skill"]==skill)&(student_grades_gt_preds["grade_gt"]>student_grades_gt_preds["grade_pred"])])
    error_per_skill.loc[error_per_skill["skill"]==skill, "Number students IA more or equal restrictive"] = len(student_grades_gt_preds.loc[(student_grades_gt_preds["skill"]==skill)&(student_grades_gt_preds["grade_gt"]<=student_grades_gt_preds["grade_pred"])])

In [25]:
error_per_skill

Unnamed: 0,skill,Error skill,Standard deviation skill,Number students error other than 0,Number students absoluto error greater than 1,Number students absolute error greater than 2,Number students absolute error equal to 10,Number students IA less restrictive,Number students IA more or equal restrictive
0,academic_skills,0.5,1.0,1.0,1.0,0.0,0.0,1.0,3.0
1,communication_skills,2.0,0.0,4.0,4.0,0.0,0.0,1.0,3.0
2,guidance_skills,1.5,1.91,2.0,2.0,1.0,0.0,1.0,3.0
3,learning_and_thinking_skills,2.0,1.63,3.0,3.0,1.0,0.0,1.0,3.0
4,socioemotional_skills,1.5,1.0,3.0,3.0,0.0,0.0,2.0,2.0


In [26]:
fig = go.Figure()


fig.add_trace(go.Scatter(
    x=error_per_skill.index,
    y=error_per_skill["Error skill"],
    mode="markers",
))

fig.add_hline(y=1.75, line_dash="dash", line_color="red")


fig.update_layout(
    xaxis_title="Skill",
    yaxis_title="Error skill (GT-IA)",
    xaxis=dict(tickangle=45)
)

fig.show()
