In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score

# Read the graded-exam records and discard every row containing a missing value
df = pd.read_csv("graded_exams.csv").dropna()

# Randomly reorder all rows (fixed seed keeps the order reproducible),
# then renumber the index from 0 so it no longer reflects the file order
shuffled_data = df.sample(frac=1, random_state=42)
shuffled_data = shuffled_data.reset_index(drop=True)

In [None]:
# Replace every categorical column with integer codes. One encoder per column
# is kept in `label_encoders`, keyed by column name, so the fitted mapping
# stays available after the column has been overwritten.
# NOTE(review): LabelEncoder numbers the distinct values in sorted order, so
# inspect label_encoders[<column>].classes_ before relying on what a given
# code means (assign_grade compares against specific codes).
categorical_columns = [
    'gender',
    'race/ethnicity',
    'parental level of education',
    'lunch',
    'test preparation course',
]
label_encoders = {column: LabelEncoder() for column in categorical_columns}
for column in categorical_columns:
    shuffled_data[column] = label_encoders[column].fit_transform(shuffled_data[column])

# Define rule-based grading function with categorical conditions
def assign_grade(score, lunch, test_prep, parent_edu):
    """Map a numeric score plus three encoded attributes to a grade label.

    The score is first raised by one point when ``lunch == 1`` and by one
    further point when ``parent_edu == 0``. The adjusted score is then
    compared against descending cutoffs; the first cutoff it reaches decides
    the label. The lowest passing cutoff is 71 when ``test_prep == 1`` and
    73 otherwise.

    NOTE(review): the original author assumed lunch code 1 means "standard"
    lunch, test_prep code 1 means the prep course was taken, and parent_edu
    code 0 means "some high school". None of these mappings is verified
    here -- confirm them against the fitted encoders' ``classes_``.

    Args:
        score: Numeric exam score.
        lunch: Encoded lunch category.
        test_prep: Encoded test-preparation category.
        parent_edu: Encoded parental-education category.

    Returns:
        One of "Excellent", "Superior", "Good", "Above Average", "Average",
        "Below Average", "Passing" or "Failure".
    """
    lunch_bonus = 1 if lunch == 1 else 0
    passing_floor = 71 if test_prep == 1 else 73
    parent_bonus = 1 if parent_edu == 0 else 0

    adjusted_score = score + lunch_bonus + parent_bonus

    # Cutoffs are listed from highest to lowest; the first one met wins.
    bands = (
        (97, "Excellent"),
        (93, "Superior"),
        (89, "Good"),
        (85, "Above Average"),
        (81, "Average"),
        (77, "Below Average"),
        (passing_floor, "Passing"),
    )
    for cutoff, label in bands:
        if adjusted_score >= cutoff:
            return label
    return "Failure"