In [1]:
# Predict Final Letter Grade from Course Assignments

In [31]:
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score

In [32]:
df = pd.read_csv("refined_course_dataset.csv")

In [33]:
features = [
    "Discussion_Count", "Discussion_Weight_%", "Discussion_Avg_Score",
    "Assignment_Count", "Assignment_Weight_%", "Assignment_Avg_Score",
    "Quiz_Count", "Quiz_Weight_%", "Quiz_Avg_Score",
    "Final_Exam_Weight_%", "Final_Exam_Score",
    "Other_Weight_%", "Other_Avg_Score"
]

X = df[features].fillna(0)
y = df["Final_Numeric_Grade"]

label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

In [34]:
X_train, X_test, y_train, y_test = train_test_split(X, y_encoded, test_size=0.5, random_state=42)

model = RandomForestRegressor(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

0,1,2
,n_estimators,100
,criterion,'squared_error'
,max_depth,
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,1.0
,max_leaf_nodes,
,min_impurity_decrease,0.0
,bootstrap,True


In [35]:
y_pred = model.predict(X_test)
print("MSE:", mean_squared_error(y_test, y_pred))
print("R² Score:", r2_score(y_test, y_pred))

MSE: 1.6377403846153848
R² Score: 0.9980017922481208


In [36]:
def numeric_to_letter(grade):
    if grade >= 98: return "A+"
    elif grade >= 93: return "A"
    elif grade >= 90: return "A-"
    elif grade >= 88: return "B+"
    elif grade >= 83: return "B"
    elif grade >= 80: return "B-"
    elif grade >= 78: return "C+"
    elif grade >= 73: return "C"
    elif grade >= 70: return "C-"
    elif grade >= 68: return "D+"
    elif grade >= 63: return "D"
    elif grade >= 60: return "D-"
    else: return "F"

In [40]:
partial_course = pd.DataFrame([{
    "Discussion_Count": 4, "Discussion_Weight_%": 30.0, "Discussion_Avg_Score": 90.125,
    "Assignment_Count": 8, "Assignment_Weight_%": 30.0, "Assignment_Avg_Score": 97.8125,
    "Quiz_Count": 2, "Quiz_Weight_%": 20.0, "Quiz_Avg_Score": 93.61,
    "Final_Exam_Weight_%": 15.0, "Final_Exam_Score": 93.33,  # Taken
    "Other_Weight_%": 5.0, "Other_Avg_Score": 77.78
}])

predicted_numeric = model.predict(partial_course)[0]
print(f"Predicted Final Numeric Grade: {predicted_numeric:.2f}")
predicted_letter = numeric_to_letter(predicted_numeric)
print(f"Predicted Letter Grade: {predicted_letter}")

Predicted Final Numeric Grade: 85.28
Predicted Letter Grade: B


In [41]:
partial_course = pd.DataFrame([{
    "Discussion_Count": 8, "Discussion_Weight_%": 25.0, "Discussion_Avg_Score": 93.1428571,
    "Assignment_Count": 8, "Assignment_Weight_%": 40.0, "Assignment_Avg_Score": 99.6,
    "Quiz_Count": 2, "Quiz_Weight_%": 20.0, "Quiz_Avg_Score": 97.5,
    "Final_Exam_Weight_%": 15.0, "Final_Exam_Score": 100.0,  # Not taken yet
    "Other_Weight_%": 0.0, "Other_Avg_Score": 0.0
}])

predicted_numeric = model.predict(partial_course)[0]
print(f"Predicted Final Numeric Grade: {predicted_numeric:.2f}")
predicted_letter = numeric_to_letter(predicted_numeric)
print(f"Predicted Letter Grade: {predicted_letter}")

Predicted Final Numeric Grade: 91.82
Predicted Letter Grade: A-


In [42]:
partial_course = pd.DataFrame([{
    "Discussion_Count": 5, "Discussion_Weight_%": 30.0, "Discussion_Avg_Score": 100.0,
    "Assignment_Count": 6, "Assignment_Weight_%": 35.0, "Assignment_Avg_Score": 100.0,
    "Quiz_Count": 2, "Quiz_Weight_%": 20.0, "Quiz_Avg_Score": 96.665,
    "Final_Exam_Weight_%": 15.0, "Final_Exam_Score": 95.0,  # Not taken yet
    "Other_Weight_%": 0.0, "Other_Avg_Score": 0.0
}])

predicted_numeric = model.predict(partial_course)[0]
print(f"Predicted Final Numeric Grade: {predicted_numeric:.2f}")
predicted_letter = numeric_to_letter(predicted_numeric)
print(f"Predicted Letter Grade: {predicted_letter}")

Predicted Final Numeric Grade: 97.43
Predicted Letter Grade: A
