In [56]:
# Grade prediction
#  Internship number: 2306

In [58]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Location of the raw grades file.
# NOTE(review): this is an absolute, machine-specific path; the notebook will
# only run where this exact file exists. Consider a path relative to the project.
GRADES_CSV_PATH = "/Users/wilogy/Downloads/Projects/Project 3/Grades.csv"

# Read the raw grades table as-is
dataset_grades = pd.read_csv(GRADES_CSV_PATH)

# Work on an independent copy so that `dataset_grades` keeps its original
# values (its CGPA column is read again later as the prediction target)
data = dataset_grades.copy()

# Grade points assigned to each letter grade.
# 'A+' and 'A' both count as 4.0; 'F', 'WU' and 'W' all count as 0.0.
grading_scale = {
    'A+': 4.0,
    'A': 4.0,
    'A-': 3.7,
    'B+': 3.3,
    'B': 3.0,
    'B-': 2.7,
    'C+': 2.3,
    'C': 2.0,
    'C-': 1.7,
    'D+': 1.3,
    'D': 1.0,
    'D-': 0.7,
    'F': 0.0,
    'WU': 0.0,
    'W': 0.0,
}

# Convert every column from letter grades to grade points.
# Any value that is not a key of `grading_scale` becomes NaN. Because the
# mapping is applied to ALL columns, columns that do not hold letter grades
# end up entirely NaN in `data`.
data = pd.DataFrame(
    {name: grades.map(grading_scale) for name, grades in data.items()}
)

# Fill the remaining gaps with the mean of the respective column.
# Columns that are entirely NaN have a NaN mean and therefore stay NaN.
# NOTE(review): the means are computed over all rows, i.e. before the
# train/test split performed later, so test rows influence the imputed values.
column_means = data.mean()
data = data.fillna(column_means)

# Course columns used as model features
course_codes = [
    "PH-121", "HS-101", "CY-105", "HS-105/12", "MT-111", "CS-105", "CS-106",
    "EL-102", "EE-119", "ME-107", "CS-107", "HS-205/20", "MT-222", "EE-222", "MT-224",
    "CS-210", "CS-211", "CS-203", "CS-214", "EE-217", "CS-212", "CS-215", "MT-331",
    "EF-303", "HS-304", "CS-301", "CS-302", "TC-383", "MT-442", "EL-332", "CS-318",
    "CS-306", "CS-312", "CS-317", "CS-403", "CS-421", "CS-406", "CS-414", "CS-419",
    "CS-423", "CS-412",
]

# Rows with a missing CGPA are dropped instead of being filled with the mean
# CGPA: an imputed target is a fabricated label, which distorts both the fit
# and the evaluation metrics. When no CGPA is missing this keeps every row.
has_cgpa = dataset_grades["CGPA"].notna()

# Define the features (X) and target (y).
# The same mask is applied to both so that they stay row-aligned; the target is
# read from the untouched `dataset_grades`, the features from the encoded `data`.
X = data.loc[has_cgpa, course_codes]
y = dataset_grades.loc[has_cgpa, "CGPA"]

# Hold out 20% of the rows for evaluation; the fixed random_state makes the
# split reproducible between runs
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Create the linear regression model and fit it on the training rows
# (fit() returns the fitted estimator itself)
model = LinearRegression().fit(X_train, y_train)

# Predict the CGPA of the held-out rows
cgpa_prediction = model.predict(X_test)

# Evaluate the model on the held-out test rows
mae = mean_absolute_error(y_test, cgpa_prediction)
mse = mean_squared_error(y_test, cgpa_prediction)
# RMSE is taken as the square root of the MSE computed above. The former
# `mean_squared_error(..., squared=False)` spelling was deprecated in
# scikit-learn 1.4 and removed in 1.6, where it raises a TypeError.
rmse = mse ** 0.5
r2 = r2_score(y_test, cgpa_prediction)

# Report the metrics
print("Mean Absolute Error:", mae)
print("Mean Squared Error:", mse)
print("Root Mean Squared Error:", rmse)
print("R-squared:", r2)


Mean Absolute Error: 0.04168590892461823
Mean Squared Error: 0.003270201126172455
Root Mean Squared Error: 0.05718567238541884
R-squared: 0.9901733067632673
