In [1]:
import pandas as pd
import numpy as np


In [2]:
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics import mean_squared_error, r2_score


In [3]:
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.svm import SVR


In [4]:
# Read the semicolon-delimited CSV into a DataFrame and preview its first rows.
DATA_PATH = "student-mat.csv"
data = pd.read_csv(DATA_PATH, delimiter=";")
data.head(n=5)


Unnamed: 0,school,sex,age,address,famsize,Pstatus,Medu,Fedu,Mjob,Fjob,...,famrel,freetime,goout,Dalc,Walc,health,absences,G1,G2,G3
0,GP,F,18,U,GT3,A,4,4,at_home,teacher,...,4,3,4,1,1,3,6,5,6,6
1,GP,F,17,U,GT3,T,1,1,at_home,other,...,5,3,3,1,1,3,4,5,5,6
2,GP,F,15,U,LE3,T,1,1,at_home,other,...,4,3,2,2,3,3,10,7,8,10
3,GP,F,15,U,GT3,T,4,2,health,services,...,3,2,2,1,1,5,2,15,14,15
4,GP,F,16,U,GT3,T,3,3,other,other,...,4,3,2,1,2,5,4,6,10,10


In [5]:
# Integer-encode every non-numeric column of `data` (in place) so the
# estimators fitted later receive a purely numeric feature matrix.
#
# Columns are picked with `is_numeric_dtype` instead of `dtype == 'object'`:
# pandas can load text columns with a dedicated string dtype (the default in
# pandas 3), for which the object comparison is False, so the columns would
# silently stay un-encoded.
categorical_columns = [
    column
    for column in data.columns
    if not pd.api.types.is_numeric_dtype(data[column])
]

# One fitted encoder per column, keyed by column name, so each code -> label
# mapping stays recoverable via `label_encoders[name].inverse_transform(...)`.
label_encoders = {}

# `le` stays bound even when there is nothing to encode; after the loop it
# refers to the encoder of the last encoded column.
le = LabelEncoder()

for column in categorical_columns:
    le = LabelEncoder()
    # NOTE: the codes follow the sorted label order, which imposes an
    # arbitrary numeric ordering on nominal columns (e.g. Mjob / Fjob).
    data[column] = le.fit_transform(data[column])
    label_encoders[column] = le


In [6]:
# Separate the regression target (the G3 column) from the predictor columns.
target_column = "G3"
y = data[target_column]
X = data.drop(columns=[target_column])

In [7]:
# Hold out 20% of the rows for testing; the fixed seed keeps the split
# reproducible across runs.
splits = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_test, y_train, y_test = splits


In [8]:
# Learn the standardisation statistics from the training split only, then
# apply that same transform to both splits.
scaler = StandardScaler().fit(X_train)
X_train_scaled, X_test_scaled = (
    scaler.transform(part) for part in (X_train, X_test)
)


In [9]:
# Linear-regression baseline, fitted on the unscaled training features
# (fit() returns the estimator itself).
lr = LinearRegression().fit(X_train, y_train)
y_pred_lr = lr.predict(X_test)


In [10]:
# Test-set mean squared error and R^2 for the linear-regression predictions.
mse_lr, r2_lr = (
    metric(y_test, y_pred_lr) for metric in (mean_squared_error, r2_score)
)


In [11]:
# Report the linear-regression test metrics, one per line.
for label, value in (
    ("Linear Regression MSE:", mse_lr),
    ("Linear Regression R2 Score:", r2_lr),
):
    print(label, value)


Linear Regression MSE: 5.03239410886674
Linear Regression R2 Score: 0.7545777855043501


In [12]:
# Decision-tree regressor on the unscaled training features; the fixed seed
# makes the fitted tree reproducible.
dt = DecisionTreeRegressor(random_state=42).fit(X_train, y_train)
y_pred_dt = dt.predict(X_test)


In [13]:
# Test-set mean squared error and R^2 for the decision-tree predictions.
mse_dt, r2_dt = (
    metric(y_test, y_pred_dt) for metric in (mean_squared_error, r2_score)
)




In [14]:
# Report the decision-tree test metrics, one per line.
for label, value in (
    ("Decision Tree MSE:", mse_dt),
    ("Decision Tree R2 Score:", r2_dt),
):
    print(label, value)

Decision Tree MSE: 5.658227848101266
Decision Tree R2 Score: 0.7240568249304535


In [15]:
# Hyper-parameter candidates for the SVR grid search: three C values and
# three gamma settings, all with the RBF kernel.
param_grid = dict(
    C=[0.1, 1, 10],
    gamma=["scale", 0.01, 0.1],
    kernel=["rbf"],
)


In [16]:

# Grid-search an SVR over `param_grid`, scoring each candidate by negated
# mean squared error across 5 cross-validation folds.
#
# The searched estimator is a scaler + SVR pipeline rather than a bare SVR.
# The search is fitted on `X_train_scaled`, which was standardised with
# statistics from the *whole* training split, so with a bare SVR every
# validation fold would leak into the preprocessing of its own training fold.
# With the scaler inside the pipeline, each fold is re-standardised from its
# training rows only.
#
# Passing already-standardised features is harmless: standardising is
# unaffected by an earlier per-feature shift/rescale, so each fold ends up
# scaled by its own statistics, and on the final full-training refit the
# inner scaler is effectively the identity. Code that calls
# `predict(X_test_scaled)` on the best estimator therefore keeps working.
#
# Consequences: `grid.best_estimator_` is a Pipeline, and the keys of
# `grid.best_params_` carry an "svr__" prefix.
svr = SVR()

svr_pipeline = Pipeline([
    ("scaler", StandardScaler()),
    ("svr", svr),
])

# Pipeline hyper-parameters are addressed as "<step name>__<parameter>".
svr_param_grid = {
    f"svr__{name}": candidates for name, candidates in param_grid.items()
}

grid = GridSearchCV(
    svr_pipeline,
    svr_param_grid,
    cv=5,
    scoring="neg_mean_squared_error",
)


In [17]:
# Run the search on the standardised training features and keep the
# estimator refit with the best-scoring parameters (fit() returns the
# fitted search object itself).
best_svr = grid.fit(X_train_scaled, y_train).best_estimator_


In [18]:
# Score the tuned model on the standardised test features and report both
# metrics.
y_pred_svr = best_svr.predict(X_test_scaled)

svr_scores = {
    "Optimised SVR MSE:": mean_squared_error(y_test, y_pred_svr),
    "Optimised SVR R2 Score:": r2_score(y_test, y_pred_svr),
}
mse_svr, r2_svr = svr_scores.values()

for label, score in svr_scores.items():
    print(label, score)


Optimised SVR MSE: 5.0617151795667255
Optimised SVR R2 Score: 0.7531478414365961


In [19]:
# Collect the test metrics of all three models into one comparison table,
# one row per model.
results = pd.DataFrame(
    [
        ("Linear Regression", mse_lr, r2_lr),
        ("Decision Tree", mse_dt, r2_dt),
        ("Optimised SVR", mse_svr, r2_svr),
    ],
    columns=["Model", "MSE", "R2 Score"],
)


In [20]:
# Bare last expression: the notebook renders the model comparison table.
results

Unnamed: 0,Model,MSE,R2 Score
0,Linear Regression,5.032394,0.754578
1,Decision Tree,5.658228,0.724057
2,Optimised SVR,5.061715,0.753148


In [21]:
# Compare the tuned model's error on the data it was fitted on against its
# held-out test error; a training error far below the test error points to
# overfitting.
train_pred = best_svr.predict(X_train_scaled)
train_mse, test_mse = mean_squared_error(y_train, train_pred), mse_svr

for label, value in zip(("Training MSE:", "Testing MSE:"), (train_mse, test_mse)):
    print(label, value)


Training MSE: 2.0170281385897764
Testing MSE: 5.0617151795667255
