In [94]:
import pandas as pd

In [95]:
df = pd.read_csv("Student_Performance.csv")

In [96]:
df.head()

Unnamed: 0,Hours Studied,Previous Scores,Extracurricular Activities,Sleep Hours,Sample Question Papers Practiced,Performance Index
0,7,99,Yes,9,1,91.0
1,4,82,No,4,2,65.0
2,8,51,Yes,7,2,45.0
3,5,52,Yes,5,2,36.0
4,7,75,No,8,5,66.0


In [97]:
X = df.iloc[:,:5].values
y = df.iloc[:,-1].values
y

array([91., 65., 45., ..., 74., 95., 64.])

In [98]:
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
ct = ColumnTransformer([("Extracurricular Activities",OneHotEncoder(),[2])],remainder="passthrough")
X = ct.fit_transform(X)

In [99]:
X

array([[0.0, 1.0, 7, 99, 9, 1],
       [1.0, 0.0, 4, 82, 4, 2],
       [0.0, 1.0, 8, 51, 7, 2],
       ...,
       [0.0, 1.0, 6, 83, 8, 5],
       [0.0, 1.0, 9, 97, 7, 0],
       [1.0, 0.0, 7, 74, 8, 1]], dtype=object)

In [100]:
X = X[:,1:]

In [101]:
X

array([[1.0, 7, 99, 9, 1],
       [0.0, 4, 82, 4, 2],
       [1.0, 8, 51, 7, 2],
       ...,
       [1.0, 6, 83, 8, 5],
       [1.0, 9, 97, 7, 0],
       [0.0, 7, 74, 8, 1]], dtype=object)

In [102]:
import statsmodels.api as sm
import numpy as np

In [103]:
X = sm.add_constant(X)
X = np.append(arr=np.ones((10000, 1)).astype(int), values=X, axis=1)
X_opt = X[:,[0,1,2,3,4,5]]
regressor_ols = sm.OLS(y, X_opt.tolist()).fit()

In [104]:
regressor_ols.summary()

0,1,2,3
Dep. Variable:,y,R-squared:,0.988
Model:,OLS,Adj. R-squared:,0.988
Method:,Least Squares,F-statistic:,204300.0
Date:,"Wed, 20 Mar 2024",Prob (F-statistic):,0.0
Time:,22:18:05,Log-Likelihood:,-21665.0
No. Observations:,10000,AIC:,43340.0
Df Residuals:,9995,BIC:,43380.0
Df Model:,4,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,-16.6200,0.064,-259.912,0.000,-16.745,-16.495
x1,-16.6200,0.064,-259.912,0.000,-16.745,-16.495
x2,0.6274,0.042,14.845,0.000,0.545,0.710
x3,2.8567,0.008,350.124,0.000,2.841,2.873
x4,1.0187,0.001,836.215,0.000,1.016,1.021
x5,0.4819,0.012,38.678,0.000,0.458,0.506

0,1,2,3
Omnibus:,2.22,Durbin-Watson:,1.999
Prob(Omnibus):,0.33,Jarque-Bera (JB):,2.264
Skew:,0.002,Prob(JB):,0.322
Kurtosis:,3.074,Cond. No.,9.82e+16


In [105]:
from sklearn.model_selection import train_test_split
X_train,X_test,y_train,y_test = train_test_split(X,y,test_size = 0.3, random_state = 42)

In [106]:
from sklearn.preprocessing import StandardScaler
sc_X = StandardScaler()
X_train = sc_X.fit_transform(X_train)
print(X_train)
X_test = sc_X.fit_transform(X_test)
print(X_test)

[[ 0.          0.          1.01962103 ...  1.69543433 -0.31944271
  -1.26226064]
 [ 0.          0.          1.01962103 ...  1.17637842  1.44869036
  -0.56482402]
 [ 0.          0.         -0.98075655 ... -0.72682657 -0.31944271
  -1.26226064]
 ...
 [ 0.          0.         -0.98075655 ... -1.24588248  0.26993498
   0.4813309 ]
 [ 0.          0.         -0.98075655 ... -1.30355536  1.44869036
  -1.61097894]
 [ 0.          0.         -0.98075655 ... -1.36122824 -0.31944271
   0.4813309 ]]
[[ 0.          0.         -1.01072417 ... -0.00459332  0.88347388
  -0.87220784]
 [ 0.          0.          0.98938962 ... -1.33069816 -1.47875574
   1.22248744]
 [ 0.          0.          0.98938962 ... -0.75413084  0.29291647
   0.1751398 ]
 ...
 [ 0.          0.          0.98938962 ... -1.2153847  -0.29764093
  -0.17397608]
 [ 0.          0.          0.98938962 ...  0.97557113  1.47403128
   0.1751398 ]
 [ 0.          0.          0.98938962 ... -1.50366836 -0.29764093
  -0.17397608]]


In [107]:
from sklearn.linear_model import LinearRegression
regressor = LinearRegression()
regressor.fit(X_train,y_train)
y_pred = regressor.predict(X_test)

In [108]:
y_pred

array([55.23214673, 23.02760097, 48.47817539, ..., 33.91378637,
       68.35399964, 31.72323747])

In [109]:
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
r_squared = r2_score(y_test, y_pred)
n = len(X_test) 
p = X_test.shape[1] 
adjusted_r_squared = 1 - (1 - r_squared) * (n - 1) / (n - p - 1)

In [110]:
print("Mean Absolute Error:", mae)
print("Mean Squared Error:", mse)
print("R-Squared:", r_squared)
print("Adjusted R-Squared:", adjusted_r_squared)

Mean Absolute Error: 1.6849362572964255
Mean Squared Error: 4.410298166917474
R-Squared: 0.9880821988936449
Adjusted R-Squared: 0.9880543163375806
