In [27]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures


#### Data Loading

In [28]:
df = pd.read_csv('../zuu crew scores.csv')
df = df[df['CourseName']=='Foundations of ML']
df.head()

Unnamed: 0,MemberName,EducationLevel,Attendance,TotalHours,AssignmentsCompleted,HackathonParticipation,GitHubScore,PeerReviewScore,CourseName,CapstoneScore
0,Theekshana Rathnayake,3,79.9,43.7,2,0,62.8,5.0,Foundations of ML,45.3
1,Mayura Sandakalum Sellapperuma,2,76.8,95.6,6,0,87.4,2.7,Foundations of ML,78.8
2,Amila Narangoda,3,96.6,75.9,8,0,98.4,2.8,Foundations of ML,65.4
4,Tharusha Vihanga,2,83.2,24.0,6,0,41.8,4.2,Foundations of ML,40.1
7,Chamath Perera,3,86.5,88.0,5,0,23.9,1.3,Foundations of ML,68.2


In [29]:
X = df['Attendance'].values.reshape(-1,1)
Y = df['CapstoneScore'].values

#### Transform features into polynomial features

In [30]:
poly = PolynomialFeatures(degree=3)
X_poly = poly.fit_transform(X)

#### Model Train

In [31]:
model = LinearRegression()
model.fit(X_poly, Y)


0,1,2
,fit_intercept,True
,copy_X,True
,tol,1e-06
,n_jobs,
,positive,False


In [32]:
Y_hat = model.predict(X_poly)
Y_hat

array([60.02215841, 58.64848505, 49.39854102, 60.68644008, 60.2641032 ,
       58.54035935, 60.69835943, 58.32877274, 53.06053113, 50.72131282,
       52.91937956, 52.49917426, 48.85570562, 60.63859945, 59.29806424,
       51.54533784, 53.08295029, 54.77167155, 51.54533784, 60.29546218,
       53.41521155, 53.62895559, 50.28136412, 56.41460845, 60.38214449,
       53.54129402, 58.14644927, 52.91937956, 50.72131282, 48.4358497 ,
       60.69096485, 51.08159604, 55.59115265, 54.48645728, 57.73043141,
       60.69847741, 54.70045319, 55.478809  , 46.67985886, 59.88073076,
       48.59578748, 54.70045319, 60.50302427, 60.5603878 , 54.41503666,
       51.1518629 , 54.68394673, 43.10117002, 54.84282063, 48.51444455,
       50.58376261, 60.68091749, 54.91389294, 51.08736041, 52.28206478,
       60.69674007, 52.08510105, 49.39854102, 58.64848505, 49.7644466 ,
       54.05760569, 57.97071321, 55.26784079, 60.64899878, 59.68778544,
       60.2641032 , 47.5533869 , 58.85325174, 53.20212442, 60.02

#### Model Evaluation

In [33]:
def MSE(Y,Y_hat):
    e_i = Y - Y_hat
    return np.mean(e_i**2)

def MAE(Y,Y_hat):
    e_i = Y - Y_hat
    return np.mean(np.abs(e_i))

def R2_Score(Y,Y_hat):
    y_mean = np.mean(Y)
    e_i = Y - Y_hat
    std_i = Y - y_mean

    E = np.sum(e_i**2) / np.sum(std_i ** 2)
    return 1 - E

def RMSE(Y,Y_hat):
    e_i = Y - Y_hat
    return np.sqrt(np.mean((Y - Y_hat) ** 2))

In [34]:
mse = MSE(Y, Y_hat)
mae = MAE(Y, Y_hat)
r2 = R2_Score(Y, Y_hat)
rmse = RMSE(Y,Y_hat)

print(f"MSE : {mse}")
print(f"MAE : {mae}")
print(f"R2 Score : {r2}")
print(f"RMSE : {rmse}")

MSE : 266.32831119587524
MAE : 13.831031002111498
R2 Score : 0.0678983250142231
RMSE : 16.31956835200843
