In [1]:
import numpy as np
import pandas as pd
import warnings
warnings.filterwarnings('ignore')
from sklearn.model_selection import train_test_split,GridSearchCV,StratifiedKFold,cross_val_score
from sklearn.metrics import r2_score,mean_absolute_error,mean_squared_error,roc_curve,accuracy_score
from sklearn.preprocessing import StandardScaler,OneHotEncoder,OrdinalEncoder
from sklearn.compose import make_column_transformer
from sklearn.pipeline import make_pipeline
from sklearn.linear_model import LinearRegression,Lasso,Ridge
from sklearn.ensemble import RandomForestRegressor,GradientBoostingRegressor,AdaBoostRegressor
from sklearn.svm import SVR
from sklearn.tree import DecisionTreeRegressor
from sklearn.neighbors import KNeighborsRegressor

In [2]:
df = pd.read_csv('/Users/mac/Developer/Data Science Project/data/StudentsPerformance.csv')
df.shape

(999, 8)

In [3]:
df.head()

Unnamed: 0,gender,race/ethnicity,parental level of education,lunch,test preparation course,math score,reading score,writing score
0,female,group B,bachelor's degree,standard,none,72,72,74
1,female,group C,some college,standard,completed,69,90,88
2,female,group B,master's degree,standard,none,90,95,93
3,male,group A,associate's degree,free/reduced,none,47,57,44
4,male,group C,some college,standard,none,76,78,75


In [4]:
df.rename(columns={'race/ethnicity':'ethnicity','parental level of education':'edu_level','test preparation course':'course'},inplace=True)

In [5]:

df['avg_score'] = round((df['math score'] + df['reading score'] + df['writing score']) / 3,2)
df.head()

Unnamed: 0,gender,ethnicity,edu_level,lunch,course,math score,reading score,writing score,avg_score
0,female,group B,bachelor's degree,standard,none,72,72,74,72.67
1,female,group C,some college,standard,completed,69,90,88,82.33
2,female,group B,master's degree,standard,none,90,95,93,92.67
3,male,group A,associate's degree,free/reduced,none,47,57,44,49.33
4,male,group C,some college,standard,none,76,78,75,76.33


In [6]:
x = df.iloc[:,:-1]
y = df.iloc[:,-1]

categorical = x.select_dtypes(include='object').columns
numerical = x.select_dtypes(include=['int','float']).columns

x_train,x_test,y_train,y_test = train_test_split(x,y,test_size=0.2,random_state=42)

transformer = make_column_transformer(
    (StandardScaler(),numerical),
    (OneHotEncoder(drop='first'),categorical),
)
x_train_transformed = transformer.fit_transform(x_train)
x_test_transformed = transformer.transform(x_test)

In [7]:
models = {
    'LinearRegression':LinearRegression(),
    'Lasso':Lasso(),
    'Ridge':Ridge(),
    'SVR':SVR(),
    'DecisionTreeRegressor':DecisionTreeRegressor(),
    'RandomForestRegressor':RandomForestRegressor(),
    'GradientBoostingRegressor':GradientBoostingRegressor(),
    'AdaBoostRegressor':AdaBoostRegressor(),
    'KNeighborsRegressor':KNeighborsRegressor(),
}
    

for name,model in models.items():
    model.fit(x_train_transformed,y_train)
    y_pred = model.predict(x_test_transformed)
    
    print(f"Name:->{name}")
    print(f"R2_score:->{r2_score(y_test,y_pred)}")
    ms =(mean_squared_error(y_test,y_pred))
    ma = (mean_absolute_error(y_test,y_pred))
    print(f"Mean Squared Error:->{ms}")
    print(f"Mean Absolute Error:->{ma}")
    
    print('-'*100)
print('model train successfull.')

Name:->LinearRegression
R2_score:->0.9999999644834917
Mean Squared Error:->7.354439022471276e-06
Mean Absolute Error:->0.0022398566767618533
----------------------------------------------------------------------------------------------------
Name:->Lasso
R2_score:->0.99440269615116
Mean Squared Error:->1.1590393276723827
Mean Absolute Error:->0.8525026142243912
----------------------------------------------------------------------------------------------------
Name:->Ridge
R2_score:->0.9999996687776394
Mean Squared Error:->6.858654675892839e-05
Mean Absolute Error:->0.006552289762803695
----------------------------------------------------------------------------------------------------
Name:->SVR
R2_score:->0.8683392891035148
Mean Squared Error:->27.263115592688937
Mean Absolute Error:->1.3167847204109029
----------------------------------------------------------------------------------------------------
Name:->DecisionTreeRegressor
R2_score:->0.985920001141251
Mean Squared Error:->2.9

In [8]:
lr = LinearRegression()
lr.fit(x_train_transformed,y_train)
y_pred = lr.predict(x_test_transformed)
print("R-squared value: ",r2_score(y_test,y_pred))

R-squared value:  0.9999999644834917


In [9]:
lr_param = {'fit_intercept':[True,False],'copy_X':[True,False],'positive':[True,False]}
grid = GridSearchCV(estimator=lr,param_grid=lr_param,n_jobs=-1,cv=10)
grid.fit(x_train_transformed,y_train)


In [10]:
grid.best_estimator_

In [11]:
grid.best_params_

{'copy_X': True, 'fit_intercept': True, 'positive': True}

In [12]:
lere = LinearRegression(fit_intercept=True,copy_X=True,positive=True)
lere.fit(x_train_transformed,y_train)
y_pred = lere.predict(x_test_transformed)
print("Linear Regression Model Accuracy:",r2_score(y_test,y_pred))

Linear Regression Model Accuracy: 0.9999999649697878
