In [2]:
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
import time
from sklearn.tree import DecisionTreeRegressor
from sklearn.svm import SVR
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import xgboost as xgb
from sklearn.ensemble import RandomForestRegressor
from scipy.sparse import random as sparse_random
from sklearn.random_projection import GaussianRandomProjection

In [44]:
data = pd.read_csv("StudentPerformanceFactors_Encoded.csv")
cols = ['Sleep_Hours', 'Motivation_Level', 'Family_Income', 'School_Type', 'Gender']
data = data.drop(columns=cols, axis = 1)
x = data.iloc[:, :-1]
y = data.iloc[:, -1]

In [52]:
def get_sparse_jl_matrix(d, cols):
    rows = 100*d
    return sparse_random(rows, cols, density=0.1, format='csr', data_rvs= np.random.randn)

In [53]:
D = x

In [54]:
n, d = x.shape

In [55]:
def project_data(D, y):
    M = get_sparse_jl_matrix(d, n)
        
    E = M @ D
    z = M @ y
    
    return E, z

In [56]:
def eval(x, y,  model):
    x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)
    
    x_train, y_train = project_data(x, y)
    
    start_time = time.time()
    model.fit(x_train, y_train)
    full_time = time.time() - start_time
    
    y_pred = model.predict(x_test)
    mse = mean_squared_error(y_test, y_pred)
    mae = mean_absolute_error(y_test, y_pred)
    rmse = np.sqrt(mse)
    r2 = r2_score(y_test, y_pred)
    
    metrics = {
        "Time" : full_time,
        "MSE" : mse,
        "MAE" : mae,
        "RMSE" : rmse,
        "R2" : r2
    }
    
    return metrics
    

In [61]:
model = LinearRegression()
m = eval(x, y, model)



In [62]:
m

{'Time': 0.003147125244140625,
 'MSE': 1.2691015997981763,
 'MAE': 0.9155840627137695,
 'RMSE': 1.1265440958072508,
 'R2': 0.8835555940868411}

In [63]:
model = DecisionTreeRegressor(random_state=42)
m = eval(x, y, model)



In [64]:
m

{'Time': 0.02236461639404297,
 'MSE': 999.4025565005338,
 'MAE': 29.71748419368079,
 'RMSE': 31.613328779180055,
 'R2': -90.69859763655153}

In [65]:
model = RandomForestRegressor(n_estimators=100, random_state=42)
m = eval(x, y, model)



In [66]:
m

{'Time': 1.5833396911621094,
 'MSE': 400.5450155136806,
 'MAE': 19.780649793126642,
 'RMSE': 20.01362074972144,
 'R2': -35.7513730818595}

In [67]:
model = SVR(kernel='rbf')
m = eval(x, y, model)



In [68]:
m

{'Time': 0.09522771835327148,
 'MSE': 24.008715819036,
 'MAE': 4.262672862205676,
 'RMSE': 4.899868959373913,
 'R2': -1.2028816689933248}

In [69]:
model = xgb.XGBRegressor(objective='reg:squarederror', n_estimators=100, random_state=42)
m = eval(x, y, model)

In [70]:
m

{'Time': 0.2183668613433838,
 'MSE': 200.12495613809037,
 'MAE': 13.261354562152155,
 'RMSE': 14.14655280052672,
 'R2': -17.36214817600326}