In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
data_full = pd.read_csv("cleandata.csv", sep=(","))

In [2]:
from sklearn.model_selection import train_test_split
train_set, test_set = train_test_split(data_full,
 test_size=0.2, random_state=123)
print('Train size: ', len(train_set), 'Test size: ', len(test_set))

Train size:  816 Test size:  204


In [30]:
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score

X = train_set[['AVG_SCORE', 'TEST_YEAR']]
y = train_set['EXPENSE_PER_STUDENT']
X_test = test_set[['AVG_SCORE', 'TEST_YEAR']]
y_test = test_set['EXPENSE_PER_STUDENT']

lrTwoInput_model = LinearRegression()
lrTwoInput_model.fit(X,y)

y_pred = lrTwoInput_model.predict(X)
print('Results for linear regression on training data')
print('Input: Score, Test Year')
print(' Default settings')
print('Internal parameters:')
print(' Bias is ', lrTwoInput_model.intercept_)
print(' Coefficients', lrTwoInput_model.coef_)
print(' Score', lrTwoInput_model.score(X,y))
print('MAE is ', mean_absolute_error(y, y_pred))
print('RMSE is ', np.sqrt(mean_squared_error(y, y_pred)))
print('MSE is ', mean_squared_error(y, y_pred))
print('R^2 ', r2_score(y,y_pred))

y_test_pred = lrTwoInput_model.predict(X_test)
print()
print('Results for linear regression on test data')
print('Input: Score, Test Year')
print('MAE is ', mean_absolute_error(y_test, y_test_pred))
print('RMSE is ', np.sqrt(mean_squared_error(y_test,
y_test_pred)))
print('MSE is ', mean_squared_error(y_test, y_test_pred))
print('R^2 ', r2_score(y_test,y_test_pred))

Results for linear regression on training data
Input: Score, Test Year
 Default settings
Internal parameters:
 Bias is  -8.982018401372585
 Coefficients [ 0.07699569 -0.83706805]
 Score 0.15262824960105847
MAE is  1.343334327505814
RMSE is  1.8593782373312229
MSE is  3.4572874294609655
R^2  0.15262824960105847

Results for linear regression on test data
Input: Score, Test Year
MAE is  1.3280942163231917
RMSE is  1.7736128203896422
MSE is  3.145702436650501
R^2  0.12891639653197684


In [68]:
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score

In [84]:
#2 variable linear regression
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score

X = train_set[['EXPENSE_PER_STUDENT', 'TEST_YEAR']]
y = train_set['AVG_SCORE']
X_test = test_set[['EXPENSE_PER_STUDENT', 'TEST_YEAR']]
y_test = test_set['AVG_SCORE']

lrTwoInput_model = LinearRegression()
lrTwoInput_model.fit(X,y)

y_pred = lrTwoInput_model.predict(X)
print('Results for linear regression on training data')
print('Input: Expense, Test Year')
print(' Default settings')
print('Internal parameters:')
print(' Bias is ', lrTwoInput_model.intercept_)
print(' Coefficients', lrTwoInput_model.coef_)
print(' Score', lrTwoInput_model.score(X,y))
print('MAE is ', mean_absolute_error(y, y_pred))
print('RMSE is ', np.sqrt(mean_squared_error(y, y_pred)))
print('MSE is ', mean_squared_error(y, y_pred))
print('R^2 ', r2_score(y,y_pred))

y_test_pred = lrTwoInput_model.predict(X_test)
print()
print('Results for linear regression on test data')
print('Input: Expense, Test Year')
print('MAE is ', mean_absolute_error(y_test, y_test_pred))
print('RMSE is ', np.sqrt(mean_squared_error(y_test,
y_test_pred)))
print('MSE is ', mean_squared_error(y_test, y_test_pred))
print('R^2 ', r2_score(y_test,y_test_pred))

Results for linear regression on training data
Input: Expense, Test Year
 Default settings
Internal parameters:
 Bias is  179.6748009124244
 Coefficients [ 1.98223774 10.84604436]
 Score 0.8452275682051464
MAE is  7.115020587469477
RMSE is  9.43435916688901
MSE is  89.00713288986269
R^2  0.8452275682051464

Results for linear regression on test data
Input: Expense, Test Year
MAE is  6.719951640034663
RMSE is  8.89734075805416
MSE is  79.16267256493175
R^2  0.868928688206606


In [86]:
#Polynomial regression
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score
from sklearn.preprocessing import PolynomialFeatures
power = 6
poly_process = PolynomialFeatures(degree=power, include_bias=False)

X = train_set[['EXPENSE_PER_STUDENT', 'TEST_YEAR']]
y = train_set['AVG_SCORE']
X_poly = poly_process.fit_transform(X)

X_test = test_set[['EXPENSE_PER_STUDENT', 'TEST_YEAR']]
y_test = test_set['AVG_SCORE']
X_poly_test = poly_process.fit_transform(X_test)

lr_model = LinearRegression()
lr_model.fit(X_poly,y)

y_pred = lr_model.predict(X_poly)
print('Results for linear regression on training data')
print('Polynomial regression with degree ', power)
print(' Default settings')
print('Internal parameters:')
print(' Bias is ', lr_model.intercept_)
print(' Coefficients', lr_model.coef_)
print(' Score', lr_model.score(X_poly,y))
print('MAE is ', mean_absolute_error(y, y_pred))
print('RMSE is ', np.sqrt(mean_squared_error(y, y_pred)))
print('MSE is ', mean_squared_error(y, y_pred))
print('R^2 ', r2_score(y,y_pred))

y_test_pred = lr_model.predict(X_poly_test)
print()
print('Results for linear regression on test data')
print('MAE is ', mean_absolute_error(y_test, y_test_pred))
print('RMSE is ', np.sqrt(mean_squared_error(y_test,
y_test_pred)))
print('MSE is ', mean_squared_error(y_test, y_test_pred))
print('R^2 ', r2_score(y_test,y_test_pred))

Results for linear regression on training data
Polynomial regression with degree  6
 Default settings
Internal parameters:
 Bias is  8.491269220772438
 Coefficients [ 9.04090731e-03  2.99191113e-09 -4.40140267e-02  3.49783131e-02
  3.58708371e-08  1.01365998e-01 -1.64131945e-01  1.30430723e-01
  3.34791614e-07 -1.04496908e-01  3.46642551e-01 -5.61134488e-01
  4.45862657e-01  2.86964181e-06  3.66790153e-02 -2.76772850e-01
  9.15998676e-01 -1.48139161e+00  1.17656875e+00  2.37223719e-05
 -6.40718011e-04  1.28669647e-04  2.26268577e-02 -1.00577528e-01
  1.79604285e-01 -1.48780046e-01  1.92839931e-04]
 Score 0.8587784993249336
MAE is  6.656683596511812
RMSE is  9.011892733651335
MSE is  81.21421064283773
R^2  0.8587784993249336

Results for linear regression on test data
MAE is  6.602485081699719
RMSE is  8.810855094985389
MSE is  77.63116750482997
R^2  0.8714644335363396


In [91]:
#Polynomial regression
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score
from sklearn.preprocessing import PolynomialFeatures
power = 2
poly_process = PolynomialFeatures(degree=power, include_bias=False)

X = train_set[['EXPENSE_PER_STUDENT', 'TEST_YEAR']]
y = train_set['AVG_SCORE']
X_poly = poly_process.fit_transform(X)

X_test = test_set[['EXPENSE_PER_STUDENT', 'TEST_YEAR']]
y_test = test_set['AVG_SCORE']
X_poly_test = poly_process.fit_transform(X_test)

lr_model = LinearRegression()
lr_model.fit(X_poly,y)

y_pred = lr_model.predict(X_poly)
print('Results for linear regression on training data')
print('Polynomial regression with degree ', power)
print(' Default settings')
print('Internal parameters:')
print(' Bias is ', lr_model.intercept_)
print(' Coefficients', lr_model.coef_)
print(' Score', lr_model.score(X_poly,y))
print('MAE is ', mean_absolute_error(y, y_pred))
print('RMSE is ', np.sqrt(mean_squared_error(y, y_pred)))
print('MSE is ', mean_squared_error(y, y_pred))
print('R^2 ', r2_score(y,y_pred))

y_test_pred = lr_model.predict(X_poly_test)
print()
print('Results for linear regression on test data')
print('MAE is ', mean_absolute_error(y_test, y_test_pred))
print('RMSE is ', np.sqrt(mean_squared_error(y_test,
y_test_pred)))
print('MSE is ', mean_squared_error(y_test, y_test_pred))
print('R^2 ', r2_score(y_test,y_test_pred))

Results for linear regression on training data
Polynomial regression with degree  2
 Default settings
Internal parameters:
 Bias is  192.7557169519023
 Coefficients [ 6.90649912  0.07828486 -0.30235392 -0.0898886   0.93941831]
 Score 0.854064256323466
MAE is  6.7838910131878745
RMSE is  9.161075038952918
MSE is  83.92529586932619
R^2  0.854064256323466

Results for linear regression on test data
MAE is  6.5273857549967715
RMSE is  8.821396552413258
MSE is  77.81703713492851
R^2  0.8711566852573817


In [101]:
#Elastic Net with Poly features
from sklearn.linear_model import ElasticNet
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score
from sklearn.preprocessing import PolynomialFeatures
power = 6
poly_process = PolynomialFeatures(degree=power, include_bias=False)

X = train_set[['EXPENSE_PER_STUDENT', 'TEST_YEAR']]
y = train_set['AVG_SCORE']
X_poly = poly_process.fit_transform(X)

X_test = test_set[['EXPENSE_PER_STUDENT', 'TEST_YEAR']]
y_test = test_set['AVG_SCORE']
X_poly_test = poly_process.fit_transform(X_test)

reg_lr_model = ElasticNet(alpha=0.3, l1_ratio=0.5)
reg_lr_model.fit(X_poly,y)

y_pred = reg_lr_model.predict(X_poly)
print('Results for elastic net on training data')
print('Polynomial regression with degree ', power)
print(' Default settings')
print('Internal parameters:')
print(' Bias is ', reg_lr_model.intercept_)
print(' Coefficients', reg_lr_model.coef_)
print(' Score', reg_lr_model.score(X_poly,y))
print('MAE is ', mean_absolute_error(y, y_pred))
print('RMSE is ', np.sqrt(mean_squared_error(y, y_pred)))
print('MSE is ', mean_squared_error(y, y_pred))
print('R^2 ', r2_score(y,y_pred))

y_test_pred = reg_lr_model.predict(X_poly_test)
print()
print('Results for elastic net on test data')
print('MAE is ', mean_absolute_error(y_test, y_test_pred))
print('RMSE is ', np.sqrt(mean_squared_error(y_test,
y_test_pred)))
print('MSE is ', mean_squared_error(y_test, y_test_pred))
print('R^2 ', r2_score(y_test,y_test_pred))

Results for elastic net on training data
Polynomial regression with degree  6
 Default settings
Internal parameters:
 Bias is  209.51761457115742
 Coefficients [ 0.00000000e+00  0.00000000e+00  3.05306000e-01  5.17428936e-01
  2.94870102e-01 -2.51177858e-02  3.41970101e-02  1.14818417e-02
  1.76981762e-02 -2.16459109e-03  1.67703944e-03 -7.85187510e-04
 -2.87749106e-03  2.09572725e-03  1.33827759e-06 -2.10138511e-06
  1.39427237e-04 -2.38144988e-04 -3.35631150e-04  3.03472001e-04
  8.61092257e-06 -1.45726094e-05  6.06074125e-06  1.55650889e-05
 -3.10883797e-05 -3.78231251e-05  3.88081381e-05]
 Score 0.8547747791055283
MAE is  6.761306991479674
RMSE is  9.138746394361096
MSE is  83.51668566044793
R^2  0.8547747791055283

Results for elastic net on test data
MAE is  6.562453595934035
RMSE is  8.851493658114228
MSE is  78.34893997963638
R^2  0.8702760024640448


  model = cd_fast.enet_coordinate_descent(
