## Test of module linear_model.LinearRegression with diabetes data

Remember to move this notebook to the parent directory before executing the tests.

In [1]:
import numpy as np
import sklearn.model_selection
#from sklearn.metrics import r2_score
from metrics import mean_squared_error as fh_mse
from metrics import r2_score as fh_r2

In [2]:
# Load the diabetes table from CSV. Every column except the last is treated
# as a feature; the last column is the regression target.
data = np.loadtxt('data/diabetes.csv', delimiter=',')
features = data[:, :-1]
targets = data[:, -1]

# Hold out part of the rows for testing (test_size=0.2); the fixed
# random_state keeps the split identical from run to run.
X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(
    features,
    targets,
    test_size=0.2,
    random_state=42,
)

In [3]:
# fhlearn model

from linear_model import LinearRegression as fh_model

# Build the regressor, fit it on the training split, and predict the
# held-out test rows.
regr = fh_model()
regr.fit(X_train, y_train)
y_pred = regr.predict(X_test)

# Score the predictions with the metric helpers imported from `metrics`.
f_mse, f_r2 = fh_mse(y_test, y_pred), fh_r2(y_test, y_pred)

# Report the fitted coefficients, the test-set mean squared error, and the
# coefficient of determination (1 is perfect prediction).
print('Coefficients: \n', regr.coef_)
print('Mean squared error: %.2f' % f_mse)
print('Coefficient of determination: %.2f' % f_r2)

# Value returned by the model's own score() method on the test split.
print('fh R2', regr.score(X_test, y_test))

Coefficients: 
 [  37.90031426 -241.96624835  542.42575342  347.70830529 -931.46126093
  518.04405547  163.40353476  275.31003837  736.18909839   48.67112488]
Mean squared error: 2900.17
Coefficient of determination: 0.45
fh R2 0.45260660216173676


In [4]:
# sklearn model

from sklearn.linear_model import LinearRegression as sk_model

# Reference model: scikit-learn's LinearRegression fitted on the same
# training split. NOTE: this rebinds `regr`, so the fhlearn model created in
# the previous cell is no longer reachable through that name.
regr = sk_model()

# Train the model using the training sets
regr.fit(X_train, y_train)

# Make predictions using the testing set
sk_y_pred = regr.predict(X_test)

# Score with the same metric helpers (fh_mse / fh_r2) used for the fhlearn
# predictions, so that only the models differ between the two cells.
sk_mse = fh_mse(y_test, sk_y_pred)
sk_r2 = fh_r2(y_test, sk_y_pred)

# The coefficients
print('Coefficients: \n', regr.coef_)
# The mean squared error
print('Mean squared error: %.2f' % sk_mse)
# The coefficient of determination: 1 is perfect prediction
print('Coefficient of determination: %.2f' % sk_r2)

# This value comes from scikit-learn's own score() method, so it is labelled
# 'sk R2' (the previous 'fh R2' label was copied from the fhlearn cell).
print('sk R2', regr.score(X_test, y_test))

Coefficients: 
 [  37.90031426 -241.96624835  542.42575342  347.70830529 -931.46126093
  518.04405547  163.40353476  275.31003837  736.18909839   48.67112488]
Mean squared error: 2900.17
Coefficient of determination: 0.45
fh R2 0.452606602161738


In [5]:
# np.allclose accepts a pair of elements when |a - b| <= atol + rtol * |b|,
# so the effective tolerance has a relative part as well as an absolute one.
# The success message reports both values, formatted from the very variables
# passed to the check, so the text cannot drift from the test.
PRED_RTOL, PRED_ATOL = 1e-05, 1e-08

if np.allclose(y_pred, sk_y_pred, rtol=PRED_RTOL, atol=PRED_ATOL):
    print('fhlearn and sklearn making similar predictions on the test features (rtol=%g, atol=%g)'
          % (PRED_RTOL, PRED_ATOL))
else:
    print('fhlearn and sklearn predictions differ :(')

fhlearn and sklearn making similar predictions on the test features within a tolerance of 0.00000001


In [6]:
# Compare with an explicit absolute tolerance rather than
# round(a, 8) == round(b, 8): rounding can place two almost-equal values on
# opposite sides of a rounding boundary and report a spurious mismatch.
# rtol=0.0 keeps this a purely absolute check at the 8th decimal place.
if np.isclose(f_mse, sk_mse, rtol=0.0, atol=1e-08):
    print('fhlearn and sklearn computing similar MSE')
else:
    print('fhlearn and sklearn computing differing MSE :(')

fhlearn and sklearn computing similar MSE


In [7]:
# Compare with an explicit absolute tolerance rather than
# round(a, 8) == round(b, 8): rounding can place two almost-equal values on
# opposite sides of a rounding boundary and report a spurious mismatch.
# rtol=0.0 keeps this a purely absolute check at the 8th decimal place.
if np.isclose(f_r2, sk_r2, rtol=0.0, atol=1e-08):
    print('fhlearn and sklearn computing similar R2')
else:
    print('fhlearn and sklearn computing differing R2 :(')

fhlearn and sklearn computing similar R2
