In [20]:
import numpy as np
from sklearn import linear_model
import sklearn.metrics as sm
from sklearn.preprocessing import PolynomialFeatures

In [21]:
# Input file containing data
# Relative path to the comma-delimited text file that np.loadtxt reads when the data is loaded.
input_file = './files/data_multivar_regr.txt'

In [22]:
# Read the comma-delimited file into a single 2-D array
data = np.loadtxt(input_file, delimiter=',')
# Every column except the last is a feature ...
X = data[:, :-1]
# ... and the last column is the regression target
y = data[:, -1]

In [23]:
# Display the feature matrix (every column of the loaded data except the last).
X

array([[ 2.06,  3.48,  7.21],
       [ 6.37,  3.01,  7.27],
       [ 1.18,  1.2 ,  5.42],
       ...,
       [ 3.61,  2.22,  5.77],
       [ 1.38,  2.69, -0.33],
       [ 5.35,  5.  ,  5.08]])

In [24]:
# Display the target vector (the last column of the loaded data).
y

array([ 15.69,  15.34,   0.66,  38.37,   9.96,  23.4 ,  24.15,  24.89,
        30.23,  41.2 ,  41.35,  29.32,  22.85,  21.27,  15.88,  14.21,
        35.16,  39.08,  26.12,   3.9 ,  12.64,  33.7 ,   6.83,  33.47,
        36.05,   4.67,  24.09,  34.73,  35.51,  27.5 ,  49.47,  27.05,
        35.34,  31.82,  18.7 ,  -0.38,  41.75,  41.74,  32.11,  16.86,
        14.41,  17.82,  20.97,  26.3 ,   9.05,  15.33,   6.86,  32.85,
        19.93,  42.34,   3.61,  36.07,  26.98,  35.39,  34.44,  34.27,
        31.25,  19.96,  33.83,  28.35,   7.12,  25.25,  26.46,  10.66,
        20.06,  39.58,  19.96,  27.79,  23.84,  -3.79,  27.42,  -8.  ,
        23.15,  39.55,  32.17,  11.3 ,  49.68,  40.24,  49.52,   8.87,
        47.47,  14.47,  30.69,  31.27,  47.26,  32.2 ,   8.83,  47.21,
        25.95,  27.29,  -7.36,  32.78,  42.04,  41.51,  16.42,   6.23,
        35.7 ,  21.35,  13.48,  17.21,  14.97,  27.89,  22.08,  20.5 ,
        34.06,  28.63,  31.23,  -1.2 ,  -1.84,  17.74,  28.4 ,  -1.3 ,
      

In [25]:
# Sequential 80/20 split: the first 80% of rows train the model and the
# remaining rows are held out for testing (rows are not shuffled).
num_training = int(0.8 * len(X))
num_test = len(X) - num_training
# Feature rows: training part, then test part
X_train, X_test = X[:num_training], X[num_training:]
# Target values, cut at the same row index so they stay aligned with X
y_train, y_test = y[:num_training], y[num_training:]

In [26]:
# Build an ordinary least-squares regressor and train it on the training set
# (fit() returns the estimator itself, so construction and training can be chained)
linear_regressor = linear_model.LinearRegression().fit(X_train, y_train)
# Predict the targets of the held-out test rows
y_test_pred = linear_regressor.predict(X_test)

In [27]:
# Report regression metrics for the linear model on the held-out test set
print("Linear Regressor performance:")
# (label, metric function) pairs, listed in the order they are printed
metric_table = (
    ("Mean absolute error =", sm.mean_absolute_error),
    ("Mean squared error =", sm.mean_squared_error),
    ("Median absolute error =", sm.median_absolute_error),
    ("Explained variance score =", sm.explained_variance_score),
    ("R2 score =", sm.r2_score),
)
for label, metric in metric_table:
    # Each metric takes (true values, predicted values); show two decimals
    print(label, round(metric(y_test, y_test_pred), 2))

Linear Regressor performance:
Mean absolute error = 3.58
Mean squared error = 20.31
Median absolute error = 2.99
Explained variance score = 0.86
R2 score = 0.86


In [28]:
"""
Create a polynomial regressor of degree 10 and train it on the training dataset.
Let's take a sample datapoint and see how to perform prediction. The first step is to
transform it into polynomial features:
"""
# Polynomial regression
# Fit the degree-10 polynomial expansion on the training features and expand them.
polynomial = PolynomialFeatures(degree=10)
X_train_transformed = polynomial.fit_transform(X_train)
# Sample datapoint (one row, three features) used to compare the two models.
datapoint = [[7.75, 6.35, 5.56]]
# Use transform(), not fit_transform(): the expansion was already fitted on the
# training data and must not be re-fitted on the data being predicted.
poly_datapoint = polynomial.transform(datapoint)


In [29]:
# Train a second linear model, this time on the polynomial-expanded training features
poly_linear_model = linear_model.LinearRegression().fit(X_train_transformed, y_train)

# Prediction of the plain linear model for the raw sample datapoint
linear_prediction = linear_regressor.predict(datapoint)
print("\nLinear regression:\n", linear_prediction)

# Prediction of the polynomial model for the expanded sample datapoint
poly_prediction = poly_linear_model.predict(poly_datapoint)
print("\nPolynomial regression:\n", poly_prediction)



Linear regression:
 [36.05286276]

Polynomial regression:
 [41.46211642]
