In [2]:
import sys
import numpy as np
filename = "Python-Machine-Learning-Cookbook/Chapter01/data_multivar.txt"
X = []
y = []

with open(filename, 'r') as f:
    for line in f.readlines():
        data = [float(i) for i in line.split(',')]
        xt, yt = data[:-1], data[-1]
        X.append(xt)
        y.append(yt)

print ("X: ", X)

X:  [[0.39, 2.78, 7.11], [1.65, 6.7, 2.42], [5.67, 6.38, 3.79], [2.31, 6.27, 4.8], [3.67, 6.67, 2.38], [3.64, 3.14, 2.38], [7.0, 3.85, 8.39], [8.9, 1.44, 3.99], [4.11, 6.75, 4.87], [5.44, 4.57, 3.15], [3.27, 5.41, 2.52], [2.68, 2.14, 5.75], [1.7, 3.19, 2.4], [1.25, 5.44, 2.54], [1.06, 4.38, 4.78], [1.55, 7.69, 2.33], [6.11, -0.74, 4.08], [3.47, 4.41, 5.34], [4.99, 2.43, 0.79], [1.39, 4.5, -0.3], [-0.13, 1.47, 4.46], [2.37, 7.91, 6.81], [4.06, 2.13, 10.66], [7.07, 3.06, -3.01], [6.11, 2.1, 5.01], [3.73, 7.12, 5.96], [1.57, 7.46, 1.42], [2.4, 6.25, 4.31], [3.99, -0.73, 3.06], [-1.25, -0.66, 2.02], [6.4, 5.24, 1.32], [1.55, 0.69, 8.3], [4.2, 2.81, 3.41], [3.4, -0.26, 4.58], [2.83, 6.96, 6.55], [6.19, 5.66, 6.65], [8.63, 5.57, 2.89], [-0.99, 10.22, 3.76], [3.74, 1.03, 7.55], [4.16, 5.16, 2.91], [6.38, 4.41, 2.3], [8.57, 4.82, 2.22], [2.98, 5.5, 6.58], [3.54, 6.2, 10.25], [1.94, 0.24, 8.3], [6.44, 5.56, 4.54], [0.74, 3.22, 3.95], [1.58, 6.17, 4.59], [6.9, 8.74, 2.26], [8.35, -0.47, 5.26], [

In [3]:
num_training = int(0.8*len(X))
num_test = len(X) - num_training

# Training data
X_train = np.array(X[:num_training])
y_train = np.array(y[:num_training])

# Testing data
X_test = np.array(X[num_training:])
y_test = np.array(y[num_training:])
print("X_train: ", X_train)


X_train:  [[0.39 2.78 7.11]
 [1.65 6.7  2.42]
 [5.67 6.38 3.79]
 ...
 [2.16 1.13 0.74]
 [7.04 3.19 1.16]
 [1.65 0.62 0.17]]


In [4]:
from sklearn import linear_model

print("X_train: ", X_train)
# Create linear regression object
linear_regressor = linear_model.LinearRegression()

# Train the model using the training sets
linear_regressor.fit(X_train, y_train)

# Create ridge regressor object
ridge_regressor = linear_model.Ridge(alpha=0.01, fit_intercept=True, max_iter=10000)

# Train the model using the training sets
ridge_regressor.fit(X_train, y_train)

# Predict the output
y_test_pred = linear_regressor.predict(X_test)
y_test_pred_ridge = ridge_regressor.predict(X_test)

X_train:  [[0.39 2.78 7.11]
 [1.65 6.7  2.42]
 [5.67 6.38 3.79]
 ...
 [2.16 1.13 0.74]
 [7.04 3.19 1.16]
 [1.65 0.62 0.17]]


In [5]:
# Report on the metrics of this linear regression
import sklearn.metrics as sm

print ("Linear Regressor\n--------------------")
print ("Mean absolute error:      ", round(sm.mean_absolute_error(y_test, y_test_pred), 3))
print ("Mean squared error:       ", round(sm.mean_squared_error(y_test, y_test_pred), 3))
print ("Median absolute error:    ", round(sm.median_absolute_error(y_test, y_test_pred), 3))
print ("Explained variance score: ", round(sm.explained_variance_score(y_test, y_test_pred), 3))
print ("R2 score:                 ", round(sm.r2_score(y_test, y_test_pred), 3))

Linear Regressor
--------------------
Mean absolute error:       3.95
Mean squared error:        23.146
Median absolute error:     3.686
Explained variance score:  0.836
R2 score:                  0.834


In [9]:
print ("Ridge Regressor\n--------------------")
print ("Mean absolute error:      ", round(sm.mean_absolute_error(y_test, y_test_pred_ridge), 3))
print ("Mean squared error:       ", round(sm.mean_squared_error(y_test, y_test_pred_ridge), 3))
print ("Median absolute error:    ", round(sm.median_absolute_error(y_test, y_test_pred_ridge), 3))
print ("Explained variance score: ", round(sm.explained_variance_score(y_test, y_test_pred_ridge), 3))
print ("R2 score:                 ", round(sm.r2_score(y_test, y_test_pred_ridge), 3))

Ridge Regressor
--------------------
Mean absolute error:       3.95
Mean squared error:        23.145
Median absolute error:     3.686
Explained variance score:  0.836
R2 score:                  0.834


In [17]:
from sklearn.preprocessing import PolynomialFeatures

polynomial = PolynomialFeatures(degree=100)
print (X_train)
X_train_transformed = polynomial.fit_transform(X_train)

[[0.39 2.78 7.11]
 [1.65 6.7  2.42]
 [5.67 6.38 3.79]
 ...
 [2.16 1.13 0.74]
 [7.04 3.19 1.16]
 [1.65 0.62 0.17]]


In [None]:
datapoint = [[0.39, 2.78, 7.11]]
poly_datapoint = polynomial.fit_transform(datapoint)

poly_linear_model = linear_model.LinearRegression()
poly_linear_model.fit(X_train_transformed, y_train)
print ("Linear regression:     ", linear_regressor.predict(datapoint))
print ("Polymomial regression: ", poly_linear_model.predict(poly_datapoint)[0])

Linear regression:      [-11.0587295]
Polymomial regression:  10.252554570286645
