In [1]:
import numpy as np
from sklearn import linear_model
import sklearn.metrics as sm
from sklearn.preprocessing import PolynomialFeatures

In [3]:
# Comma-separated dataset read by the loading cell; path is relative to the
# notebook's working directory.
input_file = "data_multivar_regr.txt"

In [8]:
# Parse the comma-delimited file into a 2-D float array.
data = np.loadtxt(input_file, delimiter=',')

# Every column except the last is a feature; the last column is the target.
X = data[:, :-1]
Y = data[:, -1]

In [9]:
# Hold out the trailing 20% of rows for testing. Rows are taken in file order;
# nothing is shuffled here.
num_training = int(0.8 * len(X))
num_test = len(X) - num_training

# Slice features and targets at the same boundary so rows stay aligned.
X_train, X_test = X[:num_training], X[num_training:]
Y_train, Y_test = Y[:num_training], Y[num_training:]

In [10]:
# Ordinary least-squares baseline: fit on the training rows (fit() returns the
# estimator itself), then predict the held-out rows for evaluation.
linear_regressor = linear_model.LinearRegression().fit(X_train, Y_train)
Y_test_pred = linear_regressor.predict(X_test)



In [11]:
# Report each metric for the held-out predictions, rounded to two decimals.
# The table keeps the label text and the metric function side by side.
print("Linear Regressor performance:")
for label, metric in (
    ("Mean absolute error =", sm.mean_absolute_error),
    ("Mean squared error =", sm.mean_squared_error),
    ("Median absolute error =", sm.median_absolute_error),
    ("Explained variance score =", sm.explained_variance_score),
    ("R2 score =", sm.r2_score),
):
    print(label, round(metric(Y_test, Y_test_pred), 2))

Linear Regressor performance:
Mean absolute error = 3.58
Mean squared error = 20.31
Median absolute error = 2.99
Explained variance score = 0.86
R2 score = 0.86


In [14]:
# Polynomial regression
# PolynomialFeatures expands the inputs with every polynomial term up to the
# given degree, including the interaction terms between features
# (e.g. X1*X2, X2*X3, X1*X2*X3, ...).
polynomial = PolynomialFeatures(degree=10)

# Fit the expansion on the training features only.
X_train_transformed = polynomial.fit_transform(X_train)

# Single query point with the same three features as the training rows.
datapoint = [[7.75, 6.35, 5.56]]

# Use transform(), not fit_transform(): the query point must go through the
# expansion already fitted on X_train rather than refitting the transformer
# on inference data.
poly_datapoint = polynomial.transform(datapoint)

In [21]:
# Show the training feature matrix: its shape first, then its contents.
print(X_train.shape, X_train, sep="\n")

(560, 3)
[[ 2.06  3.48  7.21]
 [ 6.37  3.01  7.27]
 [ 1.18  1.2   5.42]
 ..., 
 [ 0.67  7.41  2.35]
 [ 8.26  2.93  2.69]
 [ 1.21  2.18  3.17]]


In [20]:
# Show the polynomial-expanded training matrix: its shape first, then its contents.
print(X_train_transformed.shape, X_train_transformed, sep="\n")

[[  1.00000000e+00   2.06000000e+00   3.48000000e+00 ...,   8.84383448e+07
    1.83230019e+08   3.79623114e+08]
 [  1.00000000e+00   6.37000000e+00   3.01000000e+00 ...,   7.06981872e+07
    1.70756087e+08   4.12424170e+08]
 [  1.00000000e+00   1.18000000e+00   1.20000000e+00 ...,   1.07239995e+06
    4.84367310e+06   2.18772568e+07]
 ..., 
 [  1.00000000e+00   6.70000000e-01   7.41000000e+00 ...,   5.10715824e+04
    1.61967906e+04   5.13663401e+03]
 [  1.00000000e+00   8.26000000e+00   2.93000000e+00 ...,   2.35371286e+04
    2.16091727e+04   1.98391381e+04]
 [  1.00000000e+00   1.21000000e+00   2.18000000e+00 ...,   4.84604101e+04
    7.04676605e+04   1.02469029e+05]]
(560, 286)


In [19]:
# Fit a linear model on the polynomial-expanded training features
# (fit() returns the estimator itself).
poly_linear_model = linear_model.LinearRegression().fit(X_train_transformed, Y_train)

# Compare both models on the same query point: the plain linear model takes the
# raw datapoint, the polynomial model takes its expanded form.
for header, prediction in (
    ("\nLinear regression:\n", linear_regressor.predict(datapoint)),
    ("\nPolynomial regression:\n", poly_linear_model.predict(poly_datapoint)),
):
    print(header, prediction)


Linear regression:
 [ 36.05286276]

Polynomial regression:
 [ 41.47360915]
