In [1]:
import numpy as np
from sklearn import linear_model as lm
from sklearn import preprocessing as skp
from sklearn.pipeline import Pipeline
# Print NumPy floats with at most two decimal places so array output stays compact.
np.set_printoptions(precision=2)

# Create synthetic data whose target is an exact quadratic: y = (3*x1 + 2*x2 + 1)^2

In [2]:
# Two features, ten samples each, spaced exactly 1.0 apart
# (endpoint=False drops the stop value, so x1 runs 2..11 and x2 runs 5..14).
x1 = np.linspace(2, 12, num=10, endpoint=False)
x2 = np.linspace(5, 15, num=10, endpoint=False)

# Target: the square of a linear combination of the two features.
linear_part = 3 * x1 + 2 * x2 + 1
y = np.power(linear_part, 2)

# Design matrix with one row per sample and one column per feature.
all_x = np.array([x1, x2]).T

In [3]:
# Print all_x to inspect the feature matrix: one row per sample, columns x1 and x2.
print(all_x) 

[[ 2.  5.]
 [ 3.  6.]
 [ 4.  7.]
 [ 5.  8.]
 [ 6.  9.]
 [ 7. 10.]
 [ 8. 11.]
 [ 9. 12.]
 [10. 13.]
 [11. 14.]]


In [4]:
# Expand the two raw features into every polynomial term up to degree 2.
# Each output row is [1, x1, x2, x1^2, x1*x2, x2^2].
poly_2 = skp.PolynomialFeatures(degree=2)
new_x = poly_2.fit_transform(all_x)  # fit and transform in a single call
print(new_x)

[[  1.   2.   5.   4.  10.  25.]
 [  1.   3.   6.   9.  18.  36.]
 [  1.   4.   7.  16.  28.  49.]
 [  1.   5.   8.  25.  40.  64.]
 [  1.   6.   9.  36.  54.  81.]
 [  1.   7.  10.  49.  70. 100.]
 [  1.   8.  11.  64.  88. 121.]
 [  1.   9.  12.  81. 108. 144.]
 [  1.  10.  13. 100. 130. 169.]
 [  1.  11.  14. 121. 154. 196.]]


In [6]:
# Fit a ridge regression (alpha=0.5) on the expanded polynomial features and
# report its R^2 score, evaluated on the same data it was trained on.
model = lm.Ridge(alpha=0.5).fit(new_x, y)
train_r2 = model.score(new_x, y)
print(train_r2)

0.9999999946655317


In [7]:
# The R^2 score looks good; sanity-check it by printing the first ten
# predictions next to the first ten true targets.
predictions = model.predict(new_x)
for label, values in (('Predictions:', predictions), ('Actuals:', y)):
    print(label, values[:10])

Predictions: [ 288.86  483.95  729.02 1024.07 1369.09 1764.09 2209.07 2704.03 3248.96
 3843.87]
Actuals: [ 289.  484.  729. 1024. 1369. 1764. 2209. 2704. 3249. 3844.]


In [8]:
# A scikit-learn Pipeline chains preprocessors and a final model into a single
# estimator, so the polynomial expansion and the regression no longer have to be
# run as separate manual steps.
# The pipeline is fed the raw features all_x (not the pre-expanded new_x): the
# polynomial expansion is now its first step. The target is still y.

these_functions = [('PreProcess', skp.PolynomialFeatures(degree=2)), ('Regress', lm.Ridge(alpha=0.5))]
pipe = Pipeline(these_functions)
pipe.fit(all_x, y)
# R^2 of the whole pipeline, evaluated on the same data it was fitted on.
print(pipe.score(all_x,y))

0.9999999946655317


In [9]:
# Predict through the whole pipeline from the raw features, then show the
# first ten predictions followed by the first ten true targets.
predictions = pipe.predict(all_x)
for values in (predictions, y):
    print(values[:10])

[ 288.86  483.95  729.02 1024.07 1369.09 1764.09 2209.07 2704.03 3248.96
 3843.87]
[ 289.  484.  729. 1024. 1369. 1764. 2209. 2704. 3249. 3844.]
