## Linear regression

In [84]:
# Import packages 
import numpy as np
from sklearn.linear_model import LinearRegression

In [85]:
# Provide data: the single input is laid out as a column (one row per
# observation), while the responses stay one-dimensional.
y = np.array([5, 20, 14, 32, 22, 38])
x = np.array([[5], [15], [25], [35], [45], [55]])

In [86]:
# Check x: expect a 2-D array with a single column (one row per observation)
print(x)

[[ 5]
 [15]
 [25]
 [35]
 [45]
 [55]]


In [87]:
# Check y: expect a flat 1-D array with one response per row of x
print(y)

[ 5 20 14 32 22 38]


In [88]:
# Create the estimator, then fit it to the data.
# fit() hands back the fitted estimator; because that call is the cell's last
# expression, the notebook echoes `LinearRegression()` underneath.
model = LinearRegression()
model.fit(x, y)

LinearRegression()

In [89]:
# Evaluate coefficient of determination (R^2).
# It is scored on the same x and y the model was fitted on, so this is an
# in-sample figure rather than a measure on unseen data.
r_sq = model.score(x, y)
print('coefficient of determination:', r_sq)

coefficient of determination: 0.7158756137479542


In [90]:
# Evaluate intercept (b0): the predicted response when x = 0
print('intercept:', model.intercept_)

intercept: 5.633333333333329


In [91]:
# Evaluate slope (b1); coef_ is an array with one entry per input column,
# so a single-input model prints a one-element array
print('slope:', model.coef_)

slope: [0.54]


In [92]:
# Predict the response for the same inputs the model was fitted on
y_pred = model.predict(x)
print('predicted response:')
print(y_pred)

predicted response:
[ 8.33333333 13.73333333 19.13333333 24.53333333 29.93333333 35.33333333]


In [93]:
# Apply the fitted model to new inputs 0..4, shaped as a column like x
x_new = np.arange(5)[:, np.newaxis]
y_new = model.predict(x_new)
print(y_new)

[5.63333333 6.17333333 6.71333333 7.25333333 7.79333333]


## Multiple linear regression

In [94]:
# Import packages
import numpy as np
from sklearn.linear_model import LinearRegression

In [95]:
# Provide data: each row of x holds the two input values of one observation,
# and y holds the matching responses.
x = np.array([
    [0, 1], [5, 1], [15, 2], [25, 5],
    [35, 11], [45, 15], [55, 34], [60, 35],
])
y = np.array([4, 5, 20, 14, 32, 22, 38, 43])

In [96]:
# Check x: expect a 2-D array with two columns (one per input variable)
print(x)

[[ 0  1]
 [ 5  1]
 [15  2]
 [25  5]
 [35 11]
 [45 15]
 [55 34]
 [60 35]]


In [97]:
# Check y: expect a flat 1-D array with one response per row of x
print(y)

[ 4  5 20 14 32 22 38 43]


In [98]:
# Create and fit model in one step: fit() returns the fitted estimator,
# which is what gets bound to `model`
model = LinearRegression().fit(x, y)

In [99]:
# Evaluate R^2 and coefficients (b0 = intercept, b1 and b2 = the two slopes)
r_sq = model.score(x, y)
print(
    'coefficient of determination:', r_sq,
    '\nintercept:', model.intercept_,
    '\nslope:', model.coef_,
)

coefficient of determination: 0.8615939258756775 
intercept: 5.52257927519819 
slope: [0.44706965 0.25502548]


In [100]:
# Predict the response for the same inputs the model was fitted on
y_pred = model.predict(x)
print('predicted response:')
print(y_pred)

predicted response:
[ 5.77760476  8.012953   12.73867497 17.9744479  23.97529728 29.4660957
 38.78227633 41.27265006]


## Polynomial regression 

In [101]:
# Import packages 
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures

In [102]:
# Provide data: the single input is laid out as a column (one row per
# observation), while the responses stay one-dimensional.
y = np.array([15, 11, 2, 8, 25, 32])
x = np.array([[5], [15], [25], [35], [45], [55]])

In [103]:
# Expand x into polynomial terms up to degree 2 (x and x^2); the constant
# column is left out via include_bias=False.
transformer = PolynomialFeatures(degree=2, include_bias=False)
x_ = transformer.fit_transform(x)

In [104]:
# Check x_: expect two columns, the original x followed by x^2
print(x_)

[[   5.   25.]
 [  15.  225.]
 [  25.  625.]
 [  35. 1225.]
 [  45. 2025.]
 [  55. 3025.]]


In [105]:
# Create and fit model on the transformed inputs x_ (not the raw x), so the
# regression is linear in the polynomial terms
model = LinearRegression().fit(x_, y)

In [106]:
# Evaluate R^2 and coefficients; the model was fitted on x_, so it is scored
# on x_ as well
r_sq = model.score(x_, y)
print(
    'coefficient of determination:', r_sq,
    '\nintercept:', model.intercept_,
    '\nslope:', model.coef_,
)

coefficient of determination: 0.8908516262498563 
intercept: 21.37232142857144 
slope: [-1.32357143  0.02839286]


In [107]:
# Predict the response from the transformed inputs the model was fitted on
y_pred = model.predict(x_)
print('predicted response:')
print(y_pred)

predicted response:
[15.46428571  7.90714286  6.02857143  9.82857143 19.30714286 34.46428571]


## Polynomial regression with multiple input variables 

In [108]:
# Import packages
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures

In [109]:
# Provide data: each row of x holds the two input values of one observation,
# and y holds the matching responses.
x = np.array([
    [0, 1], [5, 1], [15, 2], [25, 5],
    [35, 11], [45, 15], [55, 34], [60, 35],
])
y = np.array([4, 5, 20, 14, 32, 22, 38, 43])

In [110]:
# Transform x into degree-2 polynomial terms, without a constant column.
# With inputs x1 and x2 the resulting columns are, in order:
# x1, x2, x1^2, x1*x2, x2^2
x_ = PolynomialFeatures(degree=2, include_bias=False).fit_transform(x)

In [111]:
# Check x_: expect five columns per row (x1, x2, x1^2, x1*x2, x2^2)
print(x_)

[[0.000e+00 1.000e+00 0.000e+00 0.000e+00 1.000e+00]
 [5.000e+00 1.000e+00 2.500e+01 5.000e+00 1.000e+00]
 [1.500e+01 2.000e+00 2.250e+02 3.000e+01 4.000e+00]
 [2.500e+01 5.000e+00 6.250e+02 1.250e+02 2.500e+01]
 [3.500e+01 1.100e+01 1.225e+03 3.850e+02 1.210e+02]
 [4.500e+01 1.500e+01 2.025e+03 6.750e+02 2.250e+02]
 [5.500e+01 3.400e+01 3.025e+03 1.870e+03 1.156e+03]
 [6.000e+01 3.500e+01 3.600e+03 2.100e+03 1.225e+03]]


In [112]:
# Create and fit model 
model = LinearRegression().fit(x_, y)

In [113]:
# Evaluate R^2 and coefficients.
# The transformed columns are ordered x1, x2, x1^2, x1*x2, x2^2, so the model is
# f(x1, x2) = b0 + b1*x1 + b2*x2 + b3*(x1)^2 + b4*x1*x2 + b5*(x2)^2
# with b0 = model.intercept_ and b1..b5 = model.coef_ in that order.
r_sq = model.score(x_, y)
print(
    'coefficient of determination:', r_sq,
    '\nintercept:', model.intercept_,
    '\nslope:', model.coef_,
)

coefficient of determination: 0.9453701449127822 
intercept: 0.8430556452395876 
slope: [ 2.44828275  0.16160353 -0.15259677  0.47928683 -0.4641851 ]


In [114]:
# Predict the response from the transformed inputs the model was fitted on
y_pred = model.predict(x_)
print('predicted response:')
print(y_pred)

predicted response:
[ 0.54047408 11.36340283 16.07809622 15.79139    29.73858619 23.50834636
 39.05631386 41.92339046]
