In [47]:
import pandas as pd
import numpy as np
import warnings
warnings.filterwarnings("ignore")

from sklearn.datasets import make_regression
from sklearn.linear_model import LinearRegression, Ridge, RidgeCV, Lasso
from sklearn.preprocessing import PolynomialFeatures, StandardScaler

<h2 style="font-size:28px;font-family:Calibri">
    Fitting a Line
</h2>
You want to train a model that represents a linear relationship between the
 feature and target vector.

In [2]:
features, target = make_regression(n_samples = 100,
                                  n_features = 3,
                                  n_informative = 2,
                                  n_targets = 1,
                                  noise = 0.2,
                                  coef = False)

regr = LinearRegression()
model = regr.fit(features, target)

In [3]:
model.intercept_

0.009458002437218838

In [4]:
model.coef_

array([-1.34923155e-02,  1.13547012e+01,  9.21506920e+01])

In [6]:
print(f"Actual Value: {target[0]}")
print(f"Predicted Value: {model.predict(features)[0]}")

Actual Value: 6.085854985752962
Predicted Value: 6.203298265256171


In [7]:
print(model.score(features, target))

0.9999968656861672


<h2 style="font-size:28px;font-family:Calibri">
    Handling Interactive Effects
</h2>
 Sometimes a feature’s effect on our target variable is at least partially
 dependent on another feature. For example, imagine a simple coffee-based
 example where we have two binary features— the presence of sugar
 (sugar) and whether or not we have stirred (stirred)—and we want to
 predict if the coffee tastes sweet. Just putting sugar in the coffee
 (sugar=1, stirred=0) won’t make the coffee taste sweet (all the
 sugar is at the bottom!) and just stirring the coffee without adding sugar
 (sugar=0, stirred=1) won’t make it sweet either. Instead it is the
 interaction of putting sugar in the coffee and stirring the coffee
 (sugar=1, stirred=1) that will make a coffee taste sweet. The effects
 of sugar and stirred on sweetness are dependent on each other. In this
 case we say there is an interaction effect between the features sugar and
 stirred.

In [11]:
features, target = make_regression(n_samples = 100,
                                  n_features = 2,
                                  n_informative = 2,
                                  n_targets = 1,
                                  noise = 0.2,
                                  coef = False)

interaction = PolynomialFeatures(degree = 3, include_bias = False, interaction_only = True)
features_interaction = interaction.fit_transform(features)
regr = LinearRegression()
model = regr.fit(features_interaction, target)

In [21]:
features[0]

array([-1.39117718, -2.1925686 ])

In [29]:
print(np.multiply(features[:, 0], features[:, 1])[0])
print(features_interaction[0, 2])

3.0502514052056817
3.0502514052056817


In [28]:
features_interaction[0, 2]

3.0502514052056817

<h2 style="font-size:28px;font-family:Calibri">
    Fitting a Non-Linear Relationship
</h2>
 Polynomial regression is an extension of linear regression to allow us to
 model nonlinear relationships.
 <br><br>
How are we able to use a linear
 regression for a nonlinear function? The answer is that we do not change
 how the linear regression fits the model, but rather only add polynomial
 features. That is, the linear regression does not “know” that the x2 is a
 quadratic transformation of x. It just considers it one more variable.

In [34]:
features, target = make_regression(n_samples = 100,
                                  n_features = 3,
                                  n_informative = 2,
                                  n_targets = 1,
                                  noise = 0.2,
                                  coef = False)

poly = PolynomialFeatures(degree = 3, include_bias = False)
features_poly = poly.fit_transform(features)
regr = LinearRegression()
model = regr.fit(features_poly, target)

In [39]:
print(features[0])
print(features[0] ** 2)
print(features[0] ** 3)

[-0.67250152 -0.82540303  0.37223946]
[0.45225829 0.68129017 0.13856222]
[-0.30414439 -0.56233897  0.05157832]


In [38]:
features_poly[0]

array([-0.67250152, -0.82540303,  0.37223946,  0.45225829,  0.55508479,
       -0.2503316 ,  0.68129017, -0.30724758,  0.13856222, -0.30414439,
       -0.37329537,  0.16834838, -0.45816867,  0.20662446, -0.0931833 ,
       -0.56233897,  0.25360308, -0.11436967,  0.05157832])

<h2 style="font-size:28px;font-family:Calibri">
    Reducing the Variance with Regularization.
</h2>

In [41]:
 features, target = make_regression(n_samples = 100,
                    n_features = 3,
                    n_informative = 2,
                    n_targets = 1,
                    noise = 0.2,
                    coef = False)

scaler = StandardScaler()
features_standardized = scaler.fit_transform(features)
regr = Ridge(alpha = 0.5)
model = regr.fit(features_standardized, target)

In [44]:
regr_cv = RidgeCV(alphas = [0.001, 0.01, 0.1, 1.0, 10.0])
model_cv = regr_cv.fit(features_standardized, target)
model_cv.coef_

array([1.01317865e+02, 6.60634459e+01, 1.19885220e-03])

In [45]:
model_cv.alpha_

0.001

<h2 style="font-size:28px;font-family:Calibri">
    Reducing Features with Lasso Regression
</h2>

In [48]:
scaler = StandardScaler()
features_standardized = scaler.fit_transform(features)
regr = Lasso(alpha = 0.5)
model = regr.fit(features_standardized, target)

In [49]:
model.coef_

array([100.82717554,  65.57256353,  -0.        ])

In [50]:
 # Create lasso regression with a high alpha
 regression_a10 = Lasso(alpha=10)
 model_a10 = regression_a10.fit(features_standardized, target)
 model_a10.coef_

array([91.48909497, 56.23448297, -0.        ])