# Regression

In [1]:
from sklearn.datasets import make_regression

### Problem 1

Train a model that represents a linear relationship between the feature vector and the target vector.

In [2]:
# Synthetic regression data with a fixed seed: 100 samples and 3 features,
# 2 of which are informative, for a single target.
features, target = make_regression(
    n_samples=100, n_features=3, n_informative=2,
    n_targets=1, noise=0.2, coef=False, random_state=1,
)

In [3]:
from sklearn.linear_model import LinearRegression
# Unfitted linear regression estimator with default settings.
regression = LinearRegression()

In [4]:
# Fit the linear model on the complete synthetic data set.
model = regression.fit(features, target)

In [5]:
# Intercept of the fitted linear model.
model.intercept_

-0.009650118178816669

In [6]:
# Fitted coefficients, one per feature column.
model.coef_

array([1.95531234e-02, 4.42087450e+01, 5.81494563e+01])

In [7]:
# Scored on the same data the model was fitted on, so this is an in-sample
# figure, not an estimate of performance on unseen data.
model.score(features, target)

0.9999901732607787

### Problem 2

Features interact: the effect of one feature on the target variable depends on another feature.

In [8]:
# Two-feature synthetic data set (both features informative), fixed seed.
features, target = make_regression(
    n_samples=100, n_features=2, n_informative=2,
    n_targets=1, noise=0.2, coef=False, random_state=1,
)

In [9]:
from sklearn.preprocessing import PolynomialFeatures

# Interaction-only expansion without the constant (bias) column.
# NOTE: the input has only two columns, so each expanded row holds just
# x0, x1 and x0*x1; degree=3 adds nothing beyond the pairwise product here.
interaction = PolynomialFeatures(degree=3, interaction_only=True, include_bias=False)
interaction_features = interaction.fit_transform(features)

In [10]:
# First expanded row, to compare against the hand-computed values below.
interaction_features[0]

array([0.0465673 , 0.80186103, 0.0373405 ])

In [11]:
# First sample, first feature.
features[0, 0]

0.04656729842414554

In [12]:
# First sample, second feature.
features[0, 1]

0.8018610318713447

In [13]:
# Product of the first sample's two features (the interaction term).
features[0, 0] * features[0, 1]

0.037340501965846186

In [14]:
from sklearn.linear_model import LinearRegression
# Fresh, unfitted estimator for the interaction-feature model.
regression = LinearRegression()

In [15]:
# Fit on the expanded features (original columns plus their interaction).
model = regression.fit(interaction_features, target)

In [16]:
# In-sample score: evaluated on the same data used for fitting.
model.score(interaction_features, target)

0.9999931258397594

### Problem 3

Model a non-linear relationship.

In [17]:
# Same seeded synthetic data set as Problem 1: 100 samples, 3 features.
features, target = make_regression(
    n_samples=100, n_features=3, n_informative=2,
    n_targets=1, noise=0.2, coef=False, random_state=1,
)

In [18]:
# Degree-3 polynomial expansion (powers and cross terms), no constant column.
polynomial = PolynomialFeatures(include_bias=False, degree=3)
polynomial_features = polynomial.fit_transform(features)

In [19]:
# First expanded row, to compare against the hand-computed powers below.
polynomial_features[0]

array([ 0.58591043,  0.78477065, -0.95542526,  0.34329103,  0.45980531,
       -0.55979363,  0.61586497, -0.74978971,  0.91283743,  0.2011378 ,
        0.26940473, -0.32798893,  0.36084171, -0.43930961,  0.53484097,
        0.48331276, -0.58841296,  0.71636803, -0.87214794])

In [20]:
# Original first row; it matches the first three entries of the expanded row.
features[0]

array([ 0.58591043,  0.78477065, -0.95542526])

In [21]:
# Element-wise squares; these values appear among the expanded row's entries.
features[0] ** 2

array([0.34329103, 0.61586497, 0.91283743])

In [22]:
# Element-wise cubes; these values also appear among the expanded row's entries.
features[0] ** 3

array([ 0.2011378 ,  0.48331276, -0.87214794])

Use a pipeline with cross-validation to measure performance.

In [23]:
from sklearn.linear_model import LinearRegression
# Unfitted estimator that becomes the final pipeline step.
regression = LinearRegression()

In [24]:
from sklearn.preprocessing import StandardScaler
# Unfitted scaler that becomes the first pipeline step.
scaler = StandardScaler()

In [25]:
from sklearn.pipeline import make_pipeline
# Chain the scaler and the regressor into a single estimator.
pipeline = make_pipeline(scaler, regression)

In [26]:
from sklearn.model_selection import KFold

# Shuffled 5-fold splitter; the fixed seed keeps the fold assignment repeatable.
kf = KFold(n_splits=5, shuffle=True, random_state=0)

In [27]:
from sklearn.model_selection import cross_val_score

# Score the pipeline on every fold produced by `kf`, then average the scores.
cv_result = cross_val_score(
    estimator=pipeline,
    X=polynomial_features,
    y=target,
    cv=kf,
    n_jobs=-1,
)

cv_result.mean()

0.9999788464981355

### Problem 4

Reduce variance of the linear regression model.

#### Ridge / Squared / **L2** Regularization

In [28]:
# Seeded synthetic data for the ridge example: 100 samples, 3 features.
features, target = make_regression(
    n_samples=100, n_features=3, n_informative=2,
    n_targets=1, noise=0.2, coef=False, random_state=1,
)

In [29]:
from sklearn.preprocessing import StandardScaler
# Unfitted scaler that becomes the first step of the ridge pipeline.
scaler = StandardScaler()

In [30]:
from sklearn.linear_model import Ridge
# Ridge (L2-regularized) regression with a fixed alpha of 0.5.
ridge_regression = Ridge(alpha=0.5)

In [31]:
from sklearn.pipeline import make_pipeline
# Chain the scaler and the ridge regressor into a single estimator.
pipeline = make_pipeline(scaler, ridge_regression)

In [32]:
from sklearn.model_selection import KFold

# Shuffled 5-fold splitter. The seed is fixed because, with shuffle=True and
# no random_state, the fold assignment (and therefore the cross-validation
# score) changes on every run. The value matches the seeded splitter used in
# the Problem 3 pipeline example.
kf = KFold(
    n_splits=5,
    shuffle=True,
    random_state=0,
)

In [39]:
from sklearn.model_selection import cross_val_score

# Score the ridge pipeline on every fold produced by `kf`.
cv_score = cross_val_score(pipeline, features, target, cv=kf, n_jobs=-1)

In [40]:
# Average of the per-fold scores.
cv_score.mean()

0.9999385843868838

Determine the best value of the hyperparameter **α** using **RidgeCV**. A higher **α** means more bias and less variance.

In [41]:
# Seeded synthetic data for the RidgeCV example: 100 samples, 3 features.
features, target = make_regression(
    n_samples=100, n_features=3, n_informative=2,
    n_targets=1, noise=0.2, coef=False, random_state=1,
)

In [45]:
from sklearn.preprocessing import StandardScaler
# Standardize all features in one step (scaler fitted on the full data set).
standardized_features = StandardScaler().fit_transform(features)

In [65]:
from sklearn.linear_model import RidgeCV
# Candidate regularization strengths for RidgeCV to choose from.
regr_cv = RidgeCV(alphas=[0.1, 1.0, 10.0])
# Fit on the standardized features; the chosen alpha is read from `alpha_`.
model_cv = regr_cv.fit(standardized_features, target)

In [66]:
# Coefficients of the cross-validated ridge model, one per feature column.
model_cv.coef_

array([1.29223201e-02, 4.40972291e+01, 5.38979372e+01])

In [67]:
# Alpha selected from the candidate list.
model_cv.alpha_

0.1

#### Lasso / Absolute / **L1** Regularization

In [87]:
# Seeded synthetic data for the lasso example: 100 samples, 3 features.
features, target = make_regression(
    n_samples=100, n_features=3, n_informative=2,
    n_targets=1, noise=0.2, coef=False, random_state=1,
)

In [90]:
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
# Standardize the features before fitting the lasso model.
standardized_features = scaler.fit_transform(features)

In [116]:
from sklearn.linear_model import Lasso
# Lasso (L1-regularized) regression with alpha=0.1.
regression = Lasso(alpha=0.1)
# Fit on the standardized features.
model = regression.fit(standardized_features, target)

In lasso regression, a coefficient can be shrunk all the way to 0, which effectively removes that feature from the model.

In [117]:
# Lasso coefficients; the first one is exactly zero in the recorded output.
model.coef_

array([ 0.        , 44.03520465, 53.84426278])

In [118]:
# Intercept of the fitted lasso model.
model.intercept_

5.82784123228848