In [1]:
%matplotlib widget
import sys
import numpy as np
import pandas as pd
import IPython
import scipy as sp
from matplotlib import pyplot as plt
from sklearn.model_selection import train_test_split
import mglearn

In [2]:
# Draw mglearn's ready-made linear-regression illustration (presumably on the "wave" data,
# judging by the helper's name). In the recorded run this call printed the fitted
# slope w[0] and intercept b and rendered a figure.
mglearn.plots.plot_linear_regression_wave()

w[0]: 0.393906  b: -0.031804


Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

## Working with LinearRegression

### Working with LinearRegression on a one-dimensional dataset (with a single feature).

In [3]:
from sklearn.linear_model import LinearRegression

# Single-feature "wave" regression data from mglearn, split with a fixed seed.
X, y = mglearn.datasets.make_wave(n_samples=60)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

# Fit on the training split only; fit() returns the estimator itself,
# so building and fitting in two steps leaves `lr` in the same state.
lr = LinearRegression()
lr.fit(X_train, y_train)

# Report the learned slope(s) and the offset.
for template, learned in (
    ("lr coefficient = {}", lr.coef_),
    ("lr intercept = {}", lr.intercept_),
):
    print(template.format(learned))

lr coefficient = [0.39390555]
lr intercept = -0.031804343026759746


In [4]:
# Compare the model's score on the data it was fitted on with the held-out split.
train_score = lr.score(X_train, y_train)
test_score = lr.score(X_test, y_test)
print("Training set score: {:.2f}".format(train_score))
print("Test set score: {:.2f}".format(test_score))

Training set score: 0.67
Test set score: 0.66


### Working with LinearRegression on a more complex dataset, the extended Boston dataset.

In [5]:
from mglearn.datasets import load_extended_boston

# Load mglearn's extended Boston data and split it with a fixed seed so the
# scores reported below are repeatable.
X, y = load_extended_boston()
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

# Unregularised linear regression, used as the reference model for the cells that follow.
lr = LinearRegression()
lr.fit(X_train, y_train)

In [6]:
# Score the fitted model on both splits; a large gap between the two points to overfitting.
for template, features, targets in (
    ("Training set score: {:.2f}", X_train, y_train),
    ("Test set score: {:.2f}", X_test, y_test),
):
    print(template.format(lr.score(features, targets)))

Training set score: 0.95
Test set score: 0.61


## Working with Ridge Regression

##### Increasing the alpha parameter of ridge regression

In [7]:
from sklearn.linear_model import Ridge

# Ridge regression with its default settings, fitted on the extended Boston split.
ridge = Ridge()
ridge.fit(X_train, y_train)

# The scores below are for the extended Boston dataset.
ridge_train_score = ridge.score(X_train, y_train)
ridge_test_score = ridge.score(X_test, y_test)
print("Training set score: {:.2f}".format(ridge_train_score))
print("Test set score: {:.2f}".format(ridge_test_score))

Training set score: 0.89
Test set score: 0.75


In [8]:
# A larger alpha pushes the coefficients harder toward zero. That lowers the
# training-set score and can help generalization (the default alpha is 1).
ridge10 = Ridge(alpha=10)
ridge10.fit(X_train, y_train)

for template, features, targets in (
    ("Training set score: {:.2f}", X_train, y_train),
    ("Test set score: {:.2f}", X_test, y_test),
):
    print(template.format(ridge10.score(features, targets)))

Training set score: 0.79
Test set score: 0.64


In [9]:
# A smaller alpha restricts the coefficients less, which moves the model toward
# overfitting: with a weak penalty, features can keep considerable weight in
# every prediction.
ridge01 = Ridge(alpha=0.1)
ridge01.fit(X_train, y_train)

ridge01_train_score = ridge01.score(X_train, y_train)
ridge01_test_score = ridge01.score(X_test, y_test)
print("Training set score: {:.2f}".format(ridge01_train_score))
print("Test set score: {:.2f}".format(ridge01_test_score))

Training set score: 0.93
Test set score: 0.77


In [10]:
%matplotlib widget
plt.plot(ridge.coef_, label="Ridge alpha=1")
plt.plot(ridge10.coef_, label="Ridge alpha=10")
plt.plot(ridge01.coef_, label="Ridge alpha=0.1")
plt.plot(lr.coef_, 'o', label="LinearRegression")

plt.xlabel("Coefficient index")
plt.ylabel("Coefficient magnitude")
plt.hlines(0, 0, len(lr.coef_))
plt.ylim(-25, 25)
plt.legend()

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

<matplotlib.legend.Legend at 0x7f65ec93ad60>

#### Some takeaways: Always remember, alpha corresponds to restriction. From the last plot, for alpha=0.1 the restriction is small, so the coefficients range from about -17 to 24. For alpha=1 there is more restriction, with coefficients ranging from about -10 to 10. For alpha=10 there is even more restriction, with coefficients from about -3 to 3. For LinearRegression, where no regularization of any kind happens (that is, no restriction), the coefficients become so large that some of them fall outside the chart. This corresponds to alpha=0.

## Working with Lasso Regression

In [11]:
from sklearn.linear_model import Lasso

# Lasso with default settings on the extended Boston split.
lasso = Lasso()
lasso.fit(X_train, y_train)

# Low scores on both splits indicate an underfit model.
lasso_train_score = lasso.score(X_train, y_train)
lasso_test_score = lasso.score(X_test, y_test)
print("Training set score: {:.2f}".format(lasso_train_score))
print("Test set score: {:.2f}".format(lasso_test_score))

# A feature counts as "used" when its coefficient is not exactly zero.
used_mask = lasso.coef_ != 0
print("Number of features used: {}".format(np.sum(used_mask)))

Training set score: 0.29
Test set score: 0.21
Number of features used: 4


In [12]:
# Weaker penalty (alpha=0.01); max_iter is raised to 100000 for the solver.
lasso001 = Lasso(alpha=0.01, max_iter=100000)
lasso001.fit(X_train, y_train)

for template, features, targets in (
    ("Training set score: {:.2f}", X_train, y_train),
    ("Test set score: {:.2f}", X_test, y_test),
):
    print(template.format(lasso001.score(features, targets)))

# Count the coefficients that were not driven to exactly zero.
nonzero_001 = np.sum(lasso001.coef_ != 0)
print("Number of features used: {}".format(nonzero_001))

Training set score: 0.90
Test set score: 0.77
Number of features used: 33


In [13]:
# Very weak penalty (alpha=0.0001), again with max_iter raised to 100000.
lasso00001 = Lasso(alpha=0.0001, max_iter=100000)
lasso00001.fit(X_train, y_train)

lasso00001_train_score = lasso00001.score(X_train, y_train)
lasso00001_test_score = lasso00001.score(X_test, y_test)
print("Training set score: {:.2f}".format(lasso00001_train_score))
print("Test set score: {:.2f}".format(lasso00001_test_score))

# Count the coefficients that were not driven to exactly zero.
nonzero_00001 = np.sum(lasso00001.coef_ != 0)
print("Number of features used: {}".format(nonzero_00001))

Training set score: 0.95
Test set score: 0.64
Number of features used: 96


In [14]:
%matplotlib widget

## This lasso is underfit, and only uses 4 features of the 104 given
#plt.plot(lasso.coef_, 's', label="Lasso alpha=1")

## This lasso predicts well, 0.90 training and 0.77 testing. It's slightly better than ridge and only uses
### 33 features, meaning model simplicity.
plt.plot(lasso001.coef_, '^', label="Lasso alpha=0.01")

## This lasso with a very small alpha, is almost like using LinearRegression model.
plt.plot(lasso00001.coef_, 'v', label="Lasso alpha=0.0001")

plt.plot(ridge01.coef_, 'o', label="Ridge alpha=0.1")
plt.legend(ncol=2, loc=(0, 1.05))
plt.ylim(-25, 25)
plt.xlabel("Coefficient index")
plt.ylabel("Coefficient magnitude")

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Text(0, 0.5, 'Coefficient magnitude')

## Using linear models for classification: LogisticRegression and LinearSVC.

In [15]:
from sklearn.linear_model import LogisticRegression
from sklearn.svm import LinearSVC

# Two-feature "forge" classification data from mglearn.
X, y = mglearn.datasets.make_forge()

# One panel per classifier, side by side.
fig, axes = plt.subplots(1, 2, figsize=(10, 3))
classifiers = [LinearSVC(), LogisticRegression()]

for model, ax in zip(classifiers, axes):
    clf = model.fit(X, y)
    # Draw the fitted decision boundary first, then the data points on top of it.
    mglearn.plots.plot_2d_separator(clf, X, fill=False, eps=0.5, ax=ax, alpha=.7)
    mglearn.discrete_scatter(X[:, 0], X[:, 1], y, ax=ax)
    # Title each panel with the estimator's class name.
    ax.set_title(clf.__class__.__name__)
    ax.set_xlabel("Feature 0")
    ax.set_ylabel("Feature 1")

# Only the left panel gets a legend.
axes[0].legend()



Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …



<matplotlib.legend.Legend at 0x7f65e71d1f70>

In [16]:
from sklearn.datasets import load_breast_cancer

cancer = load_breast_cancer()

# Stratified split keeps the class proportions equal in the train and test parts.
X_train, X_test, y_train, y_test = train_test_split(
    cancer.data,
    cancer.target,
    stratify=cancer.target,
    random_state=42,
)

# Baseline with the default C = 1.
# Training and test accuracy are both very high but also very close to each other
# (after raising the maximum number of iterations they are identical), which
# suggests the model is UNDERFITTING.
# Quiz: increase or decrease C to move away from underfitting? INCREASE.
logreg = LogisticRegression(max_iter=10000)
logreg.fit(X_train, y_train)

logreg_train_score = logreg.score(X_train, y_train)
logreg_test_score = logreg.score(X_test, y_test)
print("Training set score: {:.3f}".format(logreg_train_score))
print("Test set score: {:.3f}".format(logreg_test_score))

Training set score: 0.958
Test set score: 0.958


In [17]:
# Larger C means less regularization: moving away from underfitting, toward overfitting.
logreg100 = LogisticRegression(C=100, max_iter=10000)
logreg100.fit(X_train, y_train)

for template, features, targets in (
    ("Training set score: {:.3f}", X_train, y_train),
    ("Test set score: {:.3f}", X_test, y_test),
):
    print(template.format(logreg100.score(features, targets)))

Training set score: 0.981
Test set score: 0.972


In [18]:
# Smaller C means more regularization: moving further toward underfitting and
# losing both training and test accuracy.
logreg001 = LogisticRegression(C=0.01, max_iter=10000)
logreg001.fit(X_train, y_train)

logreg001_train_score = logreg001.score(X_train, y_train)
logreg001_test_score = logreg001.score(X_test, y_test)
print("Training set score: {:.3f}".format(logreg001_train_score))
print("Test set score: {:.3f}".format(logreg001_test_score))

Training set score: 0.953
Test set score: 0.951


In [19]:
%matplotlib widget
plt.plot(logreg.coef_.T, 'o', label="C=1")
plt.plot(logreg100.coef_.T, '^', label="C=100")
plt.plot(logreg001.coef_.T, 'v', label="C=0.001")

plt.xticks(range(cancer.data.shape[1]), cancer.feature_names, rotation=90)
plt.hlines(0, 0, cancer.data.shape[1])

plt.ylim(-5, 5)
plt.xlabel("Coefficient index")
plt.ylabel("Coefficient magnitude")
plt.legend()

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

<matplotlib.legend.Legend at 0x7f65e70ed8b0>

In [24]:
%matplotlib widget
for C, marker in zip([0.001, 1, 100], ['o', '^', 'v']):
    lr_l1 = LogisticRegression(C=C, penalty="l1", solver="liblinear", max_iter=10000).fit(X_train, y_train)
    print("Training accuracy of l1 logreg with C={:.3f}: {:.2f}".format(C, lr_l1.score(X_train, y_train)))
    print("Test accuracy of l1 logreg with C={:.3f}: {:.2f}".format(C, lr_l1.score(X_test, y_test)))
    plt.plot(lr_l1.coef_.T, marker, label="C={:.3f}".format(C))

plt.xticks(range(cancer.data.shape[1]), cancer.feature_names, rotation=90)
plt.hlines(0, 0, cancer.data.shape[1])
plt.xlabel("Coefficient index")
plt.ylabel("Coefficient magnitude")
plt.ylim(-5, 5)
plt.legend(loc=3)

Training accuracy of l1 logreg with C=0.001: 0.91
Test accuracy of l1 logreg with C=0.001: 0.92


Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Training accuracy of l1 logreg with C=1.000: 0.96
Test accuracy of l1 logreg with C=1.000: 0.96
Training accuracy of l1 logreg with C=100.000: 0.99
Test accuracy of l1 logreg with C=100.000: 0.98


<matplotlib.legend.Legend at 0x7f65dfe74bb0>

# Multiclass classification models.

## Using LinearSVC with ovr (one-vs.-rest) technique.

In [34]:
%matplotlib widget
from sklearn.datasets import make_blobs

X, y = make_blobs(random_state=42)
mglearn.discrete_scatter(X[:, 0], X[:, 1], y)
plt.xlabel("Feature 0")
plt.ylabel("Feature 1")
plt.legend(["Class 0", "Class 1", "Class 2"])

linear_svm = LinearSVC().fit(X, y)
print("Coefficient shape: ", linear_svm.coef_.shape)
print("Intercept shape: ", linear_svm.intercept_.shape)

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Coefficient shape:  (3, 2)
Intercept shape:  (3,)


In [31]:
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression

# Synthetic 4-class problem: 500 samples, 8 features (5 informative, 3 redundant).
X, y = make_classification(
    n_samples=500,
    n_features=8,
    n_informative=5,
    n_redundant=3,
    n_classes=4,
    random_state=1,
)
X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y, random_state=96)

# Logistic regression configured for one-vs-rest multiclass handling.
Multiclass_model = LogisticRegression(multi_class='ovr')
Multiclass_model.fit(X_train, y_train)

# Accuracy on the held-out split and on the training split.
test_accuracy = Multiclass_model.score(X_test, y_test)
train_accuracy = Multiclass_model.score(X_train, y_train)
print("Testing score = {}, Training score = {}".format(test_accuracy, train_accuracy))

Testing score = 0.624, Training score = 0.6106666666666667


In [38]:
%matplotlib widget
mglearn.discrete_scatter(X[:, 0], X[:, 1], y)
line = np.linspace(-15, 15)
for coef, intercept, color in zip(linear_svm.coef_, linear_svm.intercept_, ['b', 'r', 'g']):
    plt.plot(line, -(line * coef[0] + intercept) / coef[1], c=color)
plt.ylim(-10, 15)
plt.xlim(-10, 8)
plt.xlabel("Feature 0")
plt.ylabel("Feature 1")
plt.legend(['Class 0', 'Class 1', 'Class 2', 'Line class 0', 'Line class 1', 'Line class 2'], loc=(1.01, 0.3))

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

<matplotlib.legend.Legend at 0x7f65df4acfa0>