In [44]:
from IPython.core.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))

In [1]:
import inspect

The basics: https://scikit-learn.org/stable/tutorial/basic/tutorial.html

Validation (section 3.1): https://scikit-learn.org/stable/modules/cross_validation.html

Parameter tuning (section 3.2.1): https://scikit-learn.org/stable/modules/grid_search.html

Validation and learning curves: https://scikit-learn.org/stable/modules/learning_curve.html

PCA (section 2.5.1): https://scikit-learn.org/stable/modules/decomposition.html#decompositions

Preprocessing (section 3.6.1, only intro): https://scikit-learn.org/stable/modules/preprocessing.html

# Metrics

https://scikit-learn.org/stable/modules/generated/sklearn.metrics.mean_squared_error.html

In [2]:
from sklearn.metrics import mean_squared_error
type(mean_squared_error)

function

https://scikit-learn.org/stable/modules/generated/sklearn.metrics.mean_absolute_error.html

In [3]:
from sklearn.metrics import mean_absolute_error
type(mean_absolute_error)

function

https://scikit-learn.org/stable/modules/generated/sklearn.metrics.make_scorer.html

In [4]:
from sklearn.metrics import make_scorer
type(make_scorer)

function

# Preprocessing

https://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.PolynomialFeatures.html

In [5]:
from sklearn.preprocessing import PolynomialFeatures
inspect.getmro(PolynomialFeatures)

(sklearn.preprocessing._data.PolynomialFeatures,
 sklearn.base.TransformerMixin,
 sklearn.base.BaseEstimator,
 object)

https://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.StandardScaler.html

In [6]:
from sklearn.preprocessing import StandardScaler
inspect.getmro(StandardScaler)

(sklearn.preprocessing._data.StandardScaler,
 sklearn.base.TransformerMixin,
 sklearn.base.BaseEstimator,
 object)

# Dimentionality Reduction

https://scikit-learn.org/stable/modules/generated/sklearn.decomposition.PCA.html

In [7]:
from sklearn.decomposition import PCA
inspect.getmro(PCA)

(sklearn.decomposition._pca.PCA,
 sklearn.decomposition._base._BasePCA,
 sklearn.base.TransformerMixin,
 sklearn.base.BaseEstimator,
 object)

# Utils

https://scikit-learn.org/stable/modules/generated/sklearn.pipeline.Pipeline.html

In [8]:
from sklearn.pipeline import Pipeline
inspect.getmro(Pipeline)

(sklearn.pipeline.Pipeline,
 sklearn.utils.metaestimators._BaseComposition,
 sklearn.base.BaseEstimator,
 object)

# Model Validation and Selection

https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.train_test_split.html

In [9]:
from sklearn.model_selection import train_test_split
type(train_test_split)

function

https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.GridSearchCV.html

In [10]:
from sklearn.model_selection import GridSearchCV
inspect.getmro(GridSearchCV)

(sklearn.model_selection._search.GridSearchCV,
 sklearn.model_selection._search.BaseSearchCV,
 sklearn.base.MetaEstimatorMixin,
 sklearn.base.BaseEstimator,
 object)

https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.cross_val_score.html

In [11]:
from sklearn.model_selection import cross_val_score
type(cross_val_score)

function

https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.validation_curve.html

In [12]:
from sklearn.model_selection import validation_curve
type(validation_curve)

function

https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.learning_curve.html

In [13]:
from sklearn.model_selection import learning_curve
type(learning_curve)

function

# Models

https://scikit-learn.org/stable/modules/generated/sklearn.dummy.DummyRegressor.html

In [14]:
from sklearn.dummy import DummyRegressor
inspect.getmro(DummyRegressor)

(sklearn.dummy.DummyRegressor,
 sklearn.base.MultiOutputMixin,
 sklearn.base.RegressorMixin,
 sklearn.base.BaseEstimator,
 object)

https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LinearRegression.html

In [15]:
from sklearn.linear_model import LinearRegression
inspect.getmro(LinearRegression)

(sklearn.linear_model._base.LinearRegression,
 sklearn.base.MultiOutputMixin,
 sklearn.base.RegressorMixin,
 sklearn.linear_model._base.LinearModel,
 sklearn.base.BaseEstimator,
 object)

https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.Ridge.html

In [16]:
from sklearn.linear_model import Ridge
inspect.getmro(Ridge)

(sklearn.linear_model._ridge.Ridge,
 sklearn.base.MultiOutputMixin,
 sklearn.base.RegressorMixin,
 sklearn.linear_model._ridge._BaseRidge,
 sklearn.linear_model._base.LinearModel,
 sklearn.base.BaseEstimator,
 object)

## Plotting

https://matplotlib.org/stable/api/_as_gen/matplotlib.pyplot.plot.html

In [17]:
from matplotlib.pyplot import plot
type(plot)

function

https://matplotlib.org/stable/api/_as_gen/matplotlib.pyplot.scatter.html

In [18]:
from matplotlib.pyplot import scatter
type(scatter)

function

# Numpy

Creating tensors (section 1 and 2): https://numpy.org/doc/stable/user/basics.creation.html 

Indexing (upto masks): https://numpy.org/doc/stable/user/basics.creation.html

Mathematical functions: https://numpy.org/doc/stable/reference/routines.math.html

Array manipulation: https://numpy.org/doc/stable/reference/routines.array-manipulation.html

# Definitions

<ul>
    <li> Feature space, target space, target function, data distribution, hypothesis set, learning algorithm
    <li> Supervised vs unsupervised learning, classification, regression, multiclass, multilabel
    <li> Risk, empirical risk, empirical risk minimization, structural risk minimization
    <li> Overfitting, underfitting
    <li> Approximation error, estimation error
    <li> Training set, test set, validation set
    <li> Feature engineering, feature selection
    <li> Validation, cross validation, parameter tuning, model selection
    <li> Dimentionality reduction
    <li> Regularization
    <li> Validation curve, learning curve
    <li> Pearson correlation coefficient
</ul>

# Concepts

<ul>
    <li> Bias-complexity tradeoff
    <li> Linearly separable
    <li> Curse of dimentionality
    <li> Blessing of dimentionality
    <li> No free lunch
</ul>

# The Machine Learning Pipeline

In [19]:
from sklearn.datasets import load_boston
X, y = load_boston(return_X_y=True)

## Level 1

In [20]:
model = Ridge()
model.fit(X,y)

Ridge()

## Level 2: with validation

In [21]:
X_train, X_test, y_train, y_test = train_test_split(X,y)

Train time

In [22]:
model = Ridge()
model.fit(X_train,y_train)

Ridge()

Test time

In [23]:
y_pred = model.predict(X_test)
mean_squared_error(y_test,y_pred)

16.41669023343412

## Level 3: with selection for the regularization parameter

In [24]:
X_train, X_test, y_train, y_test = train_test_split(X,y)

Model selection

In [25]:
X_train_bis, X_valid, y_train_bis, y_valid = train_test_split(X_train,y_train)
for alpha in [0.1,1.0]:
    model = Ridge(alpha=alpha)
    model.fit(X_train_bis,y_train_bis)
    y_pred = model.predict(X_valid)
    print(mean_squared_error(y_valid,y_pred))

19.487381184295206
19.203174097622995


Train time

In [26]:
model = Ridge(alpha=0.1)
model.fit(X_train,y_train)

Ridge(alpha=0.1)

Test time

In [27]:
y_pred = model.predict(X_test)
mean_squared_error(y_test,y_pred)

26.12972465129502

## Level 4: with selection by cross-validation, smarter data usage

In [28]:
X_train, X_test, y_train, y_test = train_test_split(X,y)

Model selection

In [29]:
for alpha in [0.1,1.0]:
    model = Ridge(alpha=alpha)
    score = cross_val_score(model,X_train,y_train,scoring=make_scorer(mean_squared_error))
    print(score.mean())

22.29853986531623
22.385533862360496


Train time

In [30]:
model = Ridge(alpha=0.1)
model.fit(X_train,y_train)

Ridge(alpha=0.1)

Test time

In [31]:
y_pred = model.predict(X_test)
mean_squared_error(y_test,y_pred)

32.66986342947974

## Level 5: same, but shorter

In [32]:
X_train, X_test, y_train, y_test = train_test_split(X,y)

Combined model selection and training

In [33]:
params = {'alpha': [0.1,1]}
model = GridSearchCV(Ridge(),params)
model.fit(X_train,y_train)

GridSearchCV(estimator=Ridge(), param_grid={'alpha': [0.1, 1]})

Test time

In [34]:
y_pred = model.predict(X_test)
mean_squared_error(y_test,y_pred)

18.583771009092533

## Level 6: Data preprocessing, branching from 2

In [35]:
X_train, X_test, y_train, y_test = train_test_split(X,y)

Train time

In [36]:
preprocessor = StandardScaler()
preprocessor.fit(X_train,y_train)
model = Ridge()
model.fit(preprocessor.transform(X_train),y_train)

Ridge()

Test time

In [37]:
y_pred = model.predict(preprocessor.transform(X_test))
mean_squared_error(y_test,y_pred)

22.121115077898722

## Level 7: same, with pipeline

In [38]:
X_train, X_test, y_train, y_test = train_test_split(X,y)

Train time

In [39]:
model = Pipeline([('pre',StandardScaler()),('model',Ridge())])
model.fit(X_train,y_train)

Pipeline(steps=[('pre', StandardScaler()), ('model', Ridge())])

Test time

In [40]:
y_pred = model.predict(X_test)
mean_squared_error(y_test,y_pred)

32.2231829898658

## Level 8: all at once

In [41]:
X_train, X_test, y_train, y_test = train_test_split(X,y)

Combined model selection and training

In [42]:
model_in = Pipeline([('pre',StandardScaler()),('model',Ridge())])
params = {'model__alpha': [0.1,1]}
model = GridSearchCV(model_in,params)
model.fit(X_train,y_train)

GridSearchCV(estimator=Pipeline(steps=[('pre', StandardScaler()),
                                       ('model', Ridge())]),
             param_grid={'model__alpha': [0.1, 1]})

Test time

In [43]:
y_pred = model.predict(X_test)
mean_squared_error(y_test,y_pred)

23.457860412541564

Note: in the future, show two stages of model selection. One where we chose a family of models with validation and learning curves. The other one where we use gridsearch model selection to tune the regularizer.