Construcción de tuberías de estimadores --- 8:41 min
===

* 8:41 min | Última modificación: Septiembre 30, 2021 | [YouTube](https://youtu.be/2lvlJWIZJF8)

https://scikit-learn.org/stable/modules/compose.html

Creación de un pipeline asignando nombres a las componentes
---

In [1]:
from sklearn.decomposition import PCA
from sklearn.pipeline import Pipeline
from sklearn.svm import SVC

# Build a pipeline of estimators in which every step gets an explicit
# identifier. Each entry is a (name, estimator) tuple.
estimators = [("reduce_dim", PCA()), ("clf", SVC())]

# steps   : (name, transform) tuples, chained in the order given; the
#           intermediate steps implement fit/transform and the last object
#           is an estimator.
# verbose : if True, the time elapsed while fitting each step is printed as
#           the step completes.
pipeline = Pipeline(steps=estimators, verbose=False)

# Bare last expression so the notebook displays the pipeline.
pipeline

Pipeline(memory=None,
         steps=[('reduce_dim',
                 PCA(copy=True, iterated_power='auto', n_components=None,
                     random_state=None, svd_solver='auto', tol=0.0,
                     whiten=False)),
                ('clf',
                 SVC(C=1.0, break_ties=False, cache_size=200, class_weight=None,
                     coef0=0.0, decision_function_shape='ovr', degree=3,
                     gamma='scale', kernel='rbf', max_iter=-1,
                     probability=False, random_state=None, shrinking=True,
                     tol=0.001, verbose=False))],
         verbose=False)

Creación de un pipeline usando make_pipeline
---

In [2]:
#
# Creación de una tubería con identificadores
# por defecto para sus componentes
#
from sklearn.naive_bayes import MultinomialNB
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import Binarizer

# make_pipeline receives the estimators to chain as positional arguments;
# no step identifiers are supplied here.
make_pipeline(
    # Binarizer: feature values below or equal to `threshold` are replaced
    # by 0, values above it by 1.
    Binarizer(threshold=0.0),
    # MultinomialNB: `alpha` is the additive (Laplace/Lidstone) smoothing
    # parameter (0 for no smoothing).
    MultinomialNB(alpha=1.0),
)

Pipeline(memory=None,
         steps=[('binarizer', Binarizer(copy=True, threshold=0.0)),
                ('multinomialnb',
                 MultinomialNB(alpha=1.0, class_prior=None, fit_prior=True))],
         verbose=False)

Acceso a las componentes del pipeline
---

In [3]:
#
# Access a step through the `steps` list using a positional index.
# The result is the whole (name, estimator) tuple of the first step.
#
pipeline.steps[0]

('reduce_dim',
 PCA(copy=True, iterated_power='auto', n_components=None, random_state=None,
     svd_solver='auto', tol=0.0, whiten=False))

In [4]:
#
# Access the estimator by indexing the pipeline itself with a position.
# Unlike `pipeline.steps[0]`, this yields only the estimator object,
# without its name.
#
pipeline[0]

PCA(copy=True, iterated_power='auto', n_components=None, random_state=None,
    svd_solver='auto', tol=0.0, whiten=False)

In [5]:
#
# Access the estimator by indexing the pipeline with the step's name.
#
pipeline["reduce_dim"]

PCA(copy=True, iterated_power='auto', n_components=None, random_state=None,
    svd_solver='auto', tol=0.0, whiten=False)

Actualización de los parámetros de los estimadores 
---

In [6]:
#
# Update a parameter of one estimator inside the pipeline.
# The keyword is written as <step name>__<parameter name>: `clf__C`
# targets the parameter `C` of the step named "clf".
#
pipeline.set_params(clf__C=10)

Pipeline(memory=None,
         steps=[('reduce_dim',
                 PCA(copy=True, iterated_power='auto', n_components=None,
                     random_state=None, svd_solver='auto', tol=0.0,
                     whiten=False)),
                ('clf',
                 SVC(C=10, break_ties=False, cache_size=200, class_weight=None,
                     coef0=0.0, decision_function_shape='ovr', degree=3,
                     gamma='scale', kernel='rbf', max_iter=-1,
                     probability=False, random_state=None, shrinking=True,
                     tol=0.001, verbose=False))],
         verbose=False)

Búsqueda de hiperparámetros óptimos con GridSearchCV
---

In [7]:
#
# Especificación de una malla de valores de parámetros en la tubería para
# buscar su combinación óptima
#
from sklearn.model_selection import GridSearchCV

# Grid of candidate values. Each key is <step name>__<parameter name> and
# each value is the list of settings to try for that parameter.
param_grid = {
    "reduce_dim__n_components": [2, 5, 10],
    "clf__C": [0.1, 10, 100],
}

# estimator  : the estimator/model to tune (here, the whole pipeline).
# param_grid : dictionary with parameter names (str) as keys and lists of
#              parameter settings to try as values.
# The search object is only configured in this cell; it is not fitted here.
grid_search = GridSearchCV(estimator=pipeline, param_grid=param_grid)

In [8]:
#
# Creación de los modelos por fuera de la
# tubería e inspección de sus valores
#
from sklearn.datasets import load_digits

# Load the digits dataset as a (data, target) pair.
X_digits, y_digits = load_digits(return_X_y=True)

# Create the estimators outside the pipeline and keep references to them,
# so they can be inspected directly once the pipeline has been fitted.
pca = PCA()
clf = SVC()

# NOTE: this rebinds the name `pipeline` to a new Pipeline object.
pipeline = Pipeline(steps=[("reduce_dim", pca), ("clf", clf)])

pipeline.fit(X_digits, y_digits)

# Inspect the fitted PCA through the reference created above.
pca.components_

array([[-1.77484909e-19, -1.73094651e-02, -2.23428835e-01, ...,
        -8.94184677e-02, -3.65977111e-02, -1.14684954e-02],
       [ 3.27805401e-18, -1.01064569e-02, -4.90849204e-02, ...,
         1.76697117e-01,  1.94547053e-02, -6.69693895e-03],
       [-1.68358559e-18,  1.83420720e-02,  1.26475543e-01, ...,
         2.32084163e-01,  1.67026563e-01,  3.48043832e-02],
       ...,
       [ 0.00000000e+00,  4.64905892e-16,  7.75864167e-17, ...,
         1.04083409e-16, -6.24500451e-17, -1.52655666e-16],
       [-0.00000000e+00,  1.66533454e-16,  2.23116834e-16, ...,
        -2.77555756e-16,  1.11022302e-16,  1.66533454e-16],
       [ 1.00000000e+00, -1.68983002e-17,  5.73338351e-18, ...,
         8.66631300e-18, -1.57615962e-17,  4.07058917e-18]])