In [14]:
import pickle
from sklearn.pipeline import Pipeline, make_pipeline
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.impute import SimpleImputer
from sklearn.decomposition import PCA

from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier

import numpy as np


#generador de pipelines
def make_pipeline(use_pca: bool =False, components: int =0, num_var: list=[], cat_var: list=[]):
    
    if use_pca:
        num_pipeline = Pipeline([
            ("imputer", SimpleImputer()),
            ("ss", StandardScaler()),
            ("pca", PCA(n_components=components))
        ])
    else:
        num_pipeline = Pipeline([
            ("imputer", SimpleImputer()),
            ("ss", StandardScaler())
        ])

    ct = ColumnTransformer([
        ("cat", OneHotEncoder(), cat_var),
        ("num", num_pipeline, num_var)
    ])

    pipeline = Pipeline([
        ("ct", ct)
    ])

    return pipeline

def fit(pipeline: Pipeline, X, y):
    pipeline = pipeline.fit(X, y)
    return pipeline

def export(pipeline: Pipeline, file: str):
    with open(file, "wb") as f:
        pickle.dump(pipeline, f)


# Crear el pipeline
generated_pipeline = make_pipeline(use_pca=True, components=0, num_var=["age", "fare", "sibsp", "parch"], cat_var=["pclass", "sex", "embarked"])


# carga del dataset
X, y = fetch_openml("titanic", version=1, as_frame=True, return_X_y=True)

# preprocesamiento
X.rename(columns={
    "home.dest": "homedest"
}, inplace=True)

# division train_test
seed = 1234
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=seed)

# Entrenamiento 
generated_pipeline = fit(generated_pipeline, X_train, y_train)

# exportar pipeline
export(generated_pipeline, "generated_pipeline.pkl")

In [6]:
# ejemplo de uso
pipeline = Pipeline([
        ("generated_pipeline", generated_pipeline),
        ("model",DecisionTreeClassifier())
    ])

pipeline.fit(X_test, y_test)

pipeline.predict(X_test.head())

array(['0', '0', '0', '0', '0'], dtype=object)

In [8]:
pipeline.score(X_test, y_test)

0.8015267175572519

In [1]:
from pipelinegenerator.generator import make_pipeline, fit, export_pipeline


from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier

import numpy as np 


# Crear el pipeline
generated_pipeline = make_pipeline(use_pca=True, components=3, num_var=["age", "fare", "sibsp", "parch"], cat_var=["pclass", "sex", "embarked"])

# carga del dataset
X, y = fetch_openml("titanic", version=1, as_frame=True, return_X_y=True)

# division train_test
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=1234)

# Entrenamiento 
generated_pipeline = fit(generated_pipeline, X_train, y_train)

generated_pipeline.fit_transform(X_test)

# exportar pipeline
#export_pipeline(generated_pipeline, "generated_pipeline.pkl")

array([[ 0.        ,  0.        ,  1.        , ..., -0.7307595 ,
        -0.41574814,  0.0730215 ],
       [ 1.        ,  0.        ,  0.        , ...,  0.09047565,
         0.94289816, -0.63220904],
       [ 0.        ,  1.        ,  0.        , ..., -0.09839279,
         0.00777931, -0.12057858],
       ...,
       [ 0.        ,  1.        ,  0.        , ..., -0.95835125,
         0.23241633,  0.36658599],
       [ 1.        ,  0.        ,  0.        , ..., -0.17548307,
         0.8743123 , -0.13886154],
       [ 1.        ,  0.        ,  0.        , ..., -0.0105688 ,
         4.41676871, -1.081589  ]])

In [None]:

generated_pipeline.fit_transform(X_test.head(3))

In [11]:
generated_pipeline.fit(X_test.head())

In [2]:
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer

[0;31mSignature:[0m [0mPipeline[0m[0;34m.[0m[0mfit[0m[0;34m([0m[0mself[0m[0;34m,[0m [0mX[0m[0;34m,[0m [0my[0m[0;34m=[0m[0;32mNone[0m[0;34m,[0m [0;34m**[0m[0mfit_params[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0;31mDocstring:[0m
Fit the model.

Fit all the transformers one after the other and transform the
data. Finally, fit the transformed data using the final estimator.

Parameters
----------
X : iterable
    Training data. Must fulfill input requirements of first step of the
    pipeline.

y : iterable, default=None
    Training targets. Must fulfill label requirements for all steps of
    the pipeline.

**fit_params : dict of string -> object
    Parameters passed to the ``fit`` method of each step, where
    each parameter name is prefixed such that parameter ``p`` for step
    ``s`` has key ``s__p``.

Returns
-------
self : object
    Pipeline with fitted steps.
[0;31mFile:[0m      ~/cursoMLOps/88_ejercicios/EjercicioNivel3/envEjN3/lib/python3.9/