In [20]:
import numpy as np

from sklearn.compose import ColumnTransformer
from sklearn.datasets import fetch_openml
from sklearn.feature_selection import SelectPercentile, chi2
from sklearn.impute import SimpleImputer
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import RandomizedSearchCV, train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder, StandardScaler

# Seed NumPy's global random number generator so that anything drawing from
# it produces the same values on every fresh run of the notebook.
np.random.seed(0)

In [21]:
# Fetch the "titanic" dataset (version 1) from OpenML as pandas objects,
# already split into the feature frame X and the target y.
X, y = fetch_openml(
    name="titanic",
    version=1,
    return_X_y=True,
    as_frame=True,
)

In [22]:
# Columns handled by the numeric branch of the preprocessing.
numeric_features = [
    "age",
    "fare",
]
# Columns handled by the categorical branch of the preprocessing.
categorical_features = [
    "embarked",
    "sex",
    "pclass",
]

In [23]:
# Numeric branch: fill missing values with the column median, then
# standardize each column.
numeric_transformer = Pipeline(
    steps=[
        ("imputer", SimpleImputer(strategy="median")),
        ("scaler", StandardScaler()),
    ]
)
# Bare expression so the notebook renders the pipeline.
numeric_transformer


In [24]:
# Categorical branch: one-hot encode (categories not seen during fit are
# ignored at transform time), then keep the 50% of encoded columns that
# score best on the chi-squared test against the target.
categorical_transformer = Pipeline(
    [
        ("encoder", OneHotEncoder(handle_unknown="ignore")),
        ("selector", SelectPercentile(score_func=chi2, percentile=50)),
    ]
)
# Bare expression so the notebook renders the pipeline.
categorical_transformer

In [25]:
# Route each group of columns through its own transformer; the outputs are
# concatenated in the order listed here (numeric first, then categorical).
preprocessor = ColumnTransformer(
    [
        ("num", numeric_transformer, numeric_features),
        ("cat", categorical_transformer, categorical_features),
    ]
)
# Bare expression so the notebook renders the transformer.
preprocessor

In [26]:
# Fit the preprocessor and show the transformed feature matrix. y is passed
# because the categorical branch's chi2-based selector is supervised and
# needs the target to score the encoded columns.
preprocessor.fit_transform(X,y)

array([[-0.03900549,  3.44258413,  1.        ,  0.        ,  1.        ,
         0.        ],
       [-2.21595217,  2.2866387 ,  0.        ,  1.        ,  1.        ,
         0.        ],
       [-2.13197749,  2.2866387 ,  1.        ,  0.        ,  1.        ,
         0.        ],
       ...,
       [-0.23279919, -0.50377442,  0.        ,  1.        ,  0.        ,
         1.        ],
       [-0.19404045, -0.50377442,  0.        ,  1.        ,  0.        ,
         1.        ],
       [-0.03900549, -0.49120717,  0.        ,  1.        ,  0.        ,
         1.        ]])

In [None]:
import pandas as pd

# Wrap the transformed feature matrix in a DataFrame for easier inspection.
# fit_transform has to be *called* with (X, y): handing the bound method
# itself to the DataFrame constructor gives pandas nothing it can build a
# frame from.
pd.DataFrame(preprocessor.fit_transform(X, y))