In [None]:
import pandas as pd

In [None]:
from sklearn.pipeline import Pipeline, make_pipeline

In [None]:
from sklearn.compose import ColumnTransformer, make_column_selector

In [None]:
from sklearn.impute import SimpleImputer

In [None]:
from sklearn.preprocessing import StandardScaler, OneHotEncoder

In [None]:
from sklearn.decomposition import PCA

In [None]:
from sklearn.feature_selection import SelectKBest, f_classif

In [None]:
# Load the sample dataset; the relative path is resolved against the current working directory.
df = pd.read_csv(filepath_or_buffer="../sample_dataset.csv")

In [None]:
# Features are every column except the last one; the target is the last column.
X, y = df.iloc[:, :-1], df.iloc[:, -1]

# Transformation of numerical and categorical columns

**Numerical variables:**
* Impute missing values with the column median
* Standardization

**Categorical variables:**
* Impute missing values with the most frequent value
* One-hot encoding

In [None]:
# Column-wise preprocessing:
#   - 'numerical': every non-object column -> median imputation, then standardization
#   - 'categorical': every object column -> most-frequent imputation, then dense one-hot encoding
# make_pipeline auto-names the inner steps (e.g. 'simpleimputer'); nested parameter
# paths such as numerical__simpleimputer__strategy depend on those generated names.
transformer = ColumnTransformer(
    transformers=[
        (
            'numerical',
            make_pipeline(
                SimpleImputer(strategy='median'),
                StandardScaler(),
            ),
            make_column_selector(dtype_exclude="object"),
        ),
        (
            'categorical',
            make_pipeline(
                SimpleImputer(strategy="most_frequent"),
                OneHotEncoder(sparse_output=False),
            ),
            make_column_selector(dtype_include="object"),
        ),
    ],
)

# PCA

In [None]:
# Keep the first 10 principal components.
# random_state is pinned because the default svd_solver="auto" may select the
# randomized solver on larger inputs; a fixed seed keeps repeated fits
# reproducible. Deterministic solvers ignore it.
pca = PCA(n_components=10, random_state=0)

# Feature selection

In [None]:
# Univariate selection: keep the 5 features with the highest ANOVA F-score against the target.
selector = SelectKBest(score_func=f_classif, k=5)

# Pipeline

In [None]:
# Chain the three stages. The step names below are the prefixes used when
# addressing nested parameters via set_params (e.g. pca__n_components).
pipeline = Pipeline(
    steps=[
        ('transformation', transformer),
        ('pca', pca),
        ('feature_selection', selector),
    ]
)

In [None]:
# Fit every step in order and display the final transformed matrix.
pipeline.fit_transform(X, y=y)

In [None]:
# Manual step 1: run the column transformer on its own.
# Note: `transformer` is the same object that was passed to `pipeline`, so this refits it.
X1 = transformer.fit_transform(X)

In [None]:
# Manual step 2: fit the PCA on the preprocessed matrix and project it.
X2 = pca.fit_transform(X1)

In [None]:
# Manual step 3: score the principal components against the target and keep the top k.
X3 = selector.fit_transform(X2, y=y)

In [None]:
# Display the result of the three manual steps
# (presumably to compare with the pipeline output above).
X3

Change the number of PCA components to 15 and the number of selected features to 3

In [None]:
# Nested parameters are addressed as <step name>__<parameter>.
# set_params returns the pipeline itself, which is displayed as the cell output.
pipeline.set_params(**{
    "pca__n_components": 15,
    "feature_selection__k": 3,
})

In [None]:
# Refit with the updated PCA / selection settings and display the new output.
pipeline.fit_transform(X, y=y)

Change the imputation strategy for the numerical columns to 'mean'

In [None]:
# Path: pipeline step 'transformation' -> ColumnTransformer entry 'numerical'
# -> auto-named make_pipeline step 'simpleimputer' -> its `strategy` parameter.
pipeline.set_params(**{
    "transformation__numerical__simpleimputer__strategy": 'mean',
})

In [None]:
# Refit with mean imputation for the numerical columns and display the output.
pipeline.fit_transform(X, y=y)