### Preprocessing Using Scikit-learn


In [1]:
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.impute import SimpleImputer
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline

In [2]:
import numpy as np
import pandas as pd


In [3]:
# Toy dataset: two numeric columns (each with one deliberately missing value)
# and one categorical column, so every preprocessing step has work to do.
data = pd.DataFrame(
    dict(
        age=[25, np.nan, 22, 28, 27],
        salary=[50000, 54000, np.nan, 57000, 60000],
        city=['New York', 'Paris', 'Berlin', 'London', 'Tokyo'],
    )
)


In [4]:
# Numeric branch: fill gaps first, then standardize, so the scaler never sees NaN.
numeric_transformer = Pipeline([
    ('imputer', SimpleImputer(strategy='mean')),  # replace missing values with the column mean
    ('scaler', StandardScaler()),                 # rescale each column to zero mean / unit variance
])
# Columns of `data` that this branch is meant for.
numeric_features = ['age', 'salary']

In [5]:

# Categorical branch: one-hot encode. With handle_unknown='ignore', a category
# not seen during fit is encoded as an all-zero row at transform time instead
# of raising an error.
categorical_transformer = Pipeline([
    ('encoder', OneHotEncoder(handle_unknown='ignore')),
])
# Columns of `data` that this branch is meant for.
categorical_features = ['city']


In [6]:

# Route each column group to its own sub-pipeline; the transformed blocks are
# concatenated side by side in the order listed here (numeric first, then
# categorical).
preprocessor = ColumnTransformer([
    # (name, transformer, columns it applies to)
    ('num', numeric_transformer, numeric_features),
    ('cat', categorical_transformer, categorical_features),
])


In [7]:
# Fit every sub-pipeline on `data` and transform it in the same call.
# Each output row has 7 values: the 2 imputed-and-scaled numeric columns
# (age, salary) followed by 5 one-hot columns, one per distinct city.
# An imputed cell shows up as 0 because the column mean maps to 0 after
# standardization.
preprocessed_data = preprocessor.fit_transform(data)
print(preprocessed_data)

[[-0.24397502 -1.58654445  0.          0.          1.          0.
   0.        ]
 [ 0.         -0.37774868  0.          0.          0.          1.
   0.        ]
 [-1.70782513  0.          1.          0.          0.          0.
   0.        ]
 [ 1.21987509  0.52884815  0.          1.          0.          0.
   0.        ]
 [ 0.73192505  1.43544498  0.          0.          0.          0.
   1.        ]]
