In [1]:
import pandas as pd
import numpy as np

In [4]:
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.impute import SimpleImputer
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OrdinalEncoder
import warnings 
warnings.filterwarnings("ignore")

In [8]:
# Category label for each of the nine rows of the demo frame.
# Note: "np.nan" below is the literal six-character string, NOT a float NaN,
# so pandas/scikit-learn treat it as an ordinary category value. The
# OrdinalEncoder further down lists "np.nan" explicitly for that reason.
cat_cols = [
    "cat1",
    "cat1",
    "cat2",
    "cat3",
    "cat2",
    "np.nan",
    "cat3",
    "cat3",
    "cat2",
]

In [9]:
# Toy data set used by the rest of the notebook: one row per character.
# "Anime" and "Levels" contain real float NaN entries for the imputers to
# fill; "cat_cols" reuses the list built in the previous cell.
d = dict(
    name=[
        "Minato", "Itachi", "Gohan", "Vegeta", "Eren",
        "Might_Guy", "Levi", "Goku", "Mikasa",
    ],
    Anime=["Naruto", "Naruto", "DBZ", np.nan, "AOT", "Naruto", "DBZ", np.nan, "AOT"],
    Levels=[20, 15, 50, 48, 30, np.nan, 50, np.nan, 25],
    cat_cols=cat_cols,
)

df = pd.DataFrame(data=d)
df

Unnamed: 0,name,Anime,Levels,cat_cols
0,Minato,Naruto,20.0,cat1
1,Itachi,Naruto,15.0,cat1
2,Gohan,DBZ,50.0,cat2
3,Vegeta,,48.0,cat3
4,Eren,AOT,30.0,cat2
5,Might_Guy,Naruto,,np.nan
6,Levi,DBZ,50.0,cat3
7,Goku,,,cat3
8,Mikasa,AOT,25.0,cat2


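## Putting It All Together
Combine three per-column preprocessing steps in a single `ColumnTransformer`:
* `OrdinalEncoder` for the ordered categories in `cat_cols`
* `OneHotEncoder` for the `Anime` column
* Constant-value imputation for the numerical `Levels` column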

In [10]:
# --- Ordinal branch: `cat_cols` ---------------------------------------------
cat_features = ["cat_cols"]

# The position of each label in the category list is its ordinal code
# (cat1 -> 0.0, cat2 -> 1.0, cat3 -> 2.0, "np.nan" -> 3.0).
# * "np.nan" stays in the list because the demo data stores that literal
#   string rather than a float NaN.
# * "missing" is appended (code 4.0) because it is the value `cat_imputer`
#   writes for genuinely missing entries. Without it, the first real NaN in
#   this column would make OrdinalEncoder.fit raise "unknown categories".
#   Appending at the end leaves the existing codes unchanged.
cat_transformer = Pipeline(steps=[
    ("cat_imputer", SimpleImputer(strategy="constant", fill_value="missing")),
    ("ordinal", OrdinalEncoder(categories=[["cat1", "cat2", "cat3", "np.nan", "missing"]])),
])

# --- One-hot branch: `Anime` ------------------------------------------------
anime_features = ["Anime"]

# Missing titles become their own "missing" category before one-hot encoding;
# handle_unknown="ignore" encodes categories unseen during fit as all zeros
# at transform time instead of raising.
anime_tranformer = Pipeline(steps=[
    ("anime_imputer", SimpleImputer(strategy="constant", fill_value="missing")),
    ("one_hot", OneHotEncoder(handle_unknown="ignore")),
])

# --- Numeric branch: `Levels` -----------------------------------------------
level_features = ["Levels"]

# Missing levels are replaced by the constant 5555.
# NOTE(review): presumably an easy-to-spot sentinel for the demo rather than a
# statistically meaningful fill value -- confirm before reusing on real data.
level_tranformer = Pipeline(steps=[
    ("level_imputer", SimpleImputer(strategy="constant", fill_value=5555)),
])

# Route each column group to its pipeline. Columns not listed here (`name`)
# are appended untouched after the transformed ones (remainder="passthrough").
# The transformer labels reuse the imputer step names; they are kept as-is
# so any lookup by label keeps working.
preprocessor = ColumnTransformer(
    transformers=[
        ("cat_imputer", cat_transformer, cat_features),
        ("anime_imputer", anime_tranformer, anime_features),
        ("level_imputer", level_tranformer, level_features),
    ],
    remainder="passthrough",
)

# Single-step pipeline wrapping the preprocessor.
model = Pipeline(steps=[("preprocessor", preprocessor)])

# Despite the name, `transformed_df` is a NumPy object array, not a DataFrame.
transformed_df = model.fit_transform(df)
transformed_df

array([[0.0, 0.0, 0.0, 1.0, 0.0, 20.0, 'Minato'],
       [0.0, 0.0, 0.0, 1.0, 0.0, 15.0, 'Itachi'],
       [1.0, 0.0, 1.0, 0.0, 0.0, 50.0, 'Gohan'],
       [2.0, 0.0, 0.0, 0.0, 1.0, 48.0, 'Vegeta'],
       [1.0, 1.0, 0.0, 0.0, 0.0, 30.0, 'Eren'],
       [3.0, 0.0, 0.0, 1.0, 0.0, 5555.0, 'Might_Guy'],
       [2.0, 0.0, 1.0, 0.0, 0.0, 50.0, 'Levi'],
       [2.0, 0.0, 0.0, 0.0, 1.0, 5555.0, 'Goku'],
       [1.0, 1.0, 0.0, 0.0, 0.0, 25.0, 'Mikasa']], dtype=object)

In [16]:
# Show the ColumnTransformer as this cell's output to inspect its structure.
preprocessor

In [14]:
# Show the outer Pipeline (which wraps `preprocessor`) as this cell's output.
model

In [11]:
# Wrap the transformed array in a DataFrame for easier viewing. No column
# names are supplied, so the columns get default integer labels (0..6) and
# the original feature names are not carried over.
data = pd.DataFrame(data=transformed_df)
data

Unnamed: 0,0,1,2,3,4,5,6
0,0.0,0.0,0.0,1.0,0.0,20.0,Minato
1,0.0,0.0,0.0,1.0,0.0,15.0,Itachi
2,1.0,0.0,1.0,0.0,0.0,50.0,Gohan
3,2.0,0.0,0.0,0.0,1.0,48.0,Vegeta
4,1.0,1.0,0.0,0.0,0.0,30.0,Eren
5,3.0,0.0,0.0,1.0,0.0,5555.0,Might_Guy
6,2.0,0.0,1.0,0.0,0.0,50.0,Levi
7,2.0,0.0,0.0,0.0,1.0,5555.0,Goku
8,1.0,1.0,0.0,0.0,0.0,25.0,Mikasa


In [12]:
# Overwrite one cell in place by position: row 2, column 5 (the imputed
# "Levels" column in the transformed frame).
# NOTE(review): this mutates `data` after it was displayed above, so that
# earlier output no longer reflects the current state.
target_cell = (2, 5)
data.iloc[target_cell] = 6666

In [15]:
# Re-display `data` to show the effect of the in-place assignment to row 2, column 5.
data

Unnamed: 0,0,1,2,3,4,5,6
0,0.0,0.0,0.0,1.0,0.0,20.0,Minato
1,0.0,0.0,0.0,1.0,0.0,15.0,Itachi
2,1.0,0.0,1.0,0.0,0.0,6666.0,Gohan
3,2.0,0.0,0.0,0.0,1.0,48.0,Vegeta
4,1.0,1.0,0.0,0.0,0.0,30.0,Eren
5,3.0,0.0,0.0,1.0,0.0,5555.0,Might_Guy
6,2.0,0.0,1.0,0.0,0.0,50.0,Levi
7,2.0,0.0,0.0,0.0,1.0,5555.0,Goku
8,1.0,1.0,0.0,0.0,0.0,25.0,Mikasa
