In [7]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer


In [8]:
data = {
    'Pclass': [3, 1, 3, 1, 3, 1, 2, 3],
    'Sex': ['male', 'female', 'female', 'female', 'male', 'male', np.nan, 'male'],
    'Age': [22, 38, 26, 35, 35, np.nan, 54, 2],
    'Fare': [7.25, 71.28, 7.92, 53.1, 8.05, 8.45, np.nan, 21.07],
    'Embarked': ['S', 'C', 'S', 'S', 'S', 'Q', 'S', np.nan],
    'Survived': [0, 1, 1, 1, 0, 0, 0, 0]
}
titanic_df = pd.DataFrame(data)
titanic_df

Unnamed: 0,Pclass,Sex,Age,Fare,Embarked,Survived
0,3,male,22.0,7.25,S,0
1,1,female,38.0,71.28,C,1
2,3,female,26.0,7.92,S,1
3,1,female,35.0,53.1,S,1
4,3,male,35.0,8.05,S,0
5,1,male,,8.45,Q,0
6,2,,54.0,,S,0
7,3,male,2.0,21.07,,0


In [9]:
x=titanic_df.drop('Survived',axis=1)
y=titanic_df['Survived']

In [10]:
x_train, x_test, y_train, y_test=train_test_split(x, y, test_size=.2, random_state=42)

In [12]:
preprocessor=ColumnTransformer(
    transformers=[('numeric_transformer', StandardScaler(), ['Pclass', 'Age', 'Fare']),
          ('categorical_transformer', OneHotEncoder(handle_unknown='ignore'), ['Sex', 'Embarked'])
    ],
     remainder='passthrough'
)

In [13]:
x_train_transformed=preprocessor.fit_transform(x_train)
x_test_transformer=preprocessor.transform(x_test)

In [14]:
x_train_transformed.shape

(6, 8)

In [15]:
x_train_transformed

array([[ 0.65465367, -0.4450991 , -0.69524785,  0.        ,  1.        ,
         0.        ,  1.        ,  0.        ],
       [ 0.65465367, -1.71681081,  0.0905164 ,  0.        ,  1.        ,
         0.        ,  0.        ,  1.        ],
       [ 0.65465367, -0.19075676, -0.65715363,  1.        ,  0.        ,
         0.        ,  1.        ,  0.        ],
       [ 0.65465367,  0.38151351, -0.64976222,  0.        ,  1.        ,
         0.        ,  1.        ,  0.        ],
       [-1.96396101,  0.38151351,  1.91164729,  1.        ,  0.        ,
         0.        ,  1.        ,  0.        ],
       [-0.65465367,  1.58963964,         nan,  0.        ,  0.        ,
         1.        ,  1.        ,  0.        ]])

In [16]:
x_test_transformer.shape

(2, 8)

In [17]:
x_test_transformer

array([[-1.96396101,  0.57227027,  2.94530822,  1.        ,  0.        ,
         0.        ,  0.        ,  0.        ],
       [-1.96396101,         nan, -0.6270194 ,  0.        ,  1.        ,
         0.        ,  0.        ,  0.        ]])