In [None]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import SGDClassifier
import seaborn as sns

# 1. make_column_transformer

In [None]:
# Load seaborn's bundled "titanic" example dataset and preview the first rows.
titanic = sns.load_dataset("titanic")
titanic.head()

In [None]:
# Store the boolean flags `alone` and `adult_male` as the string labels
# 'True' / 'False', and remove the `alive` column.
# NOTE(review): `alive` is presumably a textual copy of the `survived` target
# (it would leak the label) — confirm against the dataset description.
#
# The frame is rebuilt through a chained expression instead of being mutated
# with `inplace=True`, and `errors='ignore'` lets the cell be re-run after
# `alive` has already been removed (the original raised KeyError on re-run).
bool_to_label = {True: 'True', False: 'False'}
titanic = (
    titanic
    .assign(
        alone=titanic.alone.replace(bool_to_label),
        adult_male=titanic.adult_male.replace(bool_to_label),
    )
    .drop(columns=['alive'], errors='ignore')
)

In [None]:
# Separate the feature matrix from the target: `survived` is what we predict,
# every remaining column is a candidate feature.
X = titanic.drop(columns='survived')
y = titanic['survived']

In [None]:
from sklearn.model_selection import train_test_split

# Hold out a test split; the fixed random_state makes the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=0,
)

In [None]:
# Columns handed explicitly to each branch of the column transformer.
numerical_features = [
    'pclass',
    'age',
    'fare',
]
categorical_features = [
    'sex',
    'deck',
    'alone',
]

In [None]:
from sklearn.compose import make_column_transformer
from sklearn.impute import SimpleImputer
from sklearn.impute import KNNImputer
from sklearn.preprocessing import OneHotEncoder

In [None]:
# Numerical branch: fill missing values with SimpleImputer's default strategy,
# then standardise the features.
numerical_pipeline = make_pipeline(
    SimpleImputer(),
    StandardScaler(),
)

# Categorical branch: fill missing values with the most frequent label, then
# one-hot encode.
# handle_unknown='ignore' encodes a category that was not seen during fit as
# an all-zero row instead of raising at predict/score time, which can happen
# when a rare label only lands in the test split.
categorical_pipeline = make_pipeline(
    SimpleImputer(strategy='most_frequent'),
    OneHotEncoder(handle_unknown='ignore'),
)

In [None]:
 preprocessor = make_column_transformer((numerical_pipeline,numerical_features),
                                     (categorical_pipeline,categorical_features))

In [None]:
# Full model: preprocessing followed by a linear classifier trained with SGD.
# SGD shuffles the training data, so the estimator is seeded (same seed as the
# train/test split) to make the fitted model and its score reproducible.
model = make_pipeline(preprocessor, SGDClassifier(random_state=0))

In [None]:
# Fit the preprocessing steps and the classifier on the training split.
model.fit(X_train, y_train)

In [None]:
# Evaluate the fitted pipeline on the held-out test split.
model.score(X_test, y_test)

# 2. make_column_selector

`make_column_selector` permet de sélectionner des colonnes selon leur type (dtype). Cela peut être pratique pour séparer les variables quantitatives des variables qualitatives.

In [None]:
from sklearn.compose import make_column_selector

In [None]:
# Replace the hand-written column lists with dtype-based selectors:
# numeric dtypes go to the numerical branch, every other dtype to the
# categorical branch.
numerical_features = make_column_selector(
    dtype_include=np.number,
)
categorical_features = make_column_selector(
    dtype_exclude=np.number,
)

In [None]:
# Numerical branch: fill missing values with SimpleImputer's default strategy,
# then standardise the features.
numerical_pipeline = make_pipeline(
    SimpleImputer(),
    StandardScaler(),
)

# Categorical branch: fill missing values with the most frequent label, then
# one-hot encode.
# handle_unknown='ignore' encodes a category that was not seen during fit as
# an all-zero row instead of raising at predict/score time; this matters even
# more here because the dtype selector pulls in every non-numeric column.
categorical_pipeline = make_pipeline(
    SimpleImputer(strategy='most_frequent'),
    OneHotEncoder(handle_unknown='ignore'),
)

In [None]:
# Route each column group (chosen by the dtype selectors) through its
# matching pipeline and concatenate the results.
preprocessor = make_column_transformer(
    (numerical_pipeline, numerical_features),
    (categorical_pipeline, categorical_features),
)

In [None]:
# Full model: preprocessing followed by a linear classifier trained with SGD.
# SGD shuffles the training data, so the estimator is seeded (same seed as the
# train/test split) to make the fitted model and its score reproducible.
model = make_pipeline(preprocessor, SGDClassifier(random_state=0))

In [None]:
# Fit the selector-based pipeline on the training split.
model.fit(X_train, y_train)

In [None]:
# Evaluate the selector-based pipeline on the held-out test split.
model.score(X_test, y_test)