In [86]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
from sklearn.svm import LinearSVC
from sklearn.model_selection import train_test_split
from sklearn.compose import make_column_selector, make_column_transformer
from sklearn.impute import KNNImputer, SimpleImputer
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.pipeline import make_pipeline
from sklearn.model_selection import GridSearchCV

In [87]:
dataset = sns.load_dataset('titanic')

In [88]:
print('Shape: ', dataset.shape)
dataset.head()

Shape:  (891, 15)


Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone
0,0,3,male,22.0,1,0,7.25,S,Third,man,True,,Southampton,no,False
1,1,1,female,38.0,1,0,71.2833,C,First,woman,False,C,Cherbourg,yes,False
2,1,3,female,26.0,0,0,7.925,S,Third,woman,False,,Southampton,yes,True
3,1,1,female,35.0,1,0,53.1,S,First,woman,False,C,Southampton,yes,False
4,0,3,male,35.0,0,0,8.05,S,Third,man,True,,Southampton,no,True


In [89]:
y = dataset['survived']
X = dataset.drop('survived', axis=1)

X_train, X_test, y_train, y_test = train_test_split(X, y,
                                                    test_size=0.2,
                                                    random_state=0
                                                    )

In [90]:
numerical_features = make_column_selector(dtype_include=np.number)
categorical_features  = make_column_selector(dtype_exclude=np.number)

In [91]:
numerical_pipeline = make_pipeline(KNNImputer(),
                                   StandardScaler()
                                   )

categorical_pipeline = make_pipeline(SimpleImputer(strategy='most_frequent'),
                                     OneHotEncoder()
                                     )

In [92]:
preprocessor = make_column_transformer((numerical_pipeline, numerical_features),
                                       (categorical_pipeline, categorical_features)
                                       )

In [93]:
model = make_pipeline(preprocessor,
                      LinearSVC())

In [94]:
model.fit(X_train, y_train)

Pipeline(steps=[('columntransformer',
                 ColumnTransformer(transformers=[('pipeline-1',
                                                  Pipeline(steps=[('knnimputer',
                                                                   KNNImputer()),
                                                                  ('standardscaler',
                                                                   StandardScaler())]),
                                                  <sklearn.compose._column_transformer.make_column_selector object at 0x7f7b95b22e80>),
                                                 ('pipeline-2',
                                                  Pipeline(steps=[('simpleimputer',
                                                                   SimpleImputer(strategy='most_frequent')),
                                                                  ('onehotencoder',
                                                                   OneHotEncoder())])

In [95]:
parameters = {'knnimputer__n_neighbors': np.arange(1, 51),
              'knnimputer__weights': ['uniform, distance'],
              'linearsvc__penalty': ['l1', 'l2'],
              'linearsvc__loss': ['hinge', 'squared_hinge']
              }

In [96]:
grid = GridSearchCV(estimator=model,
                    param_grid=parameters,
                    cv=5
                    )