# Data transformation

There is one of the crucial aspect of many machine learning algorithms.

Here we will explore different preprocessing algorithms.

The result will be available in titanic.data.preprocessing module.

In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
from matplotlib import pyplot as plt
from sklearn.model_selection import train_test_split
%matplotlib widget

import titanic.data.load
import titanic.data.wrangling as wrng


train_df_orig, test_df_orig = titanic.data.load.from_csv()

train_df = wrng.wrangling(train_df_orig)
test_df = wrng.wrangling(test_df_orig)

X_train, X_test, y_train, y_test = train_test_split(train_df, train_df_orig.Survived, test_size=0.3, random_state=50)

train_df.info()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 891 entries, 0 to 890
Data columns (total 14 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   Survived      891 non-null    int64  
 1   Pclass        891 non-null    int64  
 2   Sex           891 non-null    object 
 3   Age           891 non-null    float64
 4   SibSp         891 non-null    int64  
 5   Parch         891 non-null    int64  
 6   Fare          891 non-null    float64
 7   Embarked      889 non-null    object 
 8   Title         891 non-null    object 
 9   CabLet        891 non-null    object 
 10  Alone         891 non-null    int64  
 11  Familiars     891 non-null    int64  
 12  TicketLetter  891 non-null    object 
 13  LenName       891 non-null    int64  
dtypes: float64(2), int64(7), object(5)
memory usage: 97.6+ KB


In [2]:
from sklearn.preprocessing import StandardScaler, OneHotEncoder, MinMaxScaler, RobustScaler, Normalizer
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer

numeric_features = ['Pclass', 'Alone', 'Familiars', 'LenName']
numeric_outliers = ['Age', 'Fare']
numeric_transformer = MinMaxScaler()
numeric_outliers_transformer = RobustScaler()

categorical_features = ['Embarked', 'Sex', 'Title', 'CabLet', 'TicketLetter']
categorical_transformer = OneHotEncoder(handle_unknown='ignore')

preprocessor = ColumnTransformer(
    transformers=[
        ('num', numeric_transformer, numeric_features),
        ('out', numeric_outliers_transformer, numeric_outliers),
        ('cat', categorical_transformer, categorical_features),
    ]
)



In [4]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import  GridSearchCV

n_est = [800,900,1000]
max_depth = [2,5,10]
max_depth.append(None)


param_grid = {
#     "preprocessor__num__norm": ['l1', 'l2', 'max'],
#     Number of trees in random forest
    "rand_for__n_estimators": n_est,
    # Number of features to consider at every split
    "rand_for__max_features": ['auto', 'sqrt'],
    # Maximum number of levels in tree
    "rand_for__max_depth": max_depth,
    # Minimum number of samples required to split a node
    "rand_for__min_samples_split": [2, 5, 10],
    # Minimum number of samples required at each leaf node
    "rand_for__min_samples_leaf": [2, 4, 8],
    # Method of selecting samples for training each tree
    "rand_for__bootstrap": [True, False],

}


best_params_ = {'bootstrap': False,
 'max_depth': 10,
 'max_features': 'sqrt',
 'min_samples_leaf': 2,
 'min_samples_split': 2,
 'n_estimators': 1000}

clf = Pipeline(
    steps=[("preprocessor", preprocessor), ("rand_for", RandomForestClassifier())]
)

grid_search = GridSearchCV(clf, param_grid, cv=10, n_jobs=10, verbose=5)

grid_search.fit(X_train, y_train)

Fitting 10 folds for each of 432 candidates, totalling 4320 fits
[CV 7/10] END rand_for__bootstrap=True, rand_for__max_depth=2, rand_for__max_features=auto, rand_for__min_samples_leaf=2, rand_for__min_samples_split=2, rand_for__n_estimators=800;, score=0.726 total time=   1.5s
[CV 3/10] END rand_for__bootstrap=True, rand_for__max_depth=2, rand_for__max_features=auto, rand_for__min_samples_leaf=2, rand_for__min_samples_split=2, rand_for__n_estimators=900;, score=0.889 total time=   1.7s
[CV 4/10] END rand_for__bootstrap=True, rand_for__max_depth=2, rand_for__max_features=auto, rand_for__min_samples_leaf=2, rand_for__min_samples_split=2, rand_for__n_estimators=1000;, score=0.790 total time=   1.9s
[CV 4/10] END rand_for__bootstrap=True, rand_for__max_depth=2, rand_for__max_features=auto, rand_for__min_samples_leaf=2, rand_for__min_samples_split=5, rand_for__n_estimators=800;, score=0.790 total time=   1.5s
[CV 4/10] END rand_for__bootstrap=True, rand_for__max_depth=2, rand_for__max_featu

[CV 4/10] END rand_for__bootstrap=True, rand_for__max_depth=2, rand_for__max_features=auto, rand_for__min_samples_leaf=2, rand_for__min_samples_split=2, rand_for__n_estimators=800;, score=0.790 total time=   1.6s
[CV 6/10] END rand_for__bootstrap=True, rand_for__max_depth=2, rand_for__max_features=auto, rand_for__min_samples_leaf=2, rand_for__min_samples_split=2, rand_for__n_estimators=900;, score=0.694 total time=   1.7s
[CV 6/10] END rand_for__bootstrap=True, rand_for__max_depth=2, rand_for__max_features=auto, rand_for__min_samples_leaf=2, rand_for__min_samples_split=2, rand_for__n_estimators=1000;, score=0.694 total time=   2.0s
[CV 5/10] END rand_for__bootstrap=True, rand_for__max_depth=2, rand_for__max_features=auto, rand_for__min_samples_leaf=2, rand_for__min_samples_split=5, rand_for__n_estimators=800;, score=0.742 total time=   1.5s
[CV 6/10] END rand_for__bootstrap=True, rand_for__max_depth=2, rand_for__max_features=auto, rand_for__min_samples_leaf=2, rand_for__min_samples_spl

[CV 1/10] END rand_for__bootstrap=True, rand_for__max_depth=2, rand_for__max_features=auto, rand_for__min_samples_leaf=2, rand_for__min_samples_split=2, rand_for__n_estimators=800;, score=0.810 total time=   1.6s
[CV 1/10] END rand_for__bootstrap=True, rand_for__max_depth=2, rand_for__max_features=auto, rand_for__min_samples_leaf=2, rand_for__min_samples_split=2, rand_for__n_estimators=900;, score=0.810 total time=   1.7s
[CV 1/10] END rand_for__bootstrap=True, rand_for__max_depth=2, rand_for__max_features=auto, rand_for__min_samples_leaf=2, rand_for__min_samples_split=2, rand_for__n_estimators=1000;, score=0.810 total time=   2.0s
[CV 2/10] END rand_for__bootstrap=True, rand_for__max_depth=2, rand_for__max_features=auto, rand_for__min_samples_leaf=2, rand_for__min_samples_split=5, rand_for__n_estimators=800;, score=0.794 total time=   1.5s
[CV 2/10] END rand_for__bootstrap=True, rand_for__max_depth=2, rand_for__max_features=auto, rand_for__min_samples_leaf=2, rand_for__min_samples_spl

[CV 2/10] END rand_for__bootstrap=True, rand_for__max_depth=2, rand_for__max_features=auto, rand_for__min_samples_leaf=2, rand_for__min_samples_split=2, rand_for__n_estimators=800;, score=0.794 total time=   1.5s
[CV 4/10] END rand_for__bootstrap=True, rand_for__max_depth=2, rand_for__max_features=auto, rand_for__min_samples_leaf=2, rand_for__min_samples_split=2, rand_for__n_estimators=900;, score=0.790 total time=   1.7s
[CV 5/10] END rand_for__bootstrap=True, rand_for__max_depth=2, rand_for__max_features=auto, rand_for__min_samples_leaf=2, rand_for__min_samples_split=2, rand_for__n_estimators=1000;, score=0.742 total time=   2.0s
[CV 8/10] END rand_for__bootstrap=True, rand_for__max_depth=2, rand_for__max_features=auto, rand_for__min_samples_leaf=2, rand_for__min_samples_split=5, rand_for__n_estimators=800;, score=0.855 total time=   1.6s
[CV 10/10] END rand_for__bootstrap=True, rand_for__max_depth=2, rand_for__max_features=auto, rand_for__min_samples_leaf=2, rand_for__min_samples_sp

[CV 2/10] END rand_for__bootstrap=True, rand_for__max_depth=2, rand_for__max_features=sqrt, rand_for__min_samples_leaf=4, rand_for__min_samples_split=2, rand_for__n_estimators=1000;, score=0.794 total time=   2.1s
[CV 2/10] END rand_for__bootstrap=True, rand_for__max_depth=2, rand_for__max_features=sqrt, rand_for__min_samples_leaf=4, rand_for__min_samples_split=5, rand_for__n_estimators=800;, score=0.794 total time=   1.7s
[CV 2/10] END rand_for__bootstrap=True, rand_for__max_depth=2, rand_for__max_features=sqrt, rand_for__min_samples_leaf=4, rand_for__min_samples_split=5, rand_for__n_estimators=900;, score=0.794 total time=   1.8s
[CV 2/10] END rand_for__bootstrap=True, rand_for__max_depth=2, rand_for__max_features=sqrt, rand_for__min_samples_leaf=4, rand_for__min_samples_split=5, rand_for__n_estimators=1000;, score=0.794 total time=   2.1s
[CV 2/10] END rand_for__bootstrap=True, rand_for__max_depth=2, rand_for__max_features=sqrt, rand_for__min_samples_leaf=4, rand_for__min_samples_sp

[CV 5/10] END rand_for__bootstrap=True, rand_for__max_depth=2, rand_for__max_features=sqrt, rand_for__min_samples_leaf=4, rand_for__min_samples_split=2, rand_for__n_estimators=1000;, score=0.742 total time=   2.2s
[CV 5/10] END rand_for__bootstrap=True, rand_for__max_depth=2, rand_for__max_features=sqrt, rand_for__min_samples_leaf=4, rand_for__min_samples_split=5, rand_for__n_estimators=800;, score=0.742 total time=   1.6s
[CV 5/10] END rand_for__bootstrap=True, rand_for__max_depth=2, rand_for__max_features=sqrt, rand_for__min_samples_leaf=4, rand_for__min_samples_split=5, rand_for__n_estimators=900;, score=0.742 total time=   1.9s
[CV 6/10] END rand_for__bootstrap=True, rand_for__max_depth=2, rand_for__max_features=sqrt, rand_for__min_samples_leaf=4, rand_for__min_samples_split=5, rand_for__n_estimators=1000;, score=0.694 total time=   2.1s
[CV 6/10] END rand_for__bootstrap=True, rand_for__max_depth=2, rand_for__max_features=sqrt, rand_for__min_samples_leaf=4, rand_for__min_samples_sp

[CV 9/10] END rand_for__bootstrap=True, rand_for__max_depth=2, rand_for__max_features=sqrt, rand_for__min_samples_leaf=4, rand_for__min_samples_split=2, rand_for__n_estimators=1000;, score=0.806 total time=   2.2s
[CV 9/10] END rand_for__bootstrap=True, rand_for__max_depth=2, rand_for__max_features=sqrt, rand_for__min_samples_leaf=4, rand_for__min_samples_split=5, rand_for__n_estimators=800;, score=0.806 total time=   1.5s
[CV 8/10] END rand_for__bootstrap=True, rand_for__max_depth=2, rand_for__max_features=sqrt, rand_for__min_samples_leaf=4, rand_for__min_samples_split=5, rand_for__n_estimators=900;, score=0.855 total time=   1.9s
[CV 8/10] END rand_for__bootstrap=True, rand_for__max_depth=2, rand_for__max_features=sqrt, rand_for__min_samples_leaf=4, rand_for__min_samples_split=5, rand_for__n_estimators=1000;, score=0.855 total time=   2.0s
[CV 8/10] END rand_for__bootstrap=True, rand_for__max_depth=2, rand_for__max_features=sqrt, rand_for__min_samples_leaf=4, rand_for__min_samples_sp

[CV 10/10] END rand_for__bootstrap=True, rand_for__max_depth=2, rand_for__max_features=sqrt, rand_for__min_samples_leaf=4, rand_for__min_samples_split=2, rand_for__n_estimators=1000;, score=0.758 total time=   2.1s
[CV 10/10] END rand_for__bootstrap=True, rand_for__max_depth=2, rand_for__max_features=sqrt, rand_for__min_samples_leaf=4, rand_for__min_samples_split=5, rand_for__n_estimators=800;, score=0.758 total time=   1.6s
[CV 10/10] END rand_for__bootstrap=True, rand_for__max_depth=2, rand_for__max_features=sqrt, rand_for__min_samples_leaf=4, rand_for__min_samples_split=5, rand_for__n_estimators=900;, score=0.758 total time=   1.9s
[CV 10/10] END rand_for__bootstrap=True, rand_for__max_depth=2, rand_for__max_features=sqrt, rand_for__min_samples_leaf=4, rand_for__min_samples_split=5, rand_for__n_estimators=1000;, score=0.758 total time=   2.0s
[CV 10/10] END rand_for__bootstrap=True, rand_for__max_depth=2, rand_for__max_features=sqrt, rand_for__min_samples_leaf=4, rand_for__min_sampl

[CV 3/10] END rand_for__bootstrap=True, rand_for__max_depth=5, rand_for__max_features=auto, rand_for__min_samples_leaf=8, rand_for__min_samples_split=5, rand_for__n_estimators=900;, score=0.889 total time=   2.1s
[CV 4/10] END rand_for__bootstrap=True, rand_for__max_depth=5, rand_for__max_features=auto, rand_for__min_samples_leaf=8, rand_for__min_samples_split=5, rand_for__n_estimators=1000;, score=0.790 total time=   2.3s
[CV 3/10] END rand_for__bootstrap=True, rand_for__max_depth=5, rand_for__max_features=auto, rand_for__min_samples_leaf=8, rand_for__min_samples_split=10, rand_for__n_estimators=800;, score=0.889 total time=   1.9s
[CV 3/10] END rand_for__bootstrap=True, rand_for__max_depth=5, rand_for__max_features=auto, rand_for__min_samples_leaf=8, rand_for__min_samples_split=10, rand_for__n_estimators=900;, score=0.889 total time=   2.2s
[CV 3/10] END rand_for__bootstrap=True, rand_for__max_depth=5, rand_for__max_features=auto, rand_for__min_samples_leaf=8, rand_for__min_samples_s

[CV 9/10] END rand_for__bootstrap=True, rand_for__max_depth=5, rand_for__max_features=auto, rand_for__min_samples_leaf=8, rand_for__min_samples_split=5, rand_for__n_estimators=900;, score=0.823 total time=   2.3s
[CV 9/10] END rand_for__bootstrap=True, rand_for__max_depth=5, rand_for__max_features=auto, rand_for__min_samples_leaf=8, rand_for__min_samples_split=5, rand_for__n_estimators=1000;, score=0.823 total time=   2.4s
[CV 10/10] END rand_for__bootstrap=True, rand_for__max_depth=5, rand_for__max_features=auto, rand_for__min_samples_leaf=8, rand_for__min_samples_split=10, rand_for__n_estimators=800;, score=0.790 total time=   2.0s
[CV 10/10] END rand_for__bootstrap=True, rand_for__max_depth=5, rand_for__max_features=auto, rand_for__min_samples_leaf=8, rand_for__min_samples_split=10, rand_for__n_estimators=900;, score=0.774 total time=   2.1s
[CV 10/10] END rand_for__bootstrap=True, rand_for__max_depth=5, rand_for__max_features=auto, rand_for__min_samples_leaf=8, rand_for__min_sample

[CV 5/10] END rand_for__bootstrap=True, rand_for__max_depth=5, rand_for__max_features=auto, rand_for__min_samples_leaf=8, rand_for__min_samples_split=5, rand_for__n_estimators=900;, score=0.774 total time=   2.2s
[CV 5/10] END rand_for__bootstrap=True, rand_for__max_depth=5, rand_for__max_features=auto, rand_for__min_samples_leaf=8, rand_for__min_samples_split=5, rand_for__n_estimators=1000;, score=0.774 total time=   2.4s
[CV 6/10] END rand_for__bootstrap=True, rand_for__max_depth=5, rand_for__max_features=auto, rand_for__min_samples_leaf=8, rand_for__min_samples_split=10, rand_for__n_estimators=800;, score=0.694 total time=   1.9s
[CV 6/10] END rand_for__bootstrap=True, rand_for__max_depth=5, rand_for__max_features=auto, rand_for__min_samples_leaf=8, rand_for__min_samples_split=10, rand_for__n_estimators=900;, score=0.694 total time=   2.1s
[CV 6/10] END rand_for__bootstrap=True, rand_for__max_depth=5, rand_for__max_features=auto, rand_for__min_samples_leaf=8, rand_for__min_samples_s

[CV 2/10] END rand_for__bootstrap=True, rand_for__max_depth=5, rand_for__max_features=auto, rand_for__min_samples_leaf=8, rand_for__min_samples_split=5, rand_for__n_estimators=1000;, score=0.810 total time=   2.3s
[CV 2/10] END rand_for__bootstrap=True, rand_for__max_depth=5, rand_for__max_features=auto, rand_for__min_samples_leaf=8, rand_for__min_samples_split=10, rand_for__n_estimators=800;, score=0.825 total time=   1.9s
[CV 2/10] END rand_for__bootstrap=True, rand_for__max_depth=5, rand_for__max_features=auto, rand_for__min_samples_leaf=8, rand_for__min_samples_split=10, rand_for__n_estimators=900;, score=0.810 total time=   2.2s
[CV 2/10] END rand_for__bootstrap=True, rand_for__max_depth=5, rand_for__max_features=auto, rand_for__min_samples_leaf=8, rand_for__min_samples_split=10, rand_for__n_estimators=1000;, score=0.810 total time=   2.3s
[CV 2/10] END rand_for__bootstrap=True, rand_for__max_depth=5, rand_for__max_features=sqrt, rand_for__min_samples_leaf=2, rand_for__min_samples

[CV 1/10] END rand_for__bootstrap=True, rand_for__max_depth=10, rand_for__max_features=auto, rand_for__min_samples_leaf=2, rand_for__min_samples_split=10, rand_for__n_estimators=800;, score=0.810 total time=   2.2s
[CV 1/10] END rand_for__bootstrap=True, rand_for__max_depth=10, rand_for__max_features=auto, rand_for__min_samples_leaf=2, rand_for__min_samples_split=10, rand_for__n_estimators=900;, score=0.810 total time=   2.3s
[CV 1/10] END rand_for__bootstrap=True, rand_for__max_depth=10, rand_for__max_features=auto, rand_for__min_samples_leaf=2, rand_for__min_samples_split=10, rand_for__n_estimators=1000;, score=0.810 total time=   2.7s
[CV 1/10] END rand_for__bootstrap=True, rand_for__max_depth=10, rand_for__max_features=auto, rand_for__min_samples_leaf=4, rand_for__min_samples_split=2, rand_for__n_estimators=800;, score=0.825 total time=   2.1s
[CV 1/10] END rand_for__bootstrap=True, rand_for__max_depth=10, rand_for__max_features=auto, rand_for__min_samples_leaf=4, rand_for__min_sam

[CV 8/10] END rand_for__bootstrap=True, rand_for__max_depth=10, rand_for__max_features=auto, rand_for__min_samples_leaf=2, rand_for__min_samples_split=10, rand_for__n_estimators=800;, score=0.774 total time=   2.1s
[CV 8/10] END rand_for__bootstrap=True, rand_for__max_depth=10, rand_for__max_features=auto, rand_for__min_samples_leaf=2, rand_for__min_samples_split=10, rand_for__n_estimators=900;, score=0.823 total time=   2.5s
[CV 8/10] END rand_for__bootstrap=True, rand_for__max_depth=10, rand_for__max_features=auto, rand_for__min_samples_leaf=2, rand_for__min_samples_split=10, rand_for__n_estimators=1000;, score=0.790 total time=   2.7s
[CV 8/10] END rand_for__bootstrap=True, rand_for__max_depth=10, rand_for__max_features=auto, rand_for__min_samples_leaf=4, rand_for__min_samples_split=2, rand_for__n_estimators=800;, score=0.806 total time=   2.4s
[CV 8/10] END rand_for__bootstrap=True, rand_for__max_depth=10, rand_for__max_features=auto, rand_for__min_samples_leaf=4, rand_for__min_sam

[CV 7/10] END rand_for__bootstrap=True, rand_for__max_depth=10, rand_for__max_features=auto, rand_for__min_samples_leaf=2, rand_for__min_samples_split=10, rand_for__n_estimators=800;, score=0.790 total time=   2.1s
[CV 7/10] END rand_for__bootstrap=True, rand_for__max_depth=10, rand_for__max_features=auto, rand_for__min_samples_leaf=2, rand_for__min_samples_split=10, rand_for__n_estimators=900;, score=0.790 total time=   2.5s
[CV 7/10] END rand_for__bootstrap=True, rand_for__max_depth=10, rand_for__max_features=auto, rand_for__min_samples_leaf=2, rand_for__min_samples_split=10, rand_for__n_estimators=1000;, score=0.790 total time=   2.6s
[CV 7/10] END rand_for__bootstrap=True, rand_for__max_depth=10, rand_for__max_features=auto, rand_for__min_samples_leaf=4, rand_for__min_samples_split=2, rand_for__n_estimators=800;, score=0.742 total time=   2.4s
[CV 7/10] END rand_for__bootstrap=True, rand_for__max_depth=10, rand_for__max_features=auto, rand_for__min_samples_leaf=4, rand_for__min_sam

[CV 3/10] END rand_for__bootstrap=True, rand_for__max_depth=10, rand_for__max_features=auto, rand_for__min_samples_leaf=2, rand_for__min_samples_split=10, rand_for__n_estimators=800;, score=0.921 total time=   2.2s
[CV 3/10] END rand_for__bootstrap=True, rand_for__max_depth=10, rand_for__max_features=auto, rand_for__min_samples_leaf=2, rand_for__min_samples_split=10, rand_for__n_estimators=900;, score=0.905 total time=   2.4s
[CV 3/10] END rand_for__bootstrap=True, rand_for__max_depth=10, rand_for__max_features=auto, rand_for__min_samples_leaf=2, rand_for__min_samples_split=10, rand_for__n_estimators=1000;, score=0.921 total time=   2.6s
[CV 3/10] END rand_for__bootstrap=True, rand_for__max_depth=10, rand_for__max_features=auto, rand_for__min_samples_leaf=4, rand_for__min_samples_split=2, rand_for__n_estimators=800;, score=0.905 total time=   2.3s
[CV 3/10] END rand_for__bootstrap=True, rand_for__max_depth=10, rand_for__max_features=auto, rand_for__min_samples_leaf=4, rand_for__min_sam

[CV 10/10] END rand_for__bootstrap=True, rand_for__max_depth=10, rand_for__max_features=auto, rand_for__min_samples_leaf=2, rand_for__min_samples_split=10, rand_for__n_estimators=800;, score=0.839 total time=   2.0s
[CV 9/10] END rand_for__bootstrap=True, rand_for__max_depth=10, rand_for__max_features=auto, rand_for__min_samples_leaf=2, rand_for__min_samples_split=10, rand_for__n_estimators=900;, score=0.839 total time=   2.4s
[CV 9/10] END rand_for__bootstrap=True, rand_for__max_depth=10, rand_for__max_features=auto, rand_for__min_samples_leaf=2, rand_for__min_samples_split=10, rand_for__n_estimators=1000;, score=0.839 total time=   2.7s
[CV 9/10] END rand_for__bootstrap=True, rand_for__max_depth=10, rand_for__max_features=auto, rand_for__min_samples_leaf=4, rand_for__min_samples_split=2, rand_for__n_estimators=800;, score=0.855 total time=   2.4s
[CV 9/10] END rand_for__bootstrap=True, rand_for__max_depth=10, rand_for__max_features=auto, rand_for__min_samples_leaf=4, rand_for__min_sa

[CV 6/10] END rand_for__bootstrap=True, rand_for__max_depth=10, rand_for__max_features=sqrt, rand_for__min_samples_leaf=4, rand_for__min_samples_split=10, rand_for__n_estimators=900;, score=0.726 total time=   2.8s
[CV 5/10] END rand_for__bootstrap=True, rand_for__max_depth=10, rand_for__max_features=sqrt, rand_for__min_samples_leaf=4, rand_for__min_samples_split=10, rand_for__n_estimators=1000;, score=0.790 total time=   2.6s
[CV 5/10] END rand_for__bootstrap=True, rand_for__max_depth=10, rand_for__max_features=sqrt, rand_for__min_samples_leaf=8, rand_for__min_samples_split=2, rand_for__n_estimators=800;, score=0.774 total time=   2.2s
[CV 5/10] END rand_for__bootstrap=True, rand_for__max_depth=10, rand_for__max_features=sqrt, rand_for__min_samples_leaf=8, rand_for__min_samples_split=2, rand_for__n_estimators=900;, score=0.774 total time=   2.7s
[CV 5/10] END rand_for__bootstrap=True, rand_for__max_depth=10, rand_for__max_features=sqrt, rand_for__min_samples_leaf=8, rand_for__min_samp

[CV 1/10] END rand_for__bootstrap=True, rand_for__max_depth=10, rand_for__max_features=sqrt, rand_for__min_samples_leaf=8, rand_for__min_samples_split=2, rand_for__n_estimators=800;, score=0.825 total time=   2.1s
[CV 10/10] END rand_for__bootstrap=True, rand_for__max_depth=10, rand_for__max_features=sqrt, rand_for__min_samples_leaf=8, rand_for__min_samples_split=2, rand_for__n_estimators=800;, score=0.790 total time=   2.1s
[CV 10/10] END rand_for__bootstrap=True, rand_for__max_depth=10, rand_for__max_features=sqrt, rand_for__min_samples_leaf=8, rand_for__min_samples_split=2, rand_for__n_estimators=900;, score=0.806 total time=   2.7s
[CV 9/10] END rand_for__bootstrap=True, rand_for__max_depth=10, rand_for__max_features=sqrt, rand_for__min_samples_leaf=8, rand_for__min_samples_split=2, rand_for__n_estimators=1000;, score=0.839 total time=   2.7s
[CV 9/10] END rand_for__bootstrap=True, rand_for__max_depth=10, rand_for__max_features=sqrt, rand_for__min_samples_leaf=8, rand_for__min_samp

[CV 10/10] END rand_for__bootstrap=True, rand_for__max_depth=10, rand_for__max_features=sqrt, rand_for__min_samples_leaf=4, rand_for__min_samples_split=10, rand_for__n_estimators=1000;, score=0.855 total time=   2.8s
[CV 1/10] END rand_for__bootstrap=True, rand_for__max_depth=10, rand_for__max_features=sqrt, rand_for__min_samples_leaf=8, rand_for__min_samples_split=2, rand_for__n_estimators=900;, score=0.825 total time=   2.5s
[CV 1/10] END rand_for__bootstrap=True, rand_for__max_depth=10, rand_for__max_features=sqrt, rand_for__min_samples_leaf=8, rand_for__min_samples_split=2, rand_for__n_estimators=1000;, score=0.825 total time=   3.1s
[CV 1/10] END rand_for__bootstrap=True, rand_for__max_depth=10, rand_for__max_features=sqrt, rand_for__min_samples_leaf=8, rand_for__min_samples_split=5, rand_for__n_estimators=800;, score=0.825 total time=   2.1s
[CV 1/10] END rand_for__bootstrap=True, rand_for__max_depth=10, rand_for__max_features=sqrt, rand_for__min_samples_leaf=8, rand_for__min_sam

[CV 2/10] END rand_for__bootstrap=True, rand_for__max_depth=10, rand_for__max_features=sqrt, rand_for__min_samples_leaf=8, rand_for__min_samples_split=2, rand_for__n_estimators=800;, score=0.825 total time=   2.1s
[CV 3/10] END rand_for__bootstrap=True, rand_for__max_depth=10, rand_for__max_features=sqrt, rand_for__min_samples_leaf=8, rand_for__min_samples_split=2, rand_for__n_estimators=900;, score=0.889 total time=   2.7s
[CV 3/10] END rand_for__bootstrap=True, rand_for__max_depth=10, rand_for__max_features=sqrt, rand_for__min_samples_leaf=8, rand_for__min_samples_split=2, rand_for__n_estimators=1000;, score=0.889 total time=   2.8s
[CV 3/10] END rand_for__bootstrap=True, rand_for__max_depth=10, rand_for__max_features=sqrt, rand_for__min_samples_leaf=8, rand_for__min_samples_split=5, rand_for__n_estimators=800;, score=0.889 total time=   2.1s
[CV 4/10] END rand_for__bootstrap=True, rand_for__max_depth=10, rand_for__max_features=sqrt, rand_for__min_samples_leaf=8, rand_for__min_sample

[CV 8/10] END rand_for__bootstrap=True, rand_for__max_depth=None, rand_for__max_features=auto, rand_for__min_samples_leaf=8, rand_for__min_samples_split=10, rand_for__n_estimators=900;, score=0.823 total time=   2.9s
[CV 8/10] END rand_for__bootstrap=True, rand_for__max_depth=None, rand_for__max_features=auto, rand_for__min_samples_leaf=8, rand_for__min_samples_split=10, rand_for__n_estimators=1000;, score=0.839 total time=   3.0s
[CV 8/10] END rand_for__bootstrap=True, rand_for__max_depth=None, rand_for__max_features=sqrt, rand_for__min_samples_leaf=2, rand_for__min_samples_split=2, rand_for__n_estimators=800;, score=0.790 total time=   2.5s
[CV 8/10] END rand_for__bootstrap=True, rand_for__max_depth=None, rand_for__max_features=sqrt, rand_for__min_samples_leaf=2, rand_for__min_samples_split=2, rand_for__n_estimators=900;, score=0.790 total time=   3.2s
[CV 8/10] END rand_for__bootstrap=True, rand_for__max_depth=None, rand_for__max_features=sqrt, rand_for__min_samples_leaf=2, rand_for

[CV 2/10] END rand_for__bootstrap=True, rand_for__max_depth=None, rand_for__max_features=sqrt, rand_for__min_samples_leaf=2, rand_for__min_samples_split=2, rand_for__n_estimators=800;, score=0.841 total time=   2.6s
[CV 2/10] END rand_for__bootstrap=True, rand_for__max_depth=None, rand_for__max_features=sqrt, rand_for__min_samples_leaf=2, rand_for__min_samples_split=2, rand_for__n_estimators=900;, score=0.857 total time=   2.9s
[CV 2/10] END rand_for__bootstrap=True, rand_for__max_depth=None, rand_for__max_features=sqrt, rand_for__min_samples_leaf=2, rand_for__min_samples_split=2, rand_for__n_estimators=1000;, score=0.841 total time=   3.9s
[CV 1/10] END rand_for__bootstrap=True, rand_for__max_depth=None, rand_for__max_features=sqrt, rand_for__min_samples_leaf=2, rand_for__min_samples_split=5, rand_for__n_estimators=800;, score=0.825 total time=   3.0s
[CV 1/10] END rand_for__bootstrap=True, rand_for__max_depth=None, rand_for__max_features=sqrt, rand_for__min_samples_leaf=2, rand_for__

[CV 9/10] END rand_for__bootstrap=True, rand_for__max_depth=None, rand_for__max_features=sqrt, rand_for__min_samples_leaf=2, rand_for__min_samples_split=2, rand_for__n_estimators=800;, score=0.839 total time=   2.4s
[CV 9/10] END rand_for__bootstrap=True, rand_for__max_depth=None, rand_for__max_features=sqrt, rand_for__min_samples_leaf=2, rand_for__min_samples_split=2, rand_for__n_estimators=900;, score=0.839 total time=   3.4s
[CV 9/10] END rand_for__bootstrap=True, rand_for__max_depth=None, rand_for__max_features=sqrt, rand_for__min_samples_leaf=2, rand_for__min_samples_split=2, rand_for__n_estimators=1000;, score=0.839 total time=   3.9s
[CV 9/10] END rand_for__bootstrap=True, rand_for__max_depth=None, rand_for__max_features=sqrt, rand_for__min_samples_leaf=2, rand_for__min_samples_split=5, rand_for__n_estimators=800;, score=0.855 total time=   2.5s
[CV 9/10] END rand_for__bootstrap=True, rand_for__max_depth=None, rand_for__max_features=sqrt, rand_for__min_samples_leaf=2, rand_for__

[CV 1/10] END rand_for__bootstrap=True, rand_for__max_depth=None, rand_for__max_features=sqrt, rand_for__min_samples_leaf=2, rand_for__min_samples_split=2, rand_for__n_estimators=800;, score=0.825 total time=   2.6s
[CV 1/10] END rand_for__bootstrap=True, rand_for__max_depth=None, rand_for__max_features=sqrt, rand_for__min_samples_leaf=2, rand_for__min_samples_split=2, rand_for__n_estimators=900;, score=0.825 total time=   2.9s
[CV 1/10] END rand_for__bootstrap=True, rand_for__max_depth=None, rand_for__max_features=sqrt, rand_for__min_samples_leaf=2, rand_for__min_samples_split=2, rand_for__n_estimators=1000;, score=0.825 total time=   4.2s
[CV 3/10] END rand_for__bootstrap=True, rand_for__max_depth=None, rand_for__max_features=sqrt, rand_for__min_samples_leaf=2, rand_for__min_samples_split=5, rand_for__n_estimators=800;, score=0.937 total time=   3.1s
[CV 3/10] END rand_for__bootstrap=True, rand_for__max_depth=None, rand_for__max_features=sqrt, rand_for__min_samples_leaf=2, rand_for__

[CV 3/10] END rand_for__bootstrap=True, rand_for__max_depth=None, rand_for__max_features=sqrt, rand_for__min_samples_leaf=2, rand_for__min_samples_split=2, rand_for__n_estimators=900;, score=0.921 total time=   3.0s
[CV 3/10] END rand_for__bootstrap=True, rand_for__max_depth=None, rand_for__max_features=sqrt, rand_for__min_samples_leaf=2, rand_for__min_samples_split=2, rand_for__n_estimators=1000;, score=0.921 total time=   4.0s
[CV 2/10] END rand_for__bootstrap=True, rand_for__max_depth=None, rand_for__max_features=sqrt, rand_for__min_samples_leaf=2, rand_for__min_samples_split=5, rand_for__n_estimators=800;, score=0.841 total time=   3.0s
[CV 2/10] END rand_for__bootstrap=True, rand_for__max_depth=None, rand_for__max_features=sqrt, rand_for__min_samples_leaf=2, rand_for__min_samples_split=5, rand_for__n_estimators=900;, score=0.857 total time=   2.7s
[CV 2/10] END rand_for__bootstrap=True, rand_for__max_depth=None, rand_for__max_features=sqrt, rand_for__min_samples_leaf=2, rand_for__

[CV 8/10] END rand_for__bootstrap=False, rand_for__max_depth=2, rand_for__max_features=auto, rand_for__min_samples_leaf=2, rand_for__min_samples_split=10, rand_for__n_estimators=900;, score=0.855 total time=   1.6s
[CV 8/10] END rand_for__bootstrap=False, rand_for__max_depth=2, rand_for__max_features=auto, rand_for__min_samples_leaf=2, rand_for__min_samples_split=10, rand_for__n_estimators=1000;, score=0.855 total time=   1.7s
[CV 8/10] END rand_for__bootstrap=False, rand_for__max_depth=2, rand_for__max_features=auto, rand_for__min_samples_leaf=4, rand_for__min_samples_split=2, rand_for__n_estimators=800;, score=0.855 total time=   1.4s
[CV 8/10] END rand_for__bootstrap=False, rand_for__max_depth=2, rand_for__max_features=auto, rand_for__min_samples_leaf=4, rand_for__min_samples_split=2, rand_for__n_estimators=900;, score=0.855 total time=   1.6s
[CV 8/10] END rand_for__bootstrap=False, rand_for__max_depth=2, rand_for__max_features=auto, rand_for__min_samples_leaf=4, rand_for__min_samp

[CV 7/10] END rand_for__bootstrap=False, rand_for__max_depth=2, rand_for__max_features=auto, rand_for__min_samples_leaf=4, rand_for__min_samples_split=2, rand_for__n_estimators=800;, score=0.726 total time=   1.4s
[CV 6/10] END rand_for__bootstrap=False, rand_for__max_depth=2, rand_for__max_features=auto, rand_for__min_samples_leaf=4, rand_for__min_samples_split=2, rand_for__n_estimators=900;, score=0.694 total time=   1.6s
[CV 6/10] END rand_for__bootstrap=False, rand_for__max_depth=2, rand_for__max_features=auto, rand_for__min_samples_leaf=4, rand_for__min_samples_split=2, rand_for__n_estimators=1000;, score=0.694 total time=   1.6s
[CV 6/10] END rand_for__bootstrap=False, rand_for__max_depth=2, rand_for__max_features=auto, rand_for__min_samples_leaf=4, rand_for__min_samples_split=5, rand_for__n_estimators=800;, score=0.694 total time=   1.4s
[CV 6/10] END rand_for__bootstrap=False, rand_for__max_depth=2, rand_for__max_features=auto, rand_for__min_samples_leaf=4, rand_for__min_sample

[CV 10/10] END rand_for__bootstrap=False, rand_for__max_depth=2, rand_for__max_features=auto, rand_for__min_samples_leaf=4, rand_for__min_samples_split=2, rand_for__n_estimators=800;, score=0.758 total time=   1.4s
[CV 10/10] END rand_for__bootstrap=False, rand_for__max_depth=2, rand_for__max_features=auto, rand_for__min_samples_leaf=4, rand_for__min_samples_split=2, rand_for__n_estimators=900;, score=0.758 total time=   1.6s
[CV 10/10] END rand_for__bootstrap=False, rand_for__max_depth=2, rand_for__max_features=auto, rand_for__min_samples_leaf=4, rand_for__min_samples_split=2, rand_for__n_estimators=1000;, score=0.758 total time=   1.6s
[CV 10/10] END rand_for__bootstrap=False, rand_for__max_depth=2, rand_for__max_features=auto, rand_for__min_samples_leaf=4, rand_for__min_samples_split=5, rand_for__n_estimators=800;, score=0.758 total time=   1.3s
[CV 10/10] END rand_for__bootstrap=False, rand_for__max_depth=2, rand_for__max_features=auto, rand_for__min_samples_leaf=4, rand_for__min_s

[CV 2/10] END rand_for__bootstrap=False, rand_for__max_depth=2, rand_for__max_features=auto, rand_for__min_samples_leaf=4, rand_for__min_samples_split=2, rand_for__n_estimators=1000;, score=0.794 total time=   1.8s
[CV 2/10] END rand_for__bootstrap=False, rand_for__max_depth=2, rand_for__max_features=auto, rand_for__min_samples_leaf=4, rand_for__min_samples_split=5, rand_for__n_estimators=800;, score=0.794 total time=   1.3s
[CV 1/10] END rand_for__bootstrap=False, rand_for__max_depth=2, rand_for__max_features=auto, rand_for__min_samples_leaf=4, rand_for__min_samples_split=5, rand_for__n_estimators=900;, score=0.810 total time=   1.5s
[CV 1/10] END rand_for__bootstrap=False, rand_for__max_depth=2, rand_for__max_features=auto, rand_for__min_samples_leaf=4, rand_for__min_samples_split=5, rand_for__n_estimators=1000;, score=0.810 total time=   1.8s
[CV 1/10] END rand_for__bootstrap=False, rand_for__max_depth=2, rand_for__max_features=auto, rand_for__min_samples_leaf=4, rand_for__min_sampl

[CV 4/10] END rand_for__bootstrap=False, rand_for__max_depth=2, rand_for__max_features=auto, rand_for__min_samples_leaf=4, rand_for__min_samples_split=5, rand_for__n_estimators=800;, score=0.790 total time=   1.3s
[CV 3/10] END rand_for__bootstrap=False, rand_for__max_depth=2, rand_for__max_features=auto, rand_for__min_samples_leaf=4, rand_for__min_samples_split=5, rand_for__n_estimators=900;, score=0.889 total time=   1.6s
[CV 3/10] END rand_for__bootstrap=False, rand_for__max_depth=2, rand_for__max_features=auto, rand_for__min_samples_leaf=4, rand_for__min_samples_split=5, rand_for__n_estimators=1000;, score=0.889 total time=   1.8s
[CV 3/10] END rand_for__bootstrap=False, rand_for__max_depth=2, rand_for__max_features=auto, rand_for__min_samples_leaf=4, rand_for__min_samples_split=10, rand_for__n_estimators=800;, score=0.889 total time=   1.9s
[CV 4/10] END rand_for__bootstrap=False, rand_for__max_depth=2, rand_for__max_features=auto, rand_for__min_samples_leaf=4, rand_for__min_sampl

[CV 5/10] END rand_for__bootstrap=False, rand_for__max_depth=2, rand_for__max_features=sqrt, rand_for__min_samples_leaf=8, rand_for__min_samples_split=2, rand_for__n_estimators=800;, score=0.742 total time=   1.4s
[CV 6/10] END rand_for__bootstrap=False, rand_for__max_depth=2, rand_for__max_features=sqrt, rand_for__min_samples_leaf=8, rand_for__min_samples_split=2, rand_for__n_estimators=900;, score=0.694 total time=   1.4s
[CV 5/10] END rand_for__bootstrap=False, rand_for__max_depth=2, rand_for__max_features=sqrt, rand_for__min_samples_leaf=8, rand_for__min_samples_split=2, rand_for__n_estimators=1000;, score=0.742 total time=   1.7s
[CV 5/10] END rand_for__bootstrap=False, rand_for__max_depth=2, rand_for__max_features=sqrt, rand_for__min_samples_leaf=8, rand_for__min_samples_split=5, rand_for__n_estimators=800;, score=0.742 total time=   1.3s
[CV 5/10] END rand_for__bootstrap=False, rand_for__max_depth=2, rand_for__max_features=sqrt, rand_for__min_samples_leaf=8, rand_for__min_sample

[CV 1/10] END rand_for__bootstrap=False, rand_for__max_depth=2, rand_for__max_features=sqrt, rand_for__min_samples_leaf=8, rand_for__min_samples_split=2, rand_for__n_estimators=1000;, score=0.810 total time=   1.6s
[CV 1/10] END rand_for__bootstrap=False, rand_for__max_depth=2, rand_for__max_features=sqrt, rand_for__min_samples_leaf=8, rand_for__min_samples_split=5, rand_for__n_estimators=800;, score=0.810 total time=   1.4s
[CV 1/10] END rand_for__bootstrap=False, rand_for__max_depth=2, rand_for__max_features=sqrt, rand_for__min_samples_leaf=8, rand_for__min_samples_split=5, rand_for__n_estimators=900;, score=0.810 total time=   1.4s
[CV 2/10] END rand_for__bootstrap=False, rand_for__max_depth=2, rand_for__max_features=sqrt, rand_for__min_samples_leaf=8, rand_for__min_samples_split=5, rand_for__n_estimators=1000;, score=0.794 total time=   1.6s
[CV 2/10] END rand_for__bootstrap=False, rand_for__max_depth=2, rand_for__max_features=sqrt, rand_for__min_samples_leaf=8, rand_for__min_sampl

[CV 6/10] END rand_for__bootstrap=False, rand_for__max_depth=2, rand_for__max_features=sqrt, rand_for__min_samples_leaf=8, rand_for__min_samples_split=5, rand_for__n_estimators=800;, score=0.694 total time=   1.3s
[CV 6/10] END rand_for__bootstrap=False, rand_for__max_depth=2, rand_for__max_features=sqrt, rand_for__min_samples_leaf=8, rand_for__min_samples_split=5, rand_for__n_estimators=900;, score=0.694 total time=   1.4s
[CV 6/10] END rand_for__bootstrap=False, rand_for__max_depth=2, rand_for__max_features=sqrt, rand_for__min_samples_leaf=8, rand_for__min_samples_split=5, rand_for__n_estimators=1000;, score=0.694 total time=   1.9s
[CV 6/10] END rand_for__bootstrap=False, rand_for__max_depth=2, rand_for__max_features=sqrt, rand_for__min_samples_leaf=8, rand_for__min_samples_split=10, rand_for__n_estimators=800;, score=0.694 total time=   1.9s
[CV 6/10] END rand_for__bootstrap=False, rand_for__max_depth=2, rand_for__max_features=sqrt, rand_for__min_samples_leaf=8, rand_for__min_sampl

[CV 2/10] END rand_for__bootstrap=False, rand_for__max_depth=2, rand_for__max_features=sqrt, rand_for__min_samples_leaf=8, rand_for__min_samples_split=5, rand_for__n_estimators=900;, score=0.794 total time=   1.4s
[CV 1/10] END rand_for__bootstrap=False, rand_for__max_depth=2, rand_for__max_features=sqrt, rand_for__min_samples_leaf=8, rand_for__min_samples_split=5, rand_for__n_estimators=1000;, score=0.810 total time=   1.6s
[CV 1/10] END rand_for__bootstrap=False, rand_for__max_depth=2, rand_for__max_features=sqrt, rand_for__min_samples_leaf=8, rand_for__min_samples_split=10, rand_for__n_estimators=800;, score=0.810 total time=   1.8s
[CV 1/10] END rand_for__bootstrap=False, rand_for__max_depth=2, rand_for__max_features=sqrt, rand_for__min_samples_leaf=8, rand_for__min_samples_split=10, rand_for__n_estimators=900;, score=0.810 total time=   2.5s
[CV 2/10] END rand_for__bootstrap=False, rand_for__max_depth=2, rand_for__max_features=sqrt, rand_for__min_samples_leaf=8, rand_for__min_samp

[CV 4/10] END rand_for__bootstrap=False, rand_for__max_depth=2, rand_for__max_features=sqrt, rand_for__min_samples_leaf=8, rand_for__min_samples_split=10, rand_for__n_estimators=800;, score=0.790 total time=   1.9s
[CV 4/10] END rand_for__bootstrap=False, rand_for__max_depth=2, rand_for__max_features=sqrt, rand_for__min_samples_leaf=8, rand_for__min_samples_split=10, rand_for__n_estimators=900;, score=0.790 total time=   2.1s
[CV 4/10] END rand_for__bootstrap=False, rand_for__max_depth=2, rand_for__max_features=sqrt, rand_for__min_samples_leaf=8, rand_for__min_samples_split=10, rand_for__n_estimators=1000;, score=0.790 total time=   2.2s
[CV 3/10] END rand_for__bootstrap=False, rand_for__max_depth=5, rand_for__max_features=auto, rand_for__min_samples_leaf=2, rand_for__min_samples_split=2, rand_for__n_estimators=800;, score=0.905 total time=   2.3s
[CV 3/10] END rand_for__bootstrap=False, rand_for__max_depth=5, rand_for__max_features=auto, rand_for__min_samples_leaf=2, rand_for__min_sam

[CV 7/10] END rand_for__bootstrap=False, rand_for__max_depth=5, rand_for__max_features=sqrt, rand_for__min_samples_leaf=2, rand_for__min_samples_split=2, rand_for__n_estimators=900;, score=0.774 total time=   2.2s
[CV 7/10] END rand_for__bootstrap=False, rand_for__max_depth=5, rand_for__max_features=sqrt, rand_for__min_samples_leaf=2, rand_for__min_samples_split=2, rand_for__n_estimators=1000;, score=0.774 total time=   2.2s
[CV 7/10] END rand_for__bootstrap=False, rand_for__max_depth=5, rand_for__max_features=sqrt, rand_for__min_samples_leaf=2, rand_for__min_samples_split=5, rand_for__n_estimators=800;, score=0.774 total time=   1.8s
[CV 7/10] END rand_for__bootstrap=False, rand_for__max_depth=5, rand_for__max_features=sqrt, rand_for__min_samples_leaf=2, rand_for__min_samples_split=5, rand_for__n_estimators=900;, score=0.774 total time=   1.7s
[CV 7/10] END rand_for__bootstrap=False, rand_for__max_depth=5, rand_for__max_features=sqrt, rand_for__min_samples_leaf=2, rand_for__min_sample

[CV 4/10] END rand_for__bootstrap=False, rand_for__max_depth=5, rand_for__max_features=sqrt, rand_for__min_samples_leaf=2, rand_for__min_samples_split=5, rand_for__n_estimators=800;, score=0.806 total time=   1.8s
[CV 4/10] END rand_for__bootstrap=False, rand_for__max_depth=5, rand_for__max_features=sqrt, rand_for__min_samples_leaf=2, rand_for__min_samples_split=5, rand_for__n_estimators=900;, score=0.806 total time=   1.8s
[CV 4/10] END rand_for__bootstrap=False, rand_for__max_depth=5, rand_for__max_features=sqrt, rand_for__min_samples_leaf=2, rand_for__min_samples_split=5, rand_for__n_estimators=1000;, score=0.806 total time=   2.0s
[CV 4/10] END rand_for__bootstrap=False, rand_for__max_depth=5, rand_for__max_features=sqrt, rand_for__min_samples_leaf=2, rand_for__min_samples_split=10, rand_for__n_estimators=800;, score=0.806 total time=   1.8s
[CV 4/10] END rand_for__bootstrap=False, rand_for__max_depth=5, rand_for__max_features=sqrt, rand_for__min_samples_leaf=2, rand_for__min_sampl

[CV 3/10] END rand_for__bootstrap=False, rand_for__max_depth=5, rand_for__max_features=sqrt, rand_for__min_samples_leaf=2, rand_for__min_samples_split=10, rand_for__n_estimators=800;, score=0.905 total time=   1.9s
[CV 3/10] END rand_for__bootstrap=False, rand_for__max_depth=5, rand_for__max_features=sqrt, rand_for__min_samples_leaf=2, rand_for__min_samples_split=10, rand_for__n_estimators=900;, score=0.905 total time=   1.8s
[CV 2/10] END rand_for__bootstrap=False, rand_for__max_depth=5, rand_for__max_features=sqrt, rand_for__min_samples_leaf=2, rand_for__min_samples_split=10, rand_for__n_estimators=1000;, score=0.841 total time=   2.1s
[CV 2/10] END rand_for__bootstrap=False, rand_for__max_depth=5, rand_for__max_features=sqrt, rand_for__min_samples_leaf=4, rand_for__min_samples_split=2, rand_for__n_estimators=800;, score=0.841 total time=   1.7s
[CV 2/10] END rand_for__bootstrap=False, rand_for__max_depth=5, rand_for__max_features=sqrt, rand_for__min_samples_leaf=4, rand_for__min_sam

[CV 2/10] END rand_for__bootstrap=False, rand_for__max_depth=5, rand_for__max_features=sqrt, rand_for__min_samples_leaf=2, rand_for__min_samples_split=10, rand_for__n_estimators=800;, score=0.857 total time=   1.9s
[CV 2/10] END rand_for__bootstrap=False, rand_for__max_depth=5, rand_for__max_features=sqrt, rand_for__min_samples_leaf=2, rand_for__min_samples_split=10, rand_for__n_estimators=900;, score=0.857 total time=   1.8s
[CV 3/10] END rand_for__bootstrap=False, rand_for__max_depth=5, rand_for__max_features=sqrt, rand_for__min_samples_leaf=2, rand_for__min_samples_split=10, rand_for__n_estimators=1000;, score=0.905 total time=   2.1s
[CV 3/10] END rand_for__bootstrap=False, rand_for__max_depth=5, rand_for__max_features=sqrt, rand_for__min_samples_leaf=4, rand_for__min_samples_split=2, rand_for__n_estimators=800;, score=0.905 total time=   1.7s
[CV 3/10] END rand_for__bootstrap=False, rand_for__max_depth=5, rand_for__max_features=sqrt, rand_for__min_samples_leaf=4, rand_for__min_sam

[CV 7/10] END rand_for__bootstrap=False, rand_for__max_depth=5, rand_for__max_features=sqrt, rand_for__min_samples_leaf=2, rand_for__min_samples_split=10, rand_for__n_estimators=1000;, score=0.774 total time=   2.1s
[CV 7/10] END rand_for__bootstrap=False, rand_for__max_depth=5, rand_for__max_features=sqrt, rand_for__min_samples_leaf=4, rand_for__min_samples_split=2, rand_for__n_estimators=800;, score=0.774 total time=   1.7s
[CV 7/10] END rand_for__bootstrap=False, rand_for__max_depth=5, rand_for__max_features=sqrt, rand_for__min_samples_leaf=4, rand_for__min_samples_split=2, rand_for__n_estimators=900;, score=0.774 total time=   1.8s
[CV 7/10] END rand_for__bootstrap=False, rand_for__max_depth=5, rand_for__max_features=sqrt, rand_for__min_samples_leaf=4, rand_for__min_samples_split=2, rand_for__n_estimators=1000;, score=0.758 total time=   2.1s
[CV 7/10] END rand_for__bootstrap=False, rand_for__max_depth=5, rand_for__max_features=sqrt, rand_for__min_samples_leaf=4, rand_for__min_samp

[CV 2/10] END rand_for__bootstrap=False, rand_for__max_depth=10, rand_for__max_features=auto, rand_for__min_samples_leaf=4, rand_for__min_samples_split=5, rand_for__n_estimators=1000;, score=0.841 total time=   2.5s
[CV 2/10] END rand_for__bootstrap=False, rand_for__max_depth=10, rand_for__max_features=auto, rand_for__min_samples_leaf=4, rand_for__min_samples_split=10, rand_for__n_estimators=800;, score=0.841 total time=   2.1s
[CV 1/10] END rand_for__bootstrap=False, rand_for__max_depth=10, rand_for__max_features=auto, rand_for__min_samples_leaf=4, rand_for__min_samples_split=10, rand_for__n_estimators=900;, score=0.810 total time=   2.0s
[CV 1/10] END rand_for__bootstrap=False, rand_for__max_depth=10, rand_for__max_features=auto, rand_for__min_samples_leaf=4, rand_for__min_samples_split=10, rand_for__n_estimators=1000;, score=0.825 total time=   2.2s
[CV 1/10] END rand_for__bootstrap=False, rand_for__max_depth=10, rand_for__max_features=auto, rand_for__min_samples_leaf=8, rand_for__m

[CV 7/10] END rand_for__bootstrap=False, rand_for__max_depth=10, rand_for__max_features=auto, rand_for__min_samples_leaf=4, rand_for__min_samples_split=5, rand_for__n_estimators=1000;, score=0.790 total time=   2.7s
[CV 9/10] END rand_for__bootstrap=False, rand_for__max_depth=10, rand_for__max_features=auto, rand_for__min_samples_leaf=4, rand_for__min_samples_split=10, rand_for__n_estimators=800;, score=0.839 total time=   1.8s
[CV 9/10] END rand_for__bootstrap=False, rand_for__max_depth=10, rand_for__max_features=auto, rand_for__min_samples_leaf=4, rand_for__min_samples_split=10, rand_for__n_estimators=900;, score=0.839 total time=   2.1s
[CV 9/10] END rand_for__bootstrap=False, rand_for__max_depth=10, rand_for__max_features=auto, rand_for__min_samples_leaf=4, rand_for__min_samples_split=10, rand_for__n_estimators=1000;, score=0.839 total time=   2.2s
[CV 8/10] END rand_for__bootstrap=False, rand_for__max_depth=10, rand_for__max_features=auto, rand_for__min_samples_leaf=8, rand_for__m

[CV 10/10] END rand_for__bootstrap=False, rand_for__max_depth=10, rand_for__max_features=auto, rand_for__min_samples_leaf=4, rand_for__min_samples_split=10, rand_for__n_estimators=1000;, score=0.823 total time=   2.2s
[CV 1/10] END rand_for__bootstrap=False, rand_for__max_depth=10, rand_for__max_features=auto, rand_for__min_samples_leaf=8, rand_for__min_samples_split=2, rand_for__n_estimators=900;, score=0.825 total time=   1.9s
[CV 1/10] END rand_for__bootstrap=False, rand_for__max_depth=10, rand_for__max_features=auto, rand_for__min_samples_leaf=8, rand_for__min_samples_split=2, rand_for__n_estimators=1000;, score=0.810 total time=   2.0s
[CV 1/10] END rand_for__bootstrap=False, rand_for__max_depth=10, rand_for__max_features=auto, rand_for__min_samples_leaf=8, rand_for__min_samples_split=5, rand_for__n_estimators=800;, score=0.810 total time=   1.6s
[CV 10/10] END rand_for__bootstrap=False, rand_for__max_depth=10, rand_for__max_features=auto, rand_for__min_samples_leaf=8, rand_for__m

[CV 7/10] END rand_for__bootstrap=False, rand_for__max_depth=10, rand_for__max_features=auto, rand_for__min_samples_leaf=8, rand_for__min_samples_split=2, rand_for__n_estimators=800;, score=0.758 total time=   1.7s
[CV 7/10] END rand_for__bootstrap=False, rand_for__max_depth=10, rand_for__max_features=auto, rand_for__min_samples_leaf=8, rand_for__min_samples_split=2, rand_for__n_estimators=900;, score=0.758 total time=   1.9s
[CV 6/10] END rand_for__bootstrap=False, rand_for__max_depth=10, rand_for__max_features=auto, rand_for__min_samples_leaf=8, rand_for__min_samples_split=2, rand_for__n_estimators=1000;, score=0.726 total time=   2.1s
[CV 6/10] END rand_for__bootstrap=False, rand_for__max_depth=10, rand_for__max_features=auto, rand_for__min_samples_leaf=8, rand_for__min_samples_split=5, rand_for__n_estimators=800;, score=0.726 total time=   1.7s
[CV 7/10] END rand_for__bootstrap=False, rand_for__max_depth=10, rand_for__max_features=auto, rand_for__min_samples_leaf=8, rand_for__min_s

[CV 9/10] END rand_for__bootstrap=False, rand_for__max_depth=10, rand_for__max_features=auto, rand_for__min_samples_leaf=8, rand_for__min_samples_split=2, rand_for__n_estimators=900;, score=0.839 total time=   1.9s
[CV 9/10] END rand_for__bootstrap=False, rand_for__max_depth=10, rand_for__max_features=auto, rand_for__min_samples_leaf=8, rand_for__min_samples_split=2, rand_for__n_estimators=1000;, score=0.839 total time=   2.1s
[CV 9/10] END rand_for__bootstrap=False, rand_for__max_depth=10, rand_for__max_features=auto, rand_for__min_samples_leaf=8, rand_for__min_samples_split=5, rand_for__n_estimators=800;, score=0.855 total time=   1.7s
[CV 10/10] END rand_for__bootstrap=False, rand_for__max_depth=10, rand_for__max_features=auto, rand_for__min_samples_leaf=8, rand_for__min_samples_split=5, rand_for__n_estimators=900;, score=0.823 total time=   1.9s
[CV 10/10] END rand_for__bootstrap=False, rand_for__max_depth=10, rand_for__max_features=auto, rand_for__min_samples_leaf=8, rand_for__min

[CV 9/10] END rand_for__bootstrap=False, rand_for__max_depth=10, rand_for__max_features=sqrt, rand_for__min_samples_leaf=8, rand_for__min_samples_split=5, rand_for__n_estimators=1000;, score=0.839 total time=   2.3s
[CV 9/10] END rand_for__bootstrap=False, rand_for__max_depth=10, rand_for__max_features=sqrt, rand_for__min_samples_leaf=8, rand_for__min_samples_split=10, rand_for__n_estimators=800;, score=0.839 total time=   1.9s
[CV 9/10] END rand_for__bootstrap=False, rand_for__max_depth=10, rand_for__max_features=sqrt, rand_for__min_samples_leaf=8, rand_for__min_samples_split=10, rand_for__n_estimators=900;, score=0.855 total time=   2.1s
[CV 9/10] END rand_for__bootstrap=False, rand_for__max_depth=10, rand_for__max_features=sqrt, rand_for__min_samples_leaf=8, rand_for__min_samples_split=10, rand_for__n_estimators=1000;, score=0.839 total time=   2.1s
[CV 7/10] END rand_for__bootstrap=False, rand_for__max_depth=None, rand_for__max_features=auto, rand_for__min_samples_leaf=2, rand_for_

[CV 8/10] END rand_for__bootstrap=False, rand_for__max_depth=10, rand_for__max_features=sqrt, rand_for__min_samples_leaf=8, rand_for__min_samples_split=10, rand_for__n_estimators=800;, score=0.823 total time=   1.9s
[CV 8/10] END rand_for__bootstrap=False, rand_for__max_depth=10, rand_for__max_features=sqrt, rand_for__min_samples_leaf=8, rand_for__min_samples_split=10, rand_for__n_estimators=900;, score=0.823 total time=   2.2s
[CV 10/10] END rand_for__bootstrap=False, rand_for__max_depth=10, rand_for__max_features=sqrt, rand_for__min_samples_leaf=8, rand_for__min_samples_split=10, rand_for__n_estimators=1000;, score=0.839 total time=   2.1s
[CV 10/10] END rand_for__bootstrap=False, rand_for__max_depth=None, rand_for__max_features=auto, rand_for__min_samples_leaf=2, rand_for__min_samples_split=2, rand_for__n_estimators=800;, score=0.790 total time=   2.6s
[CV 10/10] END rand_for__bootstrap=False, rand_for__max_depth=None, rand_for__max_features=auto, rand_for__min_samples_leaf=2, rand_

[CV 5/10] END rand_for__bootstrap=False, rand_for__max_depth=None, rand_for__max_features=auto, rand_for__min_samples_leaf=2, rand_for__min_samples_split=2, rand_for__n_estimators=800;, score=0.790 total time=   2.3s
[CV 6/10] END rand_for__bootstrap=False, rand_for__max_depth=None, rand_for__max_features=auto, rand_for__min_samples_leaf=2, rand_for__min_samples_split=2, rand_for__n_estimators=900;, score=0.742 total time=   2.4s
[CV 5/10] END rand_for__bootstrap=False, rand_for__max_depth=None, rand_for__max_features=auto, rand_for__min_samples_leaf=2, rand_for__min_samples_split=2, rand_for__n_estimators=1000;, score=0.774 total time=   2.8s
[CV 6/10] END rand_for__bootstrap=False, rand_for__max_depth=None, rand_for__max_features=auto, rand_for__min_samples_leaf=2, rand_for__min_samples_split=5, rand_for__n_estimators=800;, score=0.726 total time=   2.3s
[CV 5/10] END rand_for__bootstrap=False, rand_for__max_depth=None, rand_for__max_features=auto, rand_for__min_samples_leaf=2, rand_

[CV 5/10] END rand_for__bootstrap=False, rand_for__max_depth=None, rand_for__max_features=auto, rand_for__min_samples_leaf=2, rand_for__min_samples_split=2, rand_for__n_estimators=900;, score=0.774 total time=   2.5s
[CV 6/10] END rand_for__bootstrap=False, rand_for__max_depth=None, rand_for__max_features=auto, rand_for__min_samples_leaf=2, rand_for__min_samples_split=2, rand_for__n_estimators=1000;, score=0.726 total time=   2.7s
[CV 5/10] END rand_for__bootstrap=False, rand_for__max_depth=None, rand_for__max_features=auto, rand_for__min_samples_leaf=2, rand_for__min_samples_split=5, rand_for__n_estimators=800;, score=0.806 total time=   2.3s
[CV 6/10] END rand_for__bootstrap=False, rand_for__max_depth=None, rand_for__max_features=auto, rand_for__min_samples_leaf=2, rand_for__min_samples_split=5, rand_for__n_estimators=900;, score=0.742 total time=   2.4s
[CV 5/10] END rand_for__bootstrap=False, rand_for__max_depth=None, rand_for__max_features=auto, rand_for__min_samples_leaf=2, rand_

[CV 1/10] END rand_for__bootstrap=False, rand_for__max_depth=None, rand_for__max_features=auto, rand_for__min_samples_leaf=2, rand_for__min_samples_split=5, rand_for__n_estimators=800;, score=0.825 total time=   2.2s
[CV 1/10] END rand_for__bootstrap=False, rand_for__max_depth=None, rand_for__max_features=auto, rand_for__min_samples_leaf=2, rand_for__min_samples_split=5, rand_for__n_estimators=900;, score=0.825 total time=   2.6s
[CV 1/10] END rand_for__bootstrap=False, rand_for__max_depth=None, rand_for__max_features=auto, rand_for__min_samples_leaf=2, rand_for__min_samples_split=5, rand_for__n_estimators=1000;, score=0.825 total time=   2.5s
[CV 1/10] END rand_for__bootstrap=False, rand_for__max_depth=None, rand_for__max_features=auto, rand_for__min_samples_leaf=2, rand_for__min_samples_split=10, rand_for__n_estimators=800;, score=0.825 total time=   1.9s
[CV 10/10] END rand_for__bootstrap=False, rand_for__max_depth=None, rand_for__max_features=auto, rand_for__min_samples_leaf=2, ran

GridSearchCV(cv=10,
             estimator=Pipeline(steps=[('preprocessor',
                                        ColumnTransformer(transformers=[('num',
                                                                         MinMaxScaler(),
                                                                         ['Pclass',
                                                                          'Alone',
                                                                          'Familiars',
                                                                          'LenName']),
                                                                        ('out',
                                                                         RobustScaler(),
                                                                         ['Age',
                                                                          'Fare']),
                                                                        ('cat',


In [5]:
print("best fit params:", grid_search.best_params_)
print("model score: %.3f" % grid_search.score(X_test, y_test))
y_pred = grid_search.predict(test_df)

best fit params: {'rand_for__bootstrap': True, 'rand_for__max_depth': 10, 'rand_for__max_features': 'sqrt', 'rand_for__min_samples_leaf': 2, 'rand_for__min_samples_split': 2, 'rand_for__n_estimators': 900}
model score: 0.836


In [6]:
import kaggle 

file = r'../data/submission.csv'
competition = 'titanic'
message = ''
test_df_orig['Survived'] = y_pred
test_df_orig[['PassengerId', 'Survived']].to_csv(file, index=False)


kaggle.api.competition_submit(file,message,competition)
 


100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 2.77k/2.77k [00:02<00:00, 983B/s]


Successfully submitted to Titanic - Machine Learning from Disaster

In [20]:
%hist

import pandas as pd
import numpy as np
import seaborn as sns
from matplotlib import pyplot as plt
from sklearn.model_selection import train_test_split
%matplotlib widget

import titanic.data.load
import titanic.data.wrangling as wrng


train_df_orig, test_df_orig = titanic.data.load.from_csv()

train_df = wrng.wrangling(train_df_orig)
test_df = wrng.wrangling(test_df_orig)

X_train, X_test, y_train, y_test = train_test_split(train_df, train_df_orig.Survived, test_size=0.3, random_state=50)

train_df.info()
from sklearn.preprocessing import StandardScaler, OneHotEncoder, MinMaxScaler, RobustScaler, Normalizer
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer

numeric_features = ['Pclass', 'Alone', 'Familiars', 'LenName']
numeric_outliers = ['Age', 'Fare']
numeric_transformer = MinMaxScaler()
numeric_outliers_transformer = RobustScaler()

categorical_features = ['Embarked', 'Sex', 'Title', 'CabLet', 'TicketLetter']
categorical_transformer = OneHotEncoder