In [1]:

import os

import pandas as pd
import numpy as np

# Matplotlib for visualization
from matplotlib import pyplot as plt
# display plots in the notebook
%matplotlib inline

# Seaborn for easier visualization
import seaborn as sns
### sns.set_style('darkgrid')

# store elements as dictionary keys and their counts as dictionary values
from collections import Counter

# scikit-learn
from sklearn.model_selection import train_test_split

from sklearn.preprocessing import MinMaxScaler, OneHotEncoder, LabelEncoder
from sklearn.compose import ColumnTransformer, make_column_transformer
from sklearn.pipeline import make_pipeline

# Classification metrics
from sklearn.metrics import confusion_matrix, classification_report

# Function for creating model pipelines - sklearn
from sklearn.pipeline import make_pipeline

# Function for creating model pipelines - imblearn
from imblearn.pipeline import make_pipeline as imbl_pipe

# Over-sampling using SMOTE
from imblearn.over_sampling import SMOTE


In [2]:
abt = pd.read_csv("../Resources/analytical_base_table.csv")
abt.head()

Unnamed: 0,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,502,France,Female,42,8,159660.8,3,1,0,113931.57,1
3,699,France,Female,39,1,0.0,2,0,0,93826.63,0
4,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0


## Models Training

## Let's start by splitting our dataframe into separate objects:

y for the target varibale

X for the input features

In [3]:
#Separate dataframe into separate object

# Object for target variable
y = abt.Exited

# object for input features
X = abt.drop(['Exited'], axis=1)

# display shapes of X and y
print(X.shape, y.shape)


(10000, 10) (10000,)


In [4]:
# List numerical features
num_columns = X.select_dtypes(include='number').columns.tolist()
num_columns

['CreditScore',
 'Age',
 'Tenure',
 'Balance',
 'NumOfProducts',
 'HasCrCard',
 'IsActiveMember',
 'EstimatedSalary']

In [5]:
# List categorical features
cat_columns = X.select_dtypes(include='object').columns.tolist()
cat_columns

['Geography', 'Gender']

In [6]:
def class_count(a):
    counter=Counter(a)
    kv=[list(counter.keys()),list(counter.values())]
    abt2 = pd.DataFrame(np.array(kv).T, columns=['Exited','Count'])
    abt2['Count'] = abt2['Count'].astype('int64')
    abt2['%'] = round(abt2['Count'] / a.shape[0] * 100, 2)
    return abt2.sort_values('Count',ascending=False)

In [7]:
class_count(y)


Unnamed: 0,Exited,Count,%
1,0,7963,79.63
0,1,2037,20.37


## Create a Train Test Split

We will continue with splitting our data into separate training and test sets.

30% of observations will be set aside for the test set

the rest, 70%, will be used as the training set

In [8]:
random_state = 10

# Split X and y into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y,
                                                    test_size=0.3,
                                                    random_state=random_state,
                                                    stratify=abt.Exited)

# Print number of observations in X_train, X_test, y_train, and y_test
print(len(X_train), len(X_test), len(y_train), len(y_test))

7000 3000 7000 3000


In [9]:
X_train.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 7000 entries, 8061 to 4741
Data columns (total 10 columns):
CreditScore        7000 non-null int64
Geography          7000 non-null object
Gender             7000 non-null object
Age                7000 non-null int64
Tenure             7000 non-null int64
Balance            7000 non-null float64
NumOfProducts      7000 non-null int64
HasCrCard          7000 non-null int64
IsActiveMember     7000 non-null int64
EstimatedSalary    7000 non-null float64
dtypes: float64(2), int64(6), object(2)
memory usage: 601.6+ KB


## Pre-processing Pipeline
 
 ##Scale numerical data and encode categorical data
Construct a pre-processing pipeline from the given transformers: MinMaxScaler and OneHotEncoder Create lists of indexes from the list of column namesNeed to be numeric not string to specify columns name in column transformer

In [10]:
num_features = [] 

for i in num_columns:
    location = X.columns.get_loc(i)
    num_features.append(location)
print(num_features)  

[0, 3, 4, 5, 6, 7, 8, 9]


In [11]:
cat_features = []

for i in cat_columns:
    location = X.columns.get_loc(i)
    cat_features.append(location)
print(cat_features) 

[1, 2]


In [12]:
# Define column transformer
# Need to be numeric not string to specify columns name 
preprocess = make_column_transformer(
    (MinMaxScaler(), num_features),
    (OneHotEncoder(sparse=False), cat_features)
)
preprocess

ColumnTransformer(transformers=[('minmaxscaler', MinMaxScaler(),
                                 [0, 3, 4, 5, 6, 7, 8, 9]),
                                ('onehotencoder', OneHotEncoder(sparse=False),
                                 [1, 2])])

In [13]:
# Import classifier
from sklearn.ensemble import RandomForestClassifier

# Define model with pipeline
model = imbl_pipe(preprocess,
                  SMOTE(sampling_strategy='auto', random_state=random_state),
                  RandomForestClassifier())

model

Pipeline(steps=[('columntransformer',
                 ColumnTransformer(transformers=[('minmaxscaler',
                                                  MinMaxScaler(),
                                                  [0, 3, 4, 5, 6, 7, 8, 9]),
                                                 ('onehotencoder',
                                                  OneHotEncoder(sparse=False),
                                                  [1, 2])])),
                ('smote', SMOTE(random_state=10)),
                ('randomforestclassifier', RandomForestClassifier())])

In [15]:
 # Create the GridSearchCV model
# Create the GridSearch estimator along with a parameter object containing the values to adjust
from sklearn.model_selection import GridSearchCV
rf_param_grid = {
    'randomforestclassifier__n_estimators' : [50, 100, 150],
    'randomforestclassifier__max_features' : ['sqrt', 0.33],
    'randomforestclassifier__min_samples_leaf' : [1, 5 ,10, 15],
    'randomforestclassifier__criterion' : ['gini', 'entropy'],
    'randomforestclassifier__min_samples_split' : [2, 3, 4]
}
rf_grid = GridSearchCV(model, rf_param_grid, verbose=3, cv= 5, scoring='accuracy')

In [16]:
rf_grid.fit(X_train, y_train)

Fitting 5 folds for each of 144 candidates, totalling 720 fits
[CV] randomforestclassifier__criterion=gini, randomforestclassifier__max_features=sqrt, randomforestclassifier__min_samples_leaf=1, randomforestclassifier__min_samples_split=2, randomforestclassifier__n_estimators=50 


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


[CV]  randomforestclassifier__criterion=gini, randomforestclassifier__max_features=sqrt, randomforestclassifier__min_samples_leaf=1, randomforestclassifier__min_samples_split=2, randomforestclassifier__n_estimators=50, score=0.831, total=   0.9s
[CV] randomforestclassifier__criterion=gini, randomforestclassifier__max_features=sqrt, randomforestclassifier__min_samples_leaf=1, randomforestclassifier__min_samples_split=2, randomforestclassifier__n_estimators=50 


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.8s remaining:    0.0s


[CV]  randomforestclassifier__criterion=gini, randomforestclassifier__max_features=sqrt, randomforestclassifier__min_samples_leaf=1, randomforestclassifier__min_samples_split=2, randomforestclassifier__n_estimators=50, score=0.836, total=   0.8s
[CV] randomforestclassifier__criterion=gini, randomforestclassifier__max_features=sqrt, randomforestclassifier__min_samples_leaf=1, randomforestclassifier__min_samples_split=2, randomforestclassifier__n_estimators=50 


[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:    1.6s remaining:    0.0s


[CV]  randomforestclassifier__criterion=gini, randomforestclassifier__max_features=sqrt, randomforestclassifier__min_samples_leaf=1, randomforestclassifier__min_samples_split=2, randomforestclassifier__n_estimators=50, score=0.831, total=   0.8s
[CV] randomforestclassifier__criterion=gini, randomforestclassifier__max_features=sqrt, randomforestclassifier__min_samples_leaf=1, randomforestclassifier__min_samples_split=2, randomforestclassifier__n_estimators=50 
[CV]  randomforestclassifier__criterion=gini, randomforestclassifier__max_features=sqrt, randomforestclassifier__min_samples_leaf=1, randomforestclassifier__min_samples_split=2, randomforestclassifier__n_estimators=50, score=0.841, total=   1.6s
[CV] randomforestclassifier__criterion=gini, randomforestclassifier__max_features=sqrt, randomforestclassifier__min_samples_leaf=1, randomforestclassifier__min_samples_split=2, randomforestclassifier__n_estimators=50 
[CV]  randomforestclassifier__criterion=gini, randomforestclassifier__ma

[CV]  randomforestclassifier__criterion=gini, randomforestclassifier__max_features=sqrt, randomforestclassifier__min_samples_leaf=1, randomforestclassifier__min_samples_split=3, randomforestclassifier__n_estimators=100, score=0.837, total=   3.0s
[CV] randomforestclassifier__criterion=gini, randomforestclassifier__max_features=sqrt, randomforestclassifier__min_samples_leaf=1, randomforestclassifier__min_samples_split=3, randomforestclassifier__n_estimators=100 
[CV]  randomforestclassifier__criterion=gini, randomforestclassifier__max_features=sqrt, randomforestclassifier__min_samples_leaf=1, randomforestclassifier__min_samples_split=3, randomforestclassifier__n_estimators=100, score=0.839, total=   1.4s
[CV] randomforestclassifier__criterion=gini, randomforestclassifier__max_features=sqrt, randomforestclassifier__min_samples_leaf=1, randomforestclassifier__min_samples_split=3, randomforestclassifier__n_estimators=100 
[CV]  randomforestclassifier__criterion=gini, randomforestclassifier

[CV]  randomforestclassifier__criterion=gini, randomforestclassifier__max_features=sqrt, randomforestclassifier__min_samples_leaf=1, randomforestclassifier__min_samples_split=4, randomforestclassifier__n_estimators=100, score=0.837, total=   3.1s
[CV] randomforestclassifier__criterion=gini, randomforestclassifier__max_features=sqrt, randomforestclassifier__min_samples_leaf=1, randomforestclassifier__min_samples_split=4, randomforestclassifier__n_estimators=100 
[CV]  randomforestclassifier__criterion=gini, randomforestclassifier__max_features=sqrt, randomforestclassifier__min_samples_leaf=1, randomforestclassifier__min_samples_split=4, randomforestclassifier__n_estimators=100, score=0.843, total=   1.5s
[CV] randomforestclassifier__criterion=gini, randomforestclassifier__max_features=sqrt, randomforestclassifier__min_samples_leaf=1, randomforestclassifier__min_samples_split=4, randomforestclassifier__n_estimators=150 
[CV]  randomforestclassifier__criterion=gini, randomforestclassifier

[CV]  randomforestclassifier__criterion=gini, randomforestclassifier__max_features=sqrt, randomforestclassifier__min_samples_leaf=5, randomforestclassifier__min_samples_split=2, randomforestclassifier__n_estimators=150, score=0.836, total=   1.8s
[CV] randomforestclassifier__criterion=gini, randomforestclassifier__max_features=sqrt, randomforestclassifier__min_samples_leaf=5, randomforestclassifier__min_samples_split=2, randomforestclassifier__n_estimators=150 
[CV]  randomforestclassifier__criterion=gini, randomforestclassifier__max_features=sqrt, randomforestclassifier__min_samples_leaf=5, randomforestclassifier__min_samples_split=2, randomforestclassifier__n_estimators=150, score=0.816, total=   1.8s
[CV] randomforestclassifier__criterion=gini, randomforestclassifier__max_features=sqrt, randomforestclassifier__min_samples_leaf=5, randomforestclassifier__min_samples_split=2, randomforestclassifier__n_estimators=150 
[CV]  randomforestclassifier__criterion=gini, randomforestclassifier

[CV]  randomforestclassifier__criterion=gini, randomforestclassifier__max_features=sqrt, randomforestclassifier__min_samples_leaf=5, randomforestclassifier__min_samples_split=3, randomforestclassifier__n_estimators=150, score=0.837, total=   1.8s
[CV] randomforestclassifier__criterion=gini, randomforestclassifier__max_features=sqrt, randomforestclassifier__min_samples_leaf=5, randomforestclassifier__min_samples_split=4, randomforestclassifier__n_estimators=50 
[CV]  randomforestclassifier__criterion=gini, randomforestclassifier__max_features=sqrt, randomforestclassifier__min_samples_leaf=5, randomforestclassifier__min_samples_split=4, randomforestclassifier__n_estimators=50, score=0.839, total=   1.0s
[CV] randomforestclassifier__criterion=gini, randomforestclassifier__max_features=sqrt, randomforestclassifier__min_samples_leaf=5, randomforestclassifier__min_samples_split=4, randomforestclassifier__n_estimators=50 
[CV]  randomforestclassifier__criterion=gini, randomforestclassifier__m

[CV]  randomforestclassifier__criterion=gini, randomforestclassifier__max_features=sqrt, randomforestclassifier__min_samples_leaf=10, randomforestclassifier__min_samples_split=2, randomforestclassifier__n_estimators=50, score=0.797, total=   0.6s
[CV] randomforestclassifier__criterion=gini, randomforestclassifier__max_features=sqrt, randomforestclassifier__min_samples_leaf=10, randomforestclassifier__min_samples_split=2, randomforestclassifier__n_estimators=50 
[CV]  randomforestclassifier__criterion=gini, randomforestclassifier__max_features=sqrt, randomforestclassifier__min_samples_leaf=10, randomforestclassifier__min_samples_split=2, randomforestclassifier__n_estimators=50, score=0.820, total=   1.0s
[CV] randomforestclassifier__criterion=gini, randomforestclassifier__max_features=sqrt, randomforestclassifier__min_samples_leaf=10, randomforestclassifier__min_samples_split=2, randomforestclassifier__n_estimators=50 
[CV]  randomforestclassifier__criterion=gini, randomforestclassifier

[CV]  randomforestclassifier__criterion=gini, randomforestclassifier__max_features=sqrt, randomforestclassifier__min_samples_leaf=10, randomforestclassifier__min_samples_split=3, randomforestclassifier__n_estimators=100, score=0.830, total=   1.1s
[CV] randomforestclassifier__criterion=gini, randomforestclassifier__max_features=sqrt, randomforestclassifier__min_samples_leaf=10, randomforestclassifier__min_samples_split=3, randomforestclassifier__n_estimators=100 
[CV]  randomforestclassifier__criterion=gini, randomforestclassifier__max_features=sqrt, randomforestclassifier__min_samples_leaf=10, randomforestclassifier__min_samples_split=3, randomforestclassifier__n_estimators=100, score=0.831, total=   1.6s
[CV] randomforestclassifier__criterion=gini, randomforestclassifier__max_features=sqrt, randomforestclassifier__min_samples_leaf=10, randomforestclassifier__min_samples_split=3, randomforestclassifier__n_estimators=100 
[CV]  randomforestclassifier__criterion=gini, randomforestclassi

[CV]  randomforestclassifier__criterion=gini, randomforestclassifier__max_features=sqrt, randomforestclassifier__min_samples_leaf=10, randomforestclassifier__min_samples_split=4, randomforestclassifier__n_estimators=100, score=0.824, total=   1.1s
[CV] randomforestclassifier__criterion=gini, randomforestclassifier__max_features=sqrt, randomforestclassifier__min_samples_leaf=10, randomforestclassifier__min_samples_split=4, randomforestclassifier__n_estimators=100 
[CV]  randomforestclassifier__criterion=gini, randomforestclassifier__max_features=sqrt, randomforestclassifier__min_samples_leaf=10, randomforestclassifier__min_samples_split=4, randomforestclassifier__n_estimators=100, score=0.838, total=   1.2s
[CV] randomforestclassifier__criterion=gini, randomforestclassifier__max_features=sqrt, randomforestclassifier__min_samples_leaf=10, randomforestclassifier__min_samples_split=4, randomforestclassifier__n_estimators=150 
[CV]  randomforestclassifier__criterion=gini, randomforestclassi

[CV]  randomforestclassifier__criterion=gini, randomforestclassifier__max_features=sqrt, randomforestclassifier__min_samples_leaf=15, randomforestclassifier__min_samples_split=2, randomforestclassifier__n_estimators=150, score=0.821, total=   1.6s
[CV] randomforestclassifier__criterion=gini, randomforestclassifier__max_features=sqrt, randomforestclassifier__min_samples_leaf=15, randomforestclassifier__min_samples_split=2, randomforestclassifier__n_estimators=150 
[CV]  randomforestclassifier__criterion=gini, randomforestclassifier__max_features=sqrt, randomforestclassifier__min_samples_leaf=15, randomforestclassifier__min_samples_split=2, randomforestclassifier__n_estimators=150, score=0.804, total=   2.0s
[CV] randomforestclassifier__criterion=gini, randomforestclassifier__max_features=sqrt, randomforestclassifier__min_samples_leaf=15, randomforestclassifier__min_samples_split=2, randomforestclassifier__n_estimators=150 
[CV]  randomforestclassifier__criterion=gini, randomforestclassi

[CV]  randomforestclassifier__criterion=gini, randomforestclassifier__max_features=sqrt, randomforestclassifier__min_samples_leaf=15, randomforestclassifier__min_samples_split=3, randomforestclassifier__n_estimators=150, score=0.828, total=   2.0s
[CV] randomforestclassifier__criterion=gini, randomforestclassifier__max_features=sqrt, randomforestclassifier__min_samples_leaf=15, randomforestclassifier__min_samples_split=4, randomforestclassifier__n_estimators=50 
[CV]  randomforestclassifier__criterion=gini, randomforestclassifier__max_features=sqrt, randomforestclassifier__min_samples_leaf=15, randomforestclassifier__min_samples_split=4, randomforestclassifier__n_estimators=50, score=0.829, total=   0.6s
[CV] randomforestclassifier__criterion=gini, randomforestclassifier__max_features=sqrt, randomforestclassifier__min_samples_leaf=15, randomforestclassifier__min_samples_split=4, randomforestclassifier__n_estimators=50 
[CV]  randomforestclassifier__criterion=gini, randomforestclassifie

[CV]  randomforestclassifier__criterion=gini, randomforestclassifier__max_features=0.33, randomforestclassifier__min_samples_leaf=1, randomforestclassifier__min_samples_split=2, randomforestclassifier__n_estimators=50, score=0.827, total=   1.1s
[CV] randomforestclassifier__criterion=gini, randomforestclassifier__max_features=0.33, randomforestclassifier__min_samples_leaf=1, randomforestclassifier__min_samples_split=2, randomforestclassifier__n_estimators=50 
[CV]  randomforestclassifier__criterion=gini, randomforestclassifier__max_features=0.33, randomforestclassifier__min_samples_leaf=1, randomforestclassifier__min_samples_split=2, randomforestclassifier__n_estimators=50, score=0.829, total=   0.9s
[CV] randomforestclassifier__criterion=gini, randomforestclassifier__max_features=0.33, randomforestclassifier__min_samples_leaf=1, randomforestclassifier__min_samples_split=2, randomforestclassifier__n_estimators=50 
[CV]  randomforestclassifier__criterion=gini, randomforestclassifier__ma

[CV]  randomforestclassifier__criterion=gini, randomforestclassifier__max_features=0.33, randomforestclassifier__min_samples_leaf=1, randomforestclassifier__min_samples_split=3, randomforestclassifier__n_estimators=100, score=0.839, total=   2.4s
[CV] randomforestclassifier__criterion=gini, randomforestclassifier__max_features=0.33, randomforestclassifier__min_samples_leaf=1, randomforestclassifier__min_samples_split=3, randomforestclassifier__n_estimators=100 
[CV]  randomforestclassifier__criterion=gini, randomforestclassifier__max_features=0.33, randomforestclassifier__min_samples_leaf=1, randomforestclassifier__min_samples_split=3, randomforestclassifier__n_estimators=100, score=0.844, total=   1.6s
[CV] randomforestclassifier__criterion=gini, randomforestclassifier__max_features=0.33, randomforestclassifier__min_samples_leaf=1, randomforestclassifier__min_samples_split=3, randomforestclassifier__n_estimators=100 
[CV]  randomforestclassifier__criterion=gini, randomforestclassifier

[CV]  randomforestclassifier__criterion=gini, randomforestclassifier__max_features=0.33, randomforestclassifier__min_samples_leaf=1, randomforestclassifier__min_samples_split=4, randomforestclassifier__n_estimators=100, score=0.841, total=   2.0s
[CV] randomforestclassifier__criterion=gini, randomforestclassifier__max_features=0.33, randomforestclassifier__min_samples_leaf=1, randomforestclassifier__min_samples_split=4, randomforestclassifier__n_estimators=100 
[CV]  randomforestclassifier__criterion=gini, randomforestclassifier__max_features=0.33, randomforestclassifier__min_samples_leaf=1, randomforestclassifier__min_samples_split=4, randomforestclassifier__n_estimators=100, score=0.843, total=   1.6s
[CV] randomforestclassifier__criterion=gini, randomforestclassifier__max_features=0.33, randomforestclassifier__min_samples_leaf=1, randomforestclassifier__min_samples_split=4, randomforestclassifier__n_estimators=150 
[CV]  randomforestclassifier__criterion=gini, randomforestclassifier

[CV]  randomforestclassifier__criterion=gini, randomforestclassifier__max_features=0.33, randomforestclassifier__min_samples_leaf=5, randomforestclassifier__min_samples_split=2, randomforestclassifier__n_estimators=150, score=0.826, total=   2.1s
[CV] randomforestclassifier__criterion=gini, randomforestclassifier__max_features=0.33, randomforestclassifier__min_samples_leaf=5, randomforestclassifier__min_samples_split=2, randomforestclassifier__n_estimators=150 
[CV]  randomforestclassifier__criterion=gini, randomforestclassifier__max_features=0.33, randomforestclassifier__min_samples_leaf=5, randomforestclassifier__min_samples_split=2, randomforestclassifier__n_estimators=150, score=0.814, total=   2.6s
[CV] randomforestclassifier__criterion=gini, randomforestclassifier__max_features=0.33, randomforestclassifier__min_samples_leaf=5, randomforestclassifier__min_samples_split=2, randomforestclassifier__n_estimators=150 
[CV]  randomforestclassifier__criterion=gini, randomforestclassifier

[CV]  randomforestclassifier__criterion=gini, randomforestclassifier__max_features=0.33, randomforestclassifier__min_samples_leaf=5, randomforestclassifier__min_samples_split=3, randomforestclassifier__n_estimators=150, score=0.842, total=   2.9s
[CV] randomforestclassifier__criterion=gini, randomforestclassifier__max_features=0.33, randomforestclassifier__min_samples_leaf=5, randomforestclassifier__min_samples_split=4, randomforestclassifier__n_estimators=50 
[CV]  randomforestclassifier__criterion=gini, randomforestclassifier__max_features=0.33, randomforestclassifier__min_samples_leaf=5, randomforestclassifier__min_samples_split=4, randomforestclassifier__n_estimators=50, score=0.836, total=   0.8s
[CV] randomforestclassifier__criterion=gini, randomforestclassifier__max_features=0.33, randomforestclassifier__min_samples_leaf=5, randomforestclassifier__min_samples_split=4, randomforestclassifier__n_estimators=50 
[CV]  randomforestclassifier__criterion=gini, randomforestclassifier__m

[CV]  randomforestclassifier__criterion=gini, randomforestclassifier__max_features=0.33, randomforestclassifier__min_samples_leaf=10, randomforestclassifier__min_samples_split=2, randomforestclassifier__n_estimators=50, score=0.801, total=   0.7s
[CV] randomforestclassifier__criterion=gini, randomforestclassifier__max_features=0.33, randomforestclassifier__min_samples_leaf=10, randomforestclassifier__min_samples_split=2, randomforestclassifier__n_estimators=50 
[CV]  randomforestclassifier__criterion=gini, randomforestclassifier__max_features=0.33, randomforestclassifier__min_samples_leaf=10, randomforestclassifier__min_samples_split=2, randomforestclassifier__n_estimators=50, score=0.825, total=   0.8s
[CV] randomforestclassifier__criterion=gini, randomforestclassifier__max_features=0.33, randomforestclassifier__min_samples_leaf=10, randomforestclassifier__min_samples_split=2, randomforestclassifier__n_estimators=50 
[CV]  randomforestclassifier__criterion=gini, randomforestclassifier

[CV]  randomforestclassifier__criterion=gini, randomforestclassifier__max_features=0.33, randomforestclassifier__min_samples_leaf=10, randomforestclassifier__min_samples_split=3, randomforestclassifier__n_estimators=100, score=0.837, total=   1.4s
[CV] randomforestclassifier__criterion=gini, randomforestclassifier__max_features=0.33, randomforestclassifier__min_samples_leaf=10, randomforestclassifier__min_samples_split=3, randomforestclassifier__n_estimators=100 
[CV]  randomforestclassifier__criterion=gini, randomforestclassifier__max_features=0.33, randomforestclassifier__min_samples_leaf=10, randomforestclassifier__min_samples_split=3, randomforestclassifier__n_estimators=100, score=0.821, total=   1.4s
[CV] randomforestclassifier__criterion=gini, randomforestclassifier__max_features=0.33, randomforestclassifier__min_samples_leaf=10, randomforestclassifier__min_samples_split=3, randomforestclassifier__n_estimators=100 
[CV]  randomforestclassifier__criterion=gini, randomforestclassi

[CV]  randomforestclassifier__criterion=gini, randomforestclassifier__max_features=0.33, randomforestclassifier__min_samples_leaf=10, randomforestclassifier__min_samples_split=4, randomforestclassifier__n_estimators=100, score=0.826, total=   1.4s
[CV] randomforestclassifier__criterion=gini, randomforestclassifier__max_features=0.33, randomforestclassifier__min_samples_leaf=10, randomforestclassifier__min_samples_split=4, randomforestclassifier__n_estimators=100 
[CV]  randomforestclassifier__criterion=gini, randomforestclassifier__max_features=0.33, randomforestclassifier__min_samples_leaf=10, randomforestclassifier__min_samples_split=4, randomforestclassifier__n_estimators=100, score=0.838, total=   1.5s
[CV] randomforestclassifier__criterion=gini, randomforestclassifier__max_features=0.33, randomforestclassifier__min_samples_leaf=10, randomforestclassifier__min_samples_split=4, randomforestclassifier__n_estimators=150 
[CV]  randomforestclassifier__criterion=gini, randomforestclassi

[CV]  randomforestclassifier__criterion=gini, randomforestclassifier__max_features=0.33, randomforestclassifier__min_samples_leaf=15, randomforestclassifier__min_samples_split=2, randomforestclassifier__n_estimators=150, score=0.820, total=   2.2s
[CV] randomforestclassifier__criterion=gini, randomforestclassifier__max_features=0.33, randomforestclassifier__min_samples_leaf=15, randomforestclassifier__min_samples_split=2, randomforestclassifier__n_estimators=150 
[CV]  randomforestclassifier__criterion=gini, randomforestclassifier__max_features=0.33, randomforestclassifier__min_samples_leaf=15, randomforestclassifier__min_samples_split=2, randomforestclassifier__n_estimators=150, score=0.804, total=   2.2s
[CV] randomforestclassifier__criterion=gini, randomforestclassifier__max_features=0.33, randomforestclassifier__min_samples_leaf=15, randomforestclassifier__min_samples_split=2, randomforestclassifier__n_estimators=150 
[CV]  randomforestclassifier__criterion=gini, randomforestclassi

[CV]  randomforestclassifier__criterion=gini, randomforestclassifier__max_features=0.33, randomforestclassifier__min_samples_leaf=15, randomforestclassifier__min_samples_split=3, randomforestclassifier__n_estimators=150, score=0.827, total=   2.0s
[CV] randomforestclassifier__criterion=gini, randomforestclassifier__max_features=0.33, randomforestclassifier__min_samples_leaf=15, randomforestclassifier__min_samples_split=4, randomforestclassifier__n_estimators=50 
[CV]  randomforestclassifier__criterion=gini, randomforestclassifier__max_features=0.33, randomforestclassifier__min_samples_leaf=15, randomforestclassifier__min_samples_split=4, randomforestclassifier__n_estimators=50, score=0.834, total=   0.9s
[CV] randomforestclassifier__criterion=gini, randomforestclassifier__max_features=0.33, randomforestclassifier__min_samples_leaf=15, randomforestclassifier__min_samples_split=4, randomforestclassifier__n_estimators=50 
[CV]  randomforestclassifier__criterion=gini, randomforestclassifie

[CV]  randomforestclassifier__criterion=entropy, randomforestclassifier__max_features=sqrt, randomforestclassifier__min_samples_leaf=1, randomforestclassifier__min_samples_split=2, randomforestclassifier__n_estimators=50, score=0.830, total=   1.0s
[CV] randomforestclassifier__criterion=entropy, randomforestclassifier__max_features=sqrt, randomforestclassifier__min_samples_leaf=1, randomforestclassifier__min_samples_split=2, randomforestclassifier__n_estimators=50 
[CV]  randomforestclassifier__criterion=entropy, randomforestclassifier__max_features=sqrt, randomforestclassifier__min_samples_leaf=1, randomforestclassifier__min_samples_split=2, randomforestclassifier__n_estimators=50, score=0.831, total=   1.2s
[CV] randomforestclassifier__criterion=entropy, randomforestclassifier__max_features=sqrt, randomforestclassifier__min_samples_leaf=1, randomforestclassifier__min_samples_split=2, randomforestclassifier__n_estimators=50 
[CV]  randomforestclassifier__criterion=entropy, randomfores

[CV]  randomforestclassifier__criterion=entropy, randomforestclassifier__max_features=sqrt, randomforestclassifier__min_samples_leaf=1, randomforestclassifier__min_samples_split=3, randomforestclassifier__n_estimators=100, score=0.844, total=   2.4s
[CV] randomforestclassifier__criterion=entropy, randomforestclassifier__max_features=sqrt, randomforestclassifier__min_samples_leaf=1, randomforestclassifier__min_samples_split=3, randomforestclassifier__n_estimators=100 
[CV]  randomforestclassifier__criterion=entropy, randomforestclassifier__max_features=sqrt, randomforestclassifier__min_samples_leaf=1, randomforestclassifier__min_samples_split=3, randomforestclassifier__n_estimators=100, score=0.836, total=   2.3s
[CV] randomforestclassifier__criterion=entropy, randomforestclassifier__max_features=sqrt, randomforestclassifier__min_samples_leaf=1, randomforestclassifier__min_samples_split=3, randomforestclassifier__n_estimators=100 
[CV]  randomforestclassifier__criterion=entropy, randomf

[CV]  randomforestclassifier__criterion=entropy, randomforestclassifier__max_features=sqrt, randomforestclassifier__min_samples_leaf=1, randomforestclassifier__min_samples_split=4, randomforestclassifier__n_estimators=100, score=0.840, total=   2.2s
[CV] randomforestclassifier__criterion=entropy, randomforestclassifier__max_features=sqrt, randomforestclassifier__min_samples_leaf=1, randomforestclassifier__min_samples_split=4, randomforestclassifier__n_estimators=100 
[CV]  randomforestclassifier__criterion=entropy, randomforestclassifier__max_features=sqrt, randomforestclassifier__min_samples_leaf=1, randomforestclassifier__min_samples_split=4, randomforestclassifier__n_estimators=100, score=0.839, total=   2.0s
[CV] randomforestclassifier__criterion=entropy, randomforestclassifier__max_features=sqrt, randomforestclassifier__min_samples_leaf=1, randomforestclassifier__min_samples_split=4, randomforestclassifier__n_estimators=150 
[CV]  randomforestclassifier__criterion=entropy, randomf

[CV]  randomforestclassifier__criterion=entropy, randomforestclassifier__max_features=sqrt, randomforestclassifier__min_samples_leaf=5, randomforestclassifier__min_samples_split=2, randomforestclassifier__n_estimators=150, score=0.835, total=   2.8s
[CV] randomforestclassifier__criterion=entropy, randomforestclassifier__max_features=sqrt, randomforestclassifier__min_samples_leaf=5, randomforestclassifier__min_samples_split=2, randomforestclassifier__n_estimators=150 
[CV]  randomforestclassifier__criterion=entropy, randomforestclassifier__max_features=sqrt, randomforestclassifier__min_samples_leaf=5, randomforestclassifier__min_samples_split=2, randomforestclassifier__n_estimators=150, score=0.816, total=   2.6s
[CV] randomforestclassifier__criterion=entropy, randomforestclassifier__max_features=sqrt, randomforestclassifier__min_samples_leaf=5, randomforestclassifier__min_samples_split=2, randomforestclassifier__n_estimators=150 
[CV]  randomforestclassifier__criterion=entropy, randomf

[CV]  randomforestclassifier__criterion=entropy, randomforestclassifier__max_features=sqrt, randomforestclassifier__min_samples_leaf=5, randomforestclassifier__min_samples_split=3, randomforestclassifier__n_estimators=150, score=0.838, total=   2.5s
[CV] randomforestclassifier__criterion=entropy, randomforestclassifier__max_features=sqrt, randomforestclassifier__min_samples_leaf=5, randomforestclassifier__min_samples_split=4, randomforestclassifier__n_estimators=50 
[CV]  randomforestclassifier__criterion=entropy, randomforestclassifier__max_features=sqrt, randomforestclassifier__min_samples_leaf=5, randomforestclassifier__min_samples_split=4, randomforestclassifier__n_estimators=50, score=0.842, total=   1.1s
[CV] randomforestclassifier__criterion=entropy, randomforestclassifier__max_features=sqrt, randomforestclassifier__min_samples_leaf=5, randomforestclassifier__min_samples_split=4, randomforestclassifier__n_estimators=50 
[CV]  randomforestclassifier__criterion=entropy, randomfore

[CV]  randomforestclassifier__criterion=entropy, randomforestclassifier__max_features=sqrt, randomforestclassifier__min_samples_leaf=10, randomforestclassifier__min_samples_split=2, randomforestclassifier__n_estimators=50, score=0.807, total=   0.8s
[CV] randomforestclassifier__criterion=entropy, randomforestclassifier__max_features=sqrt, randomforestclassifier__min_samples_leaf=10, randomforestclassifier__min_samples_split=2, randomforestclassifier__n_estimators=50 
[CV]  randomforestclassifier__criterion=entropy, randomforestclassifier__max_features=sqrt, randomforestclassifier__min_samples_leaf=10, randomforestclassifier__min_samples_split=2, randomforestclassifier__n_estimators=50, score=0.816, total=   0.8s
[CV] randomforestclassifier__criterion=entropy, randomforestclassifier__max_features=sqrt, randomforestclassifier__min_samples_leaf=10, randomforestclassifier__min_samples_split=2, randomforestclassifier__n_estimators=50 
[CV]  randomforestclassifier__criterion=entropy, randomf

[CV]  randomforestclassifier__criterion=entropy, randomforestclassifier__max_features=sqrt, randomforestclassifier__min_samples_leaf=10, randomforestclassifier__min_samples_split=3, randomforestclassifier__n_estimators=100, score=0.831, total=   1.7s
[CV] randomforestclassifier__criterion=entropy, randomforestclassifier__max_features=sqrt, randomforestclassifier__min_samples_leaf=10, randomforestclassifier__min_samples_split=3, randomforestclassifier__n_estimators=100 
[CV]  randomforestclassifier__criterion=entropy, randomforestclassifier__max_features=sqrt, randomforestclassifier__min_samples_leaf=10, randomforestclassifier__min_samples_split=3, randomforestclassifier__n_estimators=100, score=0.829, total=   1.5s
[CV] randomforestclassifier__criterion=entropy, randomforestclassifier__max_features=sqrt, randomforestclassifier__min_samples_leaf=10, randomforestclassifier__min_samples_split=3, randomforestclassifier__n_estimators=100 
[CV]  randomforestclassifier__criterion=entropy, ran

[CV]  randomforestclassifier__criterion=entropy, randomforestclassifier__max_features=sqrt, randomforestclassifier__min_samples_leaf=10, randomforestclassifier__min_samples_split=4, randomforestclassifier__n_estimators=100, score=0.828, total=   1.6s
[CV] randomforestclassifier__criterion=entropy, randomforestclassifier__max_features=sqrt, randomforestclassifier__min_samples_leaf=10, randomforestclassifier__min_samples_split=4, randomforestclassifier__n_estimators=100 
[CV]  randomforestclassifier__criterion=entropy, randomforestclassifier__max_features=sqrt, randomforestclassifier__min_samples_leaf=10, randomforestclassifier__min_samples_split=4, randomforestclassifier__n_estimators=100, score=0.835, total=   2.1s
[CV] randomforestclassifier__criterion=entropy, randomforestclassifier__max_features=sqrt, randomforestclassifier__min_samples_leaf=10, randomforestclassifier__min_samples_split=4, randomforestclassifier__n_estimators=150 
[CV]  randomforestclassifier__criterion=entropy, ran

[CV]  randomforestclassifier__criterion=entropy, randomforestclassifier__max_features=sqrt, randomforestclassifier__min_samples_leaf=15, randomforestclassifier__min_samples_split=2, randomforestclassifier__n_estimators=150, score=0.822, total=   2.1s
[CV] randomforestclassifier__criterion=entropy, randomforestclassifier__max_features=sqrt, randomforestclassifier__min_samples_leaf=15, randomforestclassifier__min_samples_split=2, randomforestclassifier__n_estimators=150 
[CV]  randomforestclassifier__criterion=entropy, randomforestclassifier__max_features=sqrt, randomforestclassifier__min_samples_leaf=15, randomforestclassifier__min_samples_split=2, randomforestclassifier__n_estimators=150, score=0.799, total=   2.9s
[CV] randomforestclassifier__criterion=entropy, randomforestclassifier__max_features=sqrt, randomforestclassifier__min_samples_leaf=15, randomforestclassifier__min_samples_split=2, randomforestclassifier__n_estimators=150 
[CV]  randomforestclassifier__criterion=entropy, ran

[CV]  randomforestclassifier__criterion=entropy, randomforestclassifier__max_features=sqrt, randomforestclassifier__min_samples_leaf=15, randomforestclassifier__min_samples_split=3, randomforestclassifier__n_estimators=150, score=0.824, total=   3.2s
[CV] randomforestclassifier__criterion=entropy, randomforestclassifier__max_features=sqrt, randomforestclassifier__min_samples_leaf=15, randomforestclassifier__min_samples_split=4, randomforestclassifier__n_estimators=50 
[CV]  randomforestclassifier__criterion=entropy, randomforestclassifier__max_features=sqrt, randomforestclassifier__min_samples_leaf=15, randomforestclassifier__min_samples_split=4, randomforestclassifier__n_estimators=50, score=0.829, total=   0.9s
[CV] randomforestclassifier__criterion=entropy, randomforestclassifier__max_features=sqrt, randomforestclassifier__min_samples_leaf=15, randomforestclassifier__min_samples_split=4, randomforestclassifier__n_estimators=50 
[CV]  randomforestclassifier__criterion=entropy, random

[CV]  randomforestclassifier__criterion=entropy, randomforestclassifier__max_features=0.33, randomforestclassifier__min_samples_leaf=1, randomforestclassifier__min_samples_split=2, randomforestclassifier__n_estimators=50, score=0.824, total=   1.6s
[CV] randomforestclassifier__criterion=entropy, randomforestclassifier__max_features=0.33, randomforestclassifier__min_samples_leaf=1, randomforestclassifier__min_samples_split=2, randomforestclassifier__n_estimators=50 
[CV]  randomforestclassifier__criterion=entropy, randomforestclassifier__max_features=0.33, randomforestclassifier__min_samples_leaf=1, randomforestclassifier__min_samples_split=2, randomforestclassifier__n_estimators=50, score=0.835, total=   1.2s
[CV] randomforestclassifier__criterion=entropy, randomforestclassifier__max_features=0.33, randomforestclassifier__min_samples_leaf=1, randomforestclassifier__min_samples_split=2, randomforestclassifier__n_estimators=50 
[CV]  randomforestclassifier__criterion=entropy, randomfores

[CV]  randomforestclassifier__criterion=entropy, randomforestclassifier__max_features=0.33, randomforestclassifier__min_samples_leaf=1, randomforestclassifier__min_samples_split=3, randomforestclassifier__n_estimators=100, score=0.840, total=   2.6s
[CV] randomforestclassifier__criterion=entropy, randomforestclassifier__max_features=0.33, randomforestclassifier__min_samples_leaf=1, randomforestclassifier__min_samples_split=3, randomforestclassifier__n_estimators=100 
[CV]  randomforestclassifier__criterion=entropy, randomforestclassifier__max_features=0.33, randomforestclassifier__min_samples_leaf=1, randomforestclassifier__min_samples_split=3, randomforestclassifier__n_estimators=100, score=0.833, total=   2.9s
[CV] randomforestclassifier__criterion=entropy, randomforestclassifier__max_features=0.33, randomforestclassifier__min_samples_leaf=1, randomforestclassifier__min_samples_split=3, randomforestclassifier__n_estimators=100 
[CV]  randomforestclassifier__criterion=entropy, randomf

[CV]  randomforestclassifier__criterion=entropy, randomforestclassifier__max_features=0.33, randomforestclassifier__min_samples_leaf=1, randomforestclassifier__min_samples_split=4, randomforestclassifier__n_estimators=100, score=0.834, total=   2.5s
[CV] randomforestclassifier__criterion=entropy, randomforestclassifier__max_features=0.33, randomforestclassifier__min_samples_leaf=1, randomforestclassifier__min_samples_split=4, randomforestclassifier__n_estimators=100 
[CV]  randomforestclassifier__criterion=entropy, randomforestclassifier__max_features=0.33, randomforestclassifier__min_samples_leaf=1, randomforestclassifier__min_samples_split=4, randomforestclassifier__n_estimators=100, score=0.847, total=   2.5s
[CV] randomforestclassifier__criterion=entropy, randomforestclassifier__max_features=0.33, randomforestclassifier__min_samples_leaf=1, randomforestclassifier__min_samples_split=4, randomforestclassifier__n_estimators=150 
[CV]  randomforestclassifier__criterion=entropy, randomf

[CV]  randomforestclassifier__criterion=entropy, randomforestclassifier__max_features=0.33, randomforestclassifier__min_samples_leaf=5, randomforestclassifier__min_samples_split=2, randomforestclassifier__n_estimators=150, score=0.831, total=   3.4s
[CV] randomforestclassifier__criterion=entropy, randomforestclassifier__max_features=0.33, randomforestclassifier__min_samples_leaf=5, randomforestclassifier__min_samples_split=2, randomforestclassifier__n_estimators=150 
[CV]  randomforestclassifier__criterion=entropy, randomforestclassifier__max_features=0.33, randomforestclassifier__min_samples_leaf=5, randomforestclassifier__min_samples_split=2, randomforestclassifier__n_estimators=150, score=0.809, total=   3.3s
[CV] randomforestclassifier__criterion=entropy, randomforestclassifier__max_features=0.33, randomforestclassifier__min_samples_leaf=5, randomforestclassifier__min_samples_split=2, randomforestclassifier__n_estimators=150 
[CV]  randomforestclassifier__criterion=entropy, randomf

[CV]  randomforestclassifier__criterion=entropy, randomforestclassifier__max_features=0.33, randomforestclassifier__min_samples_leaf=5, randomforestclassifier__min_samples_split=3, randomforestclassifier__n_estimators=150, score=0.839, total=   3.3s
[CV] randomforestclassifier__criterion=entropy, randomforestclassifier__max_features=0.33, randomforestclassifier__min_samples_leaf=5, randomforestclassifier__min_samples_split=4, randomforestclassifier__n_estimators=50 
[CV]  randomforestclassifier__criterion=entropy, randomforestclassifier__max_features=0.33, randomforestclassifier__min_samples_leaf=5, randomforestclassifier__min_samples_split=4, randomforestclassifier__n_estimators=50, score=0.839, total=   1.2s
[CV] randomforestclassifier__criterion=entropy, randomforestclassifier__max_features=0.33, randomforestclassifier__min_samples_leaf=5, randomforestclassifier__min_samples_split=4, randomforestclassifier__n_estimators=50 
[CV]  randomforestclassifier__criterion=entropy, randomfore

[CV]  randomforestclassifier__criterion=entropy, randomforestclassifier__max_features=0.33, randomforestclassifier__min_samples_leaf=10, randomforestclassifier__min_samples_split=2, randomforestclassifier__n_estimators=50, score=0.808, total=   1.4s
[CV] randomforestclassifier__criterion=entropy, randomforestclassifier__max_features=0.33, randomforestclassifier__min_samples_leaf=10, randomforestclassifier__min_samples_split=2, randomforestclassifier__n_estimators=50 
[CV]  randomforestclassifier__criterion=entropy, randomforestclassifier__max_features=0.33, randomforestclassifier__min_samples_leaf=10, randomforestclassifier__min_samples_split=2, randomforestclassifier__n_estimators=50, score=0.824, total=   1.4s
[CV] randomforestclassifier__criterion=entropy, randomforestclassifier__max_features=0.33, randomforestclassifier__min_samples_leaf=10, randomforestclassifier__min_samples_split=2, randomforestclassifier__n_estimators=50 
[CV]  randomforestclassifier__criterion=entropy, randomf

[CV]  randomforestclassifier__criterion=entropy, randomforestclassifier__max_features=0.33, randomforestclassifier__min_samples_leaf=10, randomforestclassifier__min_samples_split=3, randomforestclassifier__n_estimators=100, score=0.832, total=   2.9s
[CV] randomforestclassifier__criterion=entropy, randomforestclassifier__max_features=0.33, randomforestclassifier__min_samples_leaf=10, randomforestclassifier__min_samples_split=3, randomforestclassifier__n_estimators=100 
[CV]  randomforestclassifier__criterion=entropy, randomforestclassifier__max_features=0.33, randomforestclassifier__min_samples_leaf=10, randomforestclassifier__min_samples_split=3, randomforestclassifier__n_estimators=100, score=0.824, total=   2.3s
[CV] randomforestclassifier__criterion=entropy, randomforestclassifier__max_features=0.33, randomforestclassifier__min_samples_leaf=10, randomforestclassifier__min_samples_split=3, randomforestclassifier__n_estimators=100 
[CV]  randomforestclassifier__criterion=entropy, ran

[CV]  randomforestclassifier__criterion=entropy, randomforestclassifier__max_features=0.33, randomforestclassifier__min_samples_leaf=10, randomforestclassifier__min_samples_split=4, randomforestclassifier__n_estimators=100, score=0.825, total=   2.3s
[CV] randomforestclassifier__criterion=entropy, randomforestclassifier__max_features=0.33, randomforestclassifier__min_samples_leaf=10, randomforestclassifier__min_samples_split=4, randomforestclassifier__n_estimators=100 
[CV]  randomforestclassifier__criterion=entropy, randomforestclassifier__max_features=0.33, randomforestclassifier__min_samples_leaf=10, randomforestclassifier__min_samples_split=4, randomforestclassifier__n_estimators=100, score=0.839, total=   2.9s
[CV] randomforestclassifier__criterion=entropy, randomforestclassifier__max_features=0.33, randomforestclassifier__min_samples_leaf=10, randomforestclassifier__min_samples_split=4, randomforestclassifier__n_estimators=150 
[CV]  randomforestclassifier__criterion=entropy, ran

[CV]  randomforestclassifier__criterion=entropy, randomforestclassifier__max_features=0.33, randomforestclassifier__min_samples_leaf=15, randomforestclassifier__min_samples_split=2, randomforestclassifier__n_estimators=150, score=0.823, total=   3.4s
[CV] randomforestclassifier__criterion=entropy, randomforestclassifier__max_features=0.33, randomforestclassifier__min_samples_leaf=15, randomforestclassifier__min_samples_split=2, randomforestclassifier__n_estimators=150 
[CV]  randomforestclassifier__criterion=entropy, randomforestclassifier__max_features=0.33, randomforestclassifier__min_samples_leaf=15, randomforestclassifier__min_samples_split=2, randomforestclassifier__n_estimators=150, score=0.801, total=   2.8s
[CV] randomforestclassifier__criterion=entropy, randomforestclassifier__max_features=0.33, randomforestclassifier__min_samples_leaf=15, randomforestclassifier__min_samples_split=2, randomforestclassifier__n_estimators=150 
[CV]  randomforestclassifier__criterion=entropy, ran

[CV]  randomforestclassifier__criterion=entropy, randomforestclassifier__max_features=0.33, randomforestclassifier__min_samples_leaf=15, randomforestclassifier__min_samples_split=3, randomforestclassifier__n_estimators=150, score=0.835, total=   2.9s
[CV] randomforestclassifier__criterion=entropy, randomforestclassifier__max_features=0.33, randomforestclassifier__min_samples_leaf=15, randomforestclassifier__min_samples_split=4, randomforestclassifier__n_estimators=50 
[CV]  randomforestclassifier__criterion=entropy, randomforestclassifier__max_features=0.33, randomforestclassifier__min_samples_leaf=15, randomforestclassifier__min_samples_split=4, randomforestclassifier__n_estimators=50, score=0.836, total=   1.0s
[CV] randomforestclassifier__criterion=entropy, randomforestclassifier__max_features=0.33, randomforestclassifier__min_samples_leaf=15, randomforestclassifier__min_samples_split=4, randomforestclassifier__n_estimators=50 
[CV]  randomforestclassifier__criterion=entropy, random

[Parallel(n_jobs=1)]: Done 720 out of 720 | elapsed: 23.1min finished


GridSearchCV(cv=5,
             estimator=Pipeline(steps=[('columntransformer',
                                        ColumnTransformer(transformers=[('minmaxscaler',
                                                                         MinMaxScaler(),
                                                                         [0, 3,
                                                                          4, 5,
                                                                          6, 7,
                                                                          8,
                                                                          9]),
                                                                        ('onehotencoder',
                                                                         OneHotEncoder(sparse=False),
                                                                         [1,
                                                                          2])

In [17]:
print(rf_grid.best_params_)

{'randomforestclassifier__criterion': 'entropy', 'randomforestclassifier__max_features': 'sqrt', 'randomforestclassifier__min_samples_leaf': 1, 'randomforestclassifier__min_samples_split': 2, 'randomforestclassifier__n_estimators': 100}


In [18]:
print(rf_grid.best_score_)

0.8425714285714285


In [19]:
print(f"Training Data Score: {rf_grid.score(X_train, y_train)}")
print(f"Testing Data Score: {rf_grid.score(X_test, y_test)}")

Training Data Score: 1.0
Testing Data Score: 0.836


In [20]:
predictions = rf_grid.predict(X_test)
print(f"First 10 Predictions:   {predictions[:10]}")
print(f"First 10 Actual labels: {y_test[:10].tolist()}")

First 10 Predictions:   [0 0 0 0 1 1 0 0 0 0]
First 10 Actual labels: [1, 0, 0, 0, 0, 1, 0, 0, 0, 0]


In [21]:
pd.DataFrame({"Prediction": predictions, "Actual": y_test}).reset_index(drop=True)

Unnamed: 0,Prediction,Actual
0,0,1
1,0,0
2,0,0
3,0,0
4,1,0
...,...,...
2995,0,0
2996,0,0
2997,0,0
2998,0,0


In [22]:
cm = confusion_matrix(y_test, predictions)
print(cm)

[[2130  259]
 [ 233  378]]


In [23]:
cm = np.around(cm / cm.sum(axis=1)[:, np.newaxis], 2)
print(cm)

[[0.89 0.11]
 [0.38 0.62]]


In [24]:
print(classification_report(y_test, predictions))

              precision    recall  f1-score   support

           0       0.90      0.89      0.90      2389
           1       0.59      0.62      0.61       611

    accuracy                           0.84      3000
   macro avg       0.75      0.76      0.75      3000
weighted avg       0.84      0.84      0.84      3000



In [25]:
pred = rf_grid.predict(X_test[:1])

In [26]:
print(f"Predicted classes: {pred}")
print(f"Actual Labels: {list(y_test[:1])}")

Predicted classes: [0]
Actual Labels: [1]


In [27]:
import joblib

filename = '../models/nate_random_forest.sav'
joblib.dump(rf_grid, filename)

['nate_random_forest.sav']

In [29]:
rf_model = joblib.load(filename)
print(rf_model.score(X_test, y_test))

0.836
