In [2]:
# Update sklearn to prevent version mismatches
!pip install sklearn --upgrade



In [3]:
# install joblib. This will be used to save your model. 
# Restart your kernel after installing 
!pip install joblib



In [4]:
import pandas as pd

# Read the CSV and Perform Basic Data Cleaning

In [5]:
df = pd.read_csv("exoplanet_data.csv")
# Drop the null columns where all values are null
df = df.dropna(axis='columns', how='all')
# Drop the null rows
df = df.dropna()
df.head()

Unnamed: 0,koi_disposition,koi_fpflag_nt,koi_fpflag_ss,koi_fpflag_co,koi_fpflag_ec,koi_period,koi_period_err1,koi_period_err2,koi_time0bk,koi_time0bk_err1,...,koi_steff_err2,koi_slogg,koi_slogg_err1,koi_slogg_err2,koi_srad,koi_srad_err1,koi_srad_err2,ra,dec,koi_kepmag
0,CONFIRMED,0,0,0,0,54.418383,0.0002479,-0.0002479,162.51384,0.00352,...,-81,4.467,0.064,-0.096,0.927,0.105,-0.061,291.93423,48.141651,15.347
1,FALSE POSITIVE,0,1,0,0,19.89914,1.49e-05,-1.49e-05,175.850252,0.000581,...,-176,4.544,0.044,-0.176,0.868,0.233,-0.078,297.00482,48.134129,15.436
2,FALSE POSITIVE,0,1,0,0,1.736952,2.63e-07,-2.63e-07,170.307565,0.000115,...,-174,4.564,0.053,-0.168,0.791,0.201,-0.067,285.53461,48.28521,15.597
3,CONFIRMED,0,0,0,0,2.525592,3.76e-06,-3.76e-06,171.59555,0.00113,...,-211,4.438,0.07,-0.21,1.046,0.334,-0.133,288.75488,48.2262,15.509
4,CONFIRMED,0,0,0,0,4.134435,1.05e-05,-1.05e-05,172.97937,0.0019,...,-232,4.486,0.054,-0.229,0.972,0.315,-0.105,296.28613,48.22467,15.714


# Select your features (columns)

In [6]:
# Set features. This will also be used as your x values.

#selected_features = df[['koi_period', 'koi_time0bk', 'koi_time0bk_err1', 'koi_slogg', 'ra']]
#selected_features = df.drop(columns=["koi_disposition"])

X = df.drop(columns=["koi_disposition"])
y = df["koi_disposition"]

from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import f_classif
best_features = SelectKBest(f_classif, k=20)
best_features.fit(X, y)
selected_features = best_features.fit_transform(X, y)
best_features.get_support()

array([ True,  True,  True,  True, False, False, False, False,  True,
        True, False, False, False,  True,  True,  True,  True, False,
       False, False, False, False,  True, False, False, False,  True,
        True,  True,  True,  True,  True,  True,  True, False, False,
       False,  True, False, False])

In [7]:
y = df["koi_disposition"]
from sklearn.preprocessing import LabelEncoder

# Step 1: Label-encode data set
label_encoder = LabelEncoder()
label_encoder.fit(y)
encoded_y = label_encoder.transform(y)
encoded_y
from keras.utils import to_categorical

# Step 2: One-hot encoding
one_hot_y = to_categorical(encoded_y)
one_hot_y

Using TensorFlow backend.


array([[0., 1., 0.],
       [0., 0., 1.],
       [0., 0., 1.],
       ...,
       [1., 0., 0.],
       [0., 0., 1.],
       [0., 0., 1.]], dtype=float32)

# Create a Train Test Split

Use `koi_disposition` for the y values

In [8]:
from sklearn.model_selection import train_test_split

#X_train, X_test, y_train, y_test = train_test_split(selected_features, df["koi_disposition"], random_state=1)
X_train, X_test, y_train, y_test = train_test_split(selected_features, one_hot_y, random_state=1)

In [9]:
X_train

array([[ 0.0000000e+00,  0.0000000e+00,  0.0000000e+00, ...,
         6.6000000e-02, -1.2300000e-01,  2.9809543e+02],
       [ 0.0000000e+00,  0.0000000e+00,  0.0000000e+00, ...,
         7.8000000e-02, -5.2000000e-02,  2.9573535e+02],
       [ 0.0000000e+00,  0.0000000e+00,  0.0000000e+00, ...,
         5.4000000e-02, -2.7000000e-02,  2.9218417e+02],
       ...,
       [ 0.0000000e+00,  0.0000000e+00,  1.0000000e+00, ...,
         2.4000000e-02, -5.6000000e-02,  2.9917062e+02],
       [ 0.0000000e+00,  0.0000000e+00,  0.0000000e+00, ...,
         1.2600000e-01, -1.0300000e-01,  2.8816470e+02],
       [ 0.0000000e+00,  0.0000000e+00,  0.0000000e+00, ...,
         5.5000000e-02, -1.6500000e-01,  2.8547504e+02]])

In [10]:
y_train

array([[1., 0., 0.],
       [0., 1., 0.],
       [1., 0., 0.],
       ...,
       [0., 0., 1.],
       [0., 1., 0.],
       [0., 1., 0.]], dtype=float32)

# Pre-processing

Scale the data using the MinMaxScaler and perform some feature selection

In [11]:
# Scale your data
from sklearn.preprocessing import MinMaxScaler

# Create a MinMaxScater model and fit it to the training data
X_scaler = MinMaxScaler().fit(X_train)

# Transform the training and testing data using the X_scaler and y_scaler models
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

# Train the Model



In [12]:
from sklearn.ensemble import RandomForestClassifier
from matplotlib import pyplot as plt

rf = RandomForestClassifier(n_estimators=200)
rf = rf.fit(X_train_scaled, y_train)
rf.score(X_train_scaled, y_train)

1.0

In [15]:
importances = rf.feature_importances_
importances

array([0.12567106, 0.10667726, 0.12392232, 0.04575834, 0.03527742,
       0.03980364, 0.038263  , 0.04853392, 0.04664331, 0.05560944,
       0.04590628, 0.0743135 , 0.00627134, 0.02216395, 0.05242074,
       0.0399697 , 0.02628783, 0.02140355, 0.02179431, 0.02330909])

In [16]:
rf.score(X_test_scaled, y_test)

0.8981693363844394

# Hyperparameter Tuning

Use `GridSearchCV` to tune the model's parameters

In [19]:
# Create the GridSearchCV model
import numpy as np
from sklearn.model_selection import GridSearchCV
param_grid = {'n_estimators': np.arange(1,500,1),
              #'weights': ["uniform", "distance"],
              #'metric': ["euclidian", "manhattan"]
             }
grid = GridSearchCV(rf, param_grid, verbose=3)

In [20]:
grid.get_params().keys()

dict_keys(['cv', 'error_score', 'estimator__bootstrap', 'estimator__ccp_alpha', 'estimator__class_weight', 'estimator__criterion', 'estimator__max_depth', 'estimator__max_features', 'estimator__max_leaf_nodes', 'estimator__max_samples', 'estimator__min_impurity_decrease', 'estimator__min_impurity_split', 'estimator__min_samples_leaf', 'estimator__min_samples_split', 'estimator__min_weight_fraction_leaf', 'estimator__n_estimators', 'estimator__n_jobs', 'estimator__oob_score', 'estimator__random_state', 'estimator__verbose', 'estimator__warm_start', 'estimator', 'n_jobs', 'param_grid', 'pre_dispatch', 'refit', 'return_train_score', 'scoring', 'verbose'])

In [21]:
# Train the model with GridSearch
grid.fit(X_train_scaled, y_train)

Fitting 5 folds for each of 499 candidates, totalling 2495 fits
[CV 1/5] END ....................n_estimators=1;, score=0.800 total time=   0.0s
[CV 2/5] END ....................n_estimators=1;, score=0.825 total time=   0.0s
[CV 3/5] END ....................n_estimators=1;, score=0.773 total time=   0.0s
[CV 4/5] END ....................n_estimators=1;, score=0.801 total time=   0.0s
[CV 5/5] END ....................n_estimators=1;, score=0.801 total time=   0.0s
[CV 1/5] END ....................n_estimators=2;, score=0.716 total time=   0.0s
[CV 2/5] END ....................n_estimators=2;, score=0.661 total time=   0.0s
[CV 3/5] END ....................n_estimators=2;, score=0.675 total time=   0.0s
[CV 4/5] END ....................n_estimators=2;, score=0.709 total time=   0.0s
[CV 5/5] END ....................n_estimators=2;, score=0.695 total time=   0.0s
[CV 1/5] END ....................n_estimators=3;, score=0.847 total time=   0.0s
[CV 2/5] END ....................n_estimators

[CV 3/5] END ...................n_estimators=21;, score=0.866 total time=   0.2s
[CV 4/5] END ...................n_estimators=21;, score=0.872 total time=   0.2s
[CV 5/5] END ...................n_estimators=21;, score=0.862 total time=   0.2s
[CV 1/5] END ...................n_estimators=22;, score=0.867 total time=   0.2s
[CV 2/5] END ...................n_estimators=22;, score=0.867 total time=   0.2s
[CV 3/5] END ...................n_estimators=22;, score=0.847 total time=   0.2s
[CV 4/5] END ...................n_estimators=22;, score=0.859 total time=   0.2s
[CV 5/5] END ...................n_estimators=22;, score=0.857 total time=   0.2s
[CV 1/5] END ...................n_estimators=23;, score=0.876 total time=   0.2s
[CV 2/5] END ...................n_estimators=23;, score=0.896 total time=   0.2s
[CV 3/5] END ...................n_estimators=23;, score=0.861 total time=   0.2s
[CV 4/5] END ...................n_estimators=23;, score=0.871 total time=   0.2s
[CV 5/5] END ...............

[CV 5/5] END ...................n_estimators=41;, score=0.867 total time=   0.3s
[CV 1/5] END ...................n_estimators=42;, score=0.868 total time=   0.3s
[CV 2/5] END ...................n_estimators=42;, score=0.875 total time=   0.3s
[CV 3/5] END ...................n_estimators=42;, score=0.861 total time=   0.3s
[CV 4/5] END ...................n_estimators=42;, score=0.865 total time=   0.3s
[CV 5/5] END ...................n_estimators=42;, score=0.865 total time=   0.3s
[CV 1/5] END ...................n_estimators=43;, score=0.880 total time=   0.3s
[CV 2/5] END ...................n_estimators=43;, score=0.877 total time=   0.3s
[CV 3/5] END ...................n_estimators=43;, score=0.869 total time=   0.3s
[CV 4/5] END ...................n_estimators=43;, score=0.866 total time=   0.3s
[CV 5/5] END ...................n_estimators=43;, score=0.861 total time=   0.3s
[CV 1/5] END ...................n_estimators=44;, score=0.867 total time=   0.3s
[CV 2/5] END ...............

[CV 2/5] END ...................n_estimators=62;, score=0.879 total time=   0.5s
[CV 3/5] END ...................n_estimators=62;, score=0.867 total time=   0.5s
[CV 4/5] END ...................n_estimators=62;, score=0.874 total time=   0.5s
[CV 5/5] END ...................n_estimators=62;, score=0.854 total time=   0.5s
[CV 1/5] END ...................n_estimators=63;, score=0.876 total time=   0.5s
[CV 2/5] END ...................n_estimators=63;, score=0.880 total time=   0.5s
[CV 3/5] END ...................n_estimators=63;, score=0.865 total time=   0.5s
[CV 4/5] END ...................n_estimators=63;, score=0.882 total time=   0.5s
[CV 5/5] END ...................n_estimators=63;, score=0.872 total time=   0.5s
[CV 1/5] END ...................n_estimators=64;, score=0.870 total time=   0.5s
[CV 2/5] END ...................n_estimators=64;, score=0.879 total time=   0.5s
[CV 3/5] END ...................n_estimators=64;, score=0.863 total time=   0.5s
[CV 4/5] END ...............

[CV 4/5] END ...................n_estimators=82;, score=0.866 total time=   0.6s
[CV 5/5] END ...................n_estimators=82;, score=0.865 total time=   0.6s
[CV 1/5] END ...................n_estimators=83;, score=0.880 total time=   0.6s
[CV 2/5] END ...................n_estimators=83;, score=0.890 total time=   0.6s
[CV 3/5] END ...................n_estimators=83;, score=0.870 total time=   0.6s
[CV 4/5] END ...................n_estimators=83;, score=0.881 total time=   0.6s
[CV 5/5] END ...................n_estimators=83;, score=0.859 total time=   0.6s
[CV 1/5] END ...................n_estimators=84;, score=0.878 total time=   0.6s
[CV 2/5] END ...................n_estimators=84;, score=0.881 total time=   0.6s
[CV 3/5] END ...................n_estimators=84;, score=0.869 total time=   0.6s
[CV 4/5] END ...................n_estimators=84;, score=0.876 total time=   0.6s
[CV 5/5] END ...................n_estimators=84;, score=0.864 total time=   0.6s
[CV 1/5] END ...............

[CV 1/5] END ..................n_estimators=103;, score=0.879 total time=   0.8s
[CV 2/5] END ..................n_estimators=103;, score=0.879 total time=   0.8s
[CV 3/5] END ..................n_estimators=103;, score=0.865 total time=   0.8s
[CV 4/5] END ..................n_estimators=103;, score=0.872 total time=   0.8s
[CV 5/5] END ..................n_estimators=103;, score=0.863 total time=   0.8s
[CV 1/5] END ..................n_estimators=104;, score=0.882 total time=   0.8s
[CV 2/5] END ..................n_estimators=104;, score=0.880 total time=   0.8s
[CV 3/5] END ..................n_estimators=104;, score=0.866 total time=   0.8s
[CV 4/5] END ..................n_estimators=104;, score=0.876 total time=   0.8s
[CV 5/5] END ..................n_estimators=104;, score=0.859 total time=   0.8s
[CV 1/5] END ..................n_estimators=105;, score=0.873 total time=   0.8s
[CV 2/5] END ..................n_estimators=105;, score=0.883 total time=   0.8s
[CV 3/5] END ...............

[CV 3/5] END ..................n_estimators=123;, score=0.869 total time=   0.9s
[CV 4/5] END ..................n_estimators=123;, score=0.877 total time=   0.9s
[CV 5/5] END ..................n_estimators=123;, score=0.864 total time=   0.9s
[CV 1/5] END ..................n_estimators=124;, score=0.871 total time=   0.9s
[CV 2/5] END ..................n_estimators=124;, score=0.879 total time=   0.9s
[CV 3/5] END ..................n_estimators=124;, score=0.868 total time=   0.9s
[CV 4/5] END ..................n_estimators=124;, score=0.874 total time=   0.9s
[CV 5/5] END ..................n_estimators=124;, score=0.865 total time=   0.9s
[CV 1/5] END ..................n_estimators=125;, score=0.881 total time=   0.9s
[CV 2/5] END ..................n_estimators=125;, score=0.881 total time=   0.9s
[CV 3/5] END ..................n_estimators=125;, score=0.870 total time=   0.9s
[CV 4/5] END ..................n_estimators=125;, score=0.873 total time=   0.9s
[CV 5/5] END ...............

[CV 5/5] END ..................n_estimators=143;, score=0.861 total time=   1.1s
[CV 1/5] END ..................n_estimators=144;, score=0.877 total time=   1.1s
[CV 2/5] END ..................n_estimators=144;, score=0.880 total time=   1.1s
[CV 3/5] END ..................n_estimators=144;, score=0.870 total time=   1.1s
[CV 4/5] END ..................n_estimators=144;, score=0.869 total time=   1.1s
[CV 5/5] END ..................n_estimators=144;, score=0.858 total time=   1.1s
[CV 1/5] END ..................n_estimators=145;, score=0.884 total time=   1.1s
[CV 2/5] END ..................n_estimators=145;, score=0.879 total time=   1.1s
[CV 3/5] END ..................n_estimators=145;, score=0.869 total time=   1.1s
[CV 4/5] END ..................n_estimators=145;, score=0.872 total time=   1.1s
[CV 5/5] END ..................n_estimators=145;, score=0.865 total time=   1.1s
[CV 1/5] END ..................n_estimators=146;, score=0.879 total time=   1.1s
[CV 2/5] END ...............

[CV 2/5] END ..................n_estimators=164;, score=0.882 total time=   1.2s
[CV 3/5] END ..................n_estimators=164;, score=0.867 total time=   1.2s
[CV 4/5] END ..................n_estimators=164;, score=0.871 total time=   1.2s
[CV 5/5] END ..................n_estimators=164;, score=0.860 total time=   1.2s
[CV 1/5] END ..................n_estimators=165;, score=0.880 total time=   1.2s
[CV 2/5] END ..................n_estimators=165;, score=0.883 total time=   1.2s
[CV 3/5] END ..................n_estimators=165;, score=0.867 total time=   1.2s
[CV 4/5] END ..................n_estimators=165;, score=0.878 total time=   1.2s
[CV 5/5] END ..................n_estimators=165;, score=0.864 total time=   1.2s
[CV 1/5] END ..................n_estimators=166;, score=0.871 total time=   1.2s
[CV 2/5] END ..................n_estimators=166;, score=0.885 total time=   1.2s
[CV 3/5] END ..................n_estimators=166;, score=0.871 total time=   1.2s
[CV 4/5] END ...............

[CV 4/5] END ..................n_estimators=184;, score=0.878 total time=   1.4s
[CV 5/5] END ..................n_estimators=184;, score=0.857 total time=   1.4s
[CV 1/5] END ..................n_estimators=185;, score=0.879 total time=   1.4s
[CV 2/5] END ..................n_estimators=185;, score=0.882 total time=   1.4s
[CV 3/5] END ..................n_estimators=185;, score=0.870 total time=   1.4s
[CV 4/5] END ..................n_estimators=185;, score=0.877 total time=   1.4s
[CV 5/5] END ..................n_estimators=185;, score=0.862 total time=   1.4s
[CV 1/5] END ..................n_estimators=186;, score=0.876 total time=   1.4s
[CV 2/5] END ..................n_estimators=186;, score=0.879 total time=   1.4s
[CV 3/5] END ..................n_estimators=186;, score=0.871 total time=   1.4s
[CV 4/5] END ..................n_estimators=186;, score=0.880 total time=   1.4s
[CV 5/5] END ..................n_estimators=186;, score=0.863 total time=   1.4s
[CV 1/5] END ...............

[CV 1/5] END ..................n_estimators=205;, score=0.879 total time=   1.5s
[CV 2/5] END ..................n_estimators=205;, score=0.886 total time=   1.5s
[CV 3/5] END ..................n_estimators=205;, score=0.867 total time=   1.5s
[CV 4/5] END ..................n_estimators=205;, score=0.876 total time=   1.5s
[CV 5/5] END ..................n_estimators=205;, score=0.865 total time=   1.5s
[CV 1/5] END ..................n_estimators=206;, score=0.878 total time=   1.5s
[CV 2/5] END ..................n_estimators=206;, score=0.875 total time=   1.5s
[CV 3/5] END ..................n_estimators=206;, score=0.871 total time=   1.5s
[CV 4/5] END ..................n_estimators=206;, score=0.876 total time=   1.5s
[CV 5/5] END ..................n_estimators=206;, score=0.864 total time=   1.5s
[CV 1/5] END ..................n_estimators=207;, score=0.877 total time=   1.5s
[CV 2/5] END ..................n_estimators=207;, score=0.882 total time=   1.6s
[CV 3/5] END ...............

[CV 3/5] END ..................n_estimators=225;, score=0.872 total time=   1.7s
[CV 4/5] END ..................n_estimators=225;, score=0.876 total time=   1.7s
[CV 5/5] END ..................n_estimators=225;, score=0.866 total time=   1.7s
[CV 1/5] END ..................n_estimators=226;, score=0.879 total time=   1.7s
[CV 2/5] END ..................n_estimators=226;, score=0.883 total time=   1.7s
[CV 3/5] END ..................n_estimators=226;, score=0.865 total time=   1.7s
[CV 4/5] END ..................n_estimators=226;, score=0.876 total time=   1.7s
[CV 5/5] END ..................n_estimators=226;, score=0.861 total time=   1.7s
[CV 1/5] END ..................n_estimators=227;, score=0.883 total time=   1.7s
[CV 2/5] END ..................n_estimators=227;, score=0.883 total time=   1.7s
[CV 3/5] END ..................n_estimators=227;, score=0.863 total time=   1.7s
[CV 4/5] END ..................n_estimators=227;, score=0.876 total time=   1.7s
[CV 5/5] END ...............

[CV 5/5] END ..................n_estimators=245;, score=0.862 total time=   1.8s
[CV 1/5] END ..................n_estimators=246;, score=0.881 total time=   1.8s
[CV 2/5] END ..................n_estimators=246;, score=0.882 total time=   1.8s
[CV 3/5] END ..................n_estimators=246;, score=0.872 total time=   1.8s
[CV 4/5] END ..................n_estimators=246;, score=0.872 total time=   1.8s
[CV 5/5] END ..................n_estimators=246;, score=0.861 total time=   1.8s
[CV 1/5] END ..................n_estimators=247;, score=0.880 total time=   1.8s
[CV 2/5] END ..................n_estimators=247;, score=0.883 total time=   1.9s
[CV 3/5] END ..................n_estimators=247;, score=0.867 total time=   1.8s
[CV 4/5] END ..................n_estimators=247;, score=0.875 total time=   1.8s
[CV 5/5] END ..................n_estimators=247;, score=0.867 total time=   2.2s
[CV 1/5] END ..................n_estimators=248;, score=0.878 total time=   1.9s
[CV 2/5] END ...............

[CV 2/5] END ..................n_estimators=266;, score=0.885 total time=   2.0s
[CV 3/5] END ..................n_estimators=266;, score=0.866 total time=   2.0s
[CV 4/5] END ..................n_estimators=266;, score=0.878 total time=   2.0s
[CV 5/5] END ..................n_estimators=266;, score=0.862 total time=   2.0s
[CV 1/5] END ..................n_estimators=267;, score=0.876 total time=   2.0s
[CV 2/5] END ..................n_estimators=267;, score=0.884 total time=   2.0s
[CV 3/5] END ..................n_estimators=267;, score=0.867 total time=   2.0s
[CV 4/5] END ..................n_estimators=267;, score=0.873 total time=   2.0s
[CV 5/5] END ..................n_estimators=267;, score=0.867 total time=   2.0s
[CV 1/5] END ..................n_estimators=268;, score=0.884 total time=   2.0s
[CV 2/5] END ..................n_estimators=268;, score=0.885 total time=   2.0s
[CV 3/5] END ..................n_estimators=268;, score=0.868 total time=   2.0s
[CV 4/5] END ...............

[CV 4/5] END ..................n_estimators=286;, score=0.876 total time=   2.1s
[CV 5/5] END ..................n_estimators=286;, score=0.860 total time=   2.1s
[CV 1/5] END ..................n_estimators=287;, score=0.879 total time=   2.1s
[CV 2/5] END ..................n_estimators=287;, score=0.884 total time=   2.2s
[CV 3/5] END ..................n_estimators=287;, score=0.867 total time=   2.1s
[CV 4/5] END ..................n_estimators=287;, score=0.881 total time=   2.1s
[CV 5/5] END ..................n_estimators=287;, score=0.863 total time=   2.1s
[CV 1/5] END ..................n_estimators=288;, score=0.876 total time=   2.1s
[CV 2/5] END ..................n_estimators=288;, score=0.884 total time=   2.2s
[CV 3/5] END ..................n_estimators=288;, score=0.871 total time=   2.1s
[CV 4/5] END ..................n_estimators=288;, score=0.873 total time=   2.1s
[CV 5/5] END ..................n_estimators=288;, score=0.862 total time=   2.1s
[CV 1/5] END ...............

[CV 1/5] END ..................n_estimators=307;, score=0.879 total time=   2.3s
[CV 2/5] END ..................n_estimators=307;, score=0.884 total time=   2.3s
[CV 3/5] END ..................n_estimators=307;, score=0.875 total time=   2.3s
[CV 4/5] END ..................n_estimators=307;, score=0.874 total time=   2.3s
[CV 5/5] END ..................n_estimators=307;, score=0.865 total time=   2.3s
[CV 1/5] END ..................n_estimators=308;, score=0.882 total time=   2.3s
[CV 2/5] END ..................n_estimators=308;, score=0.882 total time=   2.3s
[CV 3/5] END ..................n_estimators=308;, score=0.869 total time=   2.3s
[CV 4/5] END ..................n_estimators=308;, score=0.878 total time=   2.3s
[CV 5/5] END ..................n_estimators=308;, score=0.861 total time=   2.3s
[CV 1/5] END ..................n_estimators=309;, score=0.883 total time=   2.3s
[CV 2/5] END ..................n_estimators=309;, score=0.880 total time=   2.3s
[CV 3/5] END ...............

[CV 3/5] END ..................n_estimators=327;, score=0.868 total time=   2.4s
[CV 4/5] END ..................n_estimators=327;, score=0.875 total time=   2.4s
[CV 5/5] END ..................n_estimators=327;, score=0.865 total time=   2.4s
[CV 1/5] END ..................n_estimators=328;, score=0.877 total time=   2.4s
[CV 2/5] END ..................n_estimators=328;, score=0.885 total time=   2.5s
[CV 3/5] END ..................n_estimators=328;, score=0.869 total time=   2.4s
[CV 4/5] END ..................n_estimators=328;, score=0.879 total time=   2.4s
[CV 5/5] END ..................n_estimators=328;, score=0.866 total time=   2.4s
[CV 1/5] END ..................n_estimators=329;, score=0.882 total time=   2.5s
[CV 2/5] END ..................n_estimators=329;, score=0.881 total time=   2.5s
[CV 3/5] END ..................n_estimators=329;, score=0.870 total time=   2.4s
[CV 4/5] END ..................n_estimators=329;, score=0.875 total time=   2.4s
[CV 5/5] END ...............

[CV 5/5] END ..................n_estimators=347;, score=0.865 total time=   2.6s
[CV 1/5] END ..................n_estimators=348;, score=0.880 total time=   2.6s
[CV 2/5] END ..................n_estimators=348;, score=0.880 total time=   2.6s
[CV 3/5] END ..................n_estimators=348;, score=0.875 total time=   2.6s
[CV 4/5] END ..................n_estimators=348;, score=0.873 total time=   2.6s
[CV 5/5] END ..................n_estimators=348;, score=0.860 total time=   2.6s
[CV 1/5] END ..................n_estimators=349;, score=0.882 total time=   2.6s
[CV 2/5] END ..................n_estimators=349;, score=0.883 total time=   2.6s
[CV 3/5] END ..................n_estimators=349;, score=0.871 total time=   2.6s
[CV 4/5] END ..................n_estimators=349;, score=0.878 total time=   2.6s
[CV 5/5] END ..................n_estimators=349;, score=0.860 total time=   2.6s
[CV 1/5] END ..................n_estimators=350;, score=0.877 total time=   2.6s
[CV 2/5] END ...............

[CV 2/5] END ..................n_estimators=368;, score=0.879 total time=   2.8s
[CV 3/5] END ..................n_estimators=368;, score=0.866 total time=   2.8s
[CV 4/5] END ..................n_estimators=368;, score=0.877 total time=   2.7s
[CV 5/5] END ..................n_estimators=368;, score=0.866 total time=   2.7s
[CV 1/5] END ..................n_estimators=369;, score=0.878 total time=   2.7s
[CV 2/5] END ..................n_estimators=369;, score=0.880 total time=   2.8s
[CV 3/5] END ..................n_estimators=369;, score=0.867 total time=   2.8s
[CV 4/5] END ..................n_estimators=369;, score=0.878 total time=   2.7s
[CV 5/5] END ..................n_estimators=369;, score=0.861 total time=   2.8s
[CV 1/5] END ..................n_estimators=370;, score=0.876 total time=   2.7s
[CV 2/5] END ..................n_estimators=370;, score=0.882 total time=   2.8s
[CV 3/5] END ..................n_estimators=370;, score=0.871 total time=   2.7s
[CV 4/5] END ...............

[CV 4/5] END ..................n_estimators=388;, score=0.876 total time=   2.9s
[CV 5/5] END ..................n_estimators=388;, score=0.864 total time=   2.9s
[CV 1/5] END ..................n_estimators=389;, score=0.881 total time=   2.9s
[CV 2/5] END ..................n_estimators=389;, score=0.879 total time=   2.9s
[CV 3/5] END ..................n_estimators=389;, score=0.869 total time=   2.9s
[CV 4/5] END ..................n_estimators=389;, score=0.878 total time=   2.9s
[CV 5/5] END ..................n_estimators=389;, score=0.862 total time=   2.9s
[CV 1/5] END ..................n_estimators=390;, score=0.879 total time=   2.9s
[CV 2/5] END ..................n_estimators=390;, score=0.888 total time=   2.9s
[CV 3/5] END ..................n_estimators=390;, score=0.871 total time=   2.9s
[CV 4/5] END ..................n_estimators=390;, score=0.874 total time=   2.9s
[CV 5/5] END ..................n_estimators=390;, score=0.860 total time=   2.9s
[CV 1/5] END ...............

[CV 1/5] END ..................n_estimators=409;, score=0.882 total time=   3.0s
[CV 2/5] END ..................n_estimators=409;, score=0.879 total time=   3.1s
[CV 3/5] END ..................n_estimators=409;, score=0.869 total time=   3.0s
[CV 4/5] END ..................n_estimators=409;, score=0.873 total time=   3.0s
[CV 5/5] END ..................n_estimators=409;, score=0.864 total time=   3.0s
[CV 1/5] END ..................n_estimators=410;, score=0.880 total time=   3.0s
[CV 2/5] END ..................n_estimators=410;, score=0.880 total time=   3.1s
[CV 3/5] END ..................n_estimators=410;, score=0.869 total time=   3.0s
[CV 4/5] END ..................n_estimators=410;, score=0.876 total time=   3.0s
[CV 5/5] END ..................n_estimators=410;, score=0.865 total time=   3.0s
[CV 1/5] END ..................n_estimators=411;, score=0.879 total time=   3.1s
[CV 2/5] END ..................n_estimators=411;, score=0.883 total time=   3.1s
[CV 3/5] END ...............

[CV 3/5] END ..................n_estimators=429;, score=0.868 total time=   3.2s
[CV 4/5] END ..................n_estimators=429;, score=0.875 total time=   3.2s
[CV 5/5] END ..................n_estimators=429;, score=0.865 total time=   3.2s
[CV 1/5] END ..................n_estimators=430;, score=0.876 total time=   3.2s
[CV 2/5] END ..................n_estimators=430;, score=0.881 total time=   3.2s
[CV 3/5] END ..................n_estimators=430;, score=0.867 total time=   3.2s
[CV 4/5] END ..................n_estimators=430;, score=0.878 total time=   3.2s
[CV 5/5] END ..................n_estimators=430;, score=0.863 total time=   3.2s
[CV 1/5] END ..................n_estimators=431;, score=0.877 total time=   3.2s
[CV 2/5] END ..................n_estimators=431;, score=0.881 total time=   3.2s
[CV 3/5] END ..................n_estimators=431;, score=0.870 total time=   3.2s
[CV 4/5] END ..................n_estimators=431;, score=0.877 total time=   3.2s
[CV 5/5] END ...............

[CV 5/5] END ..................n_estimators=449;, score=0.865 total time=   3.3s
[CV 1/5] END ..................n_estimators=450;, score=0.881 total time=   3.3s
[CV 2/5] END ..................n_estimators=450;, score=0.887 total time=   3.4s
[CV 3/5] END ..................n_estimators=450;, score=0.870 total time=   3.3s
[CV 4/5] END ..................n_estimators=450;, score=0.872 total time=   3.3s
[CV 5/5] END ..................n_estimators=450;, score=0.862 total time=   3.4s
[CV 1/5] END ..................n_estimators=451;, score=0.877 total time=   3.4s
[CV 2/5] END ..................n_estimators=451;, score=0.880 total time=   3.4s
[CV 3/5] END ..................n_estimators=451;, score=0.869 total time=   3.3s
[CV 4/5] END ..................n_estimators=451;, score=0.880 total time=   3.3s
[CV 5/5] END ..................n_estimators=451;, score=0.862 total time=   3.4s
[CV 1/5] END ..................n_estimators=452;, score=0.878 total time=   3.4s
[CV 2/5] END ...............

[CV 2/5] END ..................n_estimators=470;, score=0.882 total time=   3.5s
[CV 3/5] END ..................n_estimators=470;, score=0.873 total time=   3.5s
[CV 4/5] END ..................n_estimators=470;, score=0.878 total time=   3.5s
[CV 5/5] END ..................n_estimators=470;, score=0.868 total time=   3.5s
[CV 1/5] END ..................n_estimators=471;, score=0.884 total time=   3.5s
[CV 2/5] END ..................n_estimators=471;, score=0.882 total time=   3.5s
[CV 3/5] END ..................n_estimators=471;, score=0.871 total time=   3.5s
[CV 4/5] END ..................n_estimators=471;, score=0.874 total time=   3.5s
[CV 5/5] END ..................n_estimators=471;, score=0.862 total time=   3.5s
[CV 1/5] END ..................n_estimators=472;, score=0.879 total time=   3.5s
[CV 2/5] END ..................n_estimators=472;, score=0.883 total time=   3.5s
[CV 3/5] END ..................n_estimators=472;, score=0.870 total time=   3.5s
[CV 4/5] END ...............

[CV 4/5] END ..................n_estimators=490;, score=0.875 total time=   3.6s
[CV 5/5] END ..................n_estimators=490;, score=0.862 total time=   3.6s
[CV 1/5] END ..................n_estimators=491;, score=0.874 total time=   3.7s
[CV 2/5] END ..................n_estimators=491;, score=0.881 total time=   3.7s
[CV 3/5] END ..................n_estimators=491;, score=0.871 total time=   3.6s
[CV 4/5] END ..................n_estimators=491;, score=0.872 total time=   3.6s
[CV 5/5] END ..................n_estimators=491;, score=0.862 total time=   3.7s
[CV 1/5] END ..................n_estimators=492;, score=0.877 total time=   3.7s
[CV 2/5] END ..................n_estimators=492;, score=0.885 total time=   3.7s
[CV 3/5] END ..................n_estimators=492;, score=0.869 total time=   3.6s
[CV 4/5] END ..................n_estimators=492;, score=0.873 total time=   3.6s
[CV 5/5] END ..................n_estimators=492;, score=0.860 total time=   3.7s
[CV 1/5] END ...............

GridSearchCV(estimator=RandomForestClassifier(n_estimators=200),
             param_grid={'n_estimators': array([  1,   2,   3,   4,   5,   6,   7,   8,   9,  10,  11,  12,  13,
        14,  15,  16,  17,  18,  19,  20,  21,  22,  23,  24,  25,  26,
        27,  28,  29,  30,  31,  32,  33,  34,  35,  36,  37,  38,  39,
        40,  41,  42,  43,  44,  45,  46,  47,  48,  49,  50,  51,  52,
        53,  54,  55,  56,  57,  58,  59,  60,  61,  62,  63,  64,  65,
        66,  67,  68,  69,  70,  71,  72,  73,  74,  75,  76,  77,  78,
        79,  80,  81,  82,  83,  84,  85,  86,  87,...
       404, 405, 406, 407, 408, 409, 410, 411, 412, 413, 414, 415, 416,
       417, 418, 419, 420, 421, 422, 423, 424, 425, 426, 427, 428, 429,
       430, 431, 432, 433, 434, 435, 436, 437, 438, 439, 440, 441, 442,
       443, 444, 445, 446, 447, 448, 449, 450, 451, 452, 453, 454, 455,
       456, 457, 458, 459, 460, 461, 462, 463, 464, 465, 466, 467, 468,
       469, 470, 471, 472, 473, 474, 475, 476, 

In [22]:
print(grid.best_params_)
print(grid.best_score_)

{'n_estimators': 121}
0.8769768008790633


In [23]:
rf = RandomForestClassifier(n_estimators=121)
rf = rf.fit(X_train_scaled, y_train)
rf.score(X_train_scaled, y_train)

1.0

In [24]:
rf.score(X_test_scaled, y_test)

0.897025171624714

# Save the Model

In [52]:
# save your model by updating "your_name" with your name
# and "your_model" with your model variable
# be sure to turn this in to BCS
# if joblib fails to import, try running the command to install in terminal/git-bash
import joblib
filename = 'adam_burstyn.sav'
joblib.dump(knn, filename)

['adam_burstyn.sav']