In [10]:
# Update sklearn to prevent version mismatches
!pip install sklearn --upgrade

Requirement already up-to-date: sklearn in /Users/jam/anaconda3/envs/PythonData2/lib/python3.6/site-packages (0.0)


In [11]:
# install joblib. This will be used to save your model. 
# Restart your kernel after installing 
!pip install joblib



In [12]:
import pandas as pd
import numpy as np
from pprint import pprint
from sklearn import tree

# Read the CSV and Perform Basic Data Cleaning

In [13]:
df = pd.read_csv("exoplanet_data.csv")
# Drop the null columns where all values are null
df = df.dropna(axis='columns', how='all')
# Drop the null rows
df = df.dropna()
df.head(10)

Unnamed: 0,koi_disposition,koi_fpflag_nt,koi_fpflag_ss,koi_fpflag_co,koi_fpflag_ec,koi_period,koi_period_err1,koi_period_err2,koi_time0bk,koi_time0bk_err1,...,koi_steff_err2,koi_slogg,koi_slogg_err1,koi_slogg_err2,koi_srad,koi_srad_err1,koi_srad_err2,ra,dec,koi_kepmag
0,CONFIRMED,0,0,0,0,54.418383,0.0002479,-0.0002479,162.51384,0.00352,...,-81,4.467,0.064,-0.096,0.927,0.105,-0.061,291.93423,48.141651,15.347
1,FALSE POSITIVE,0,1,0,0,19.89914,1.49e-05,-1.49e-05,175.850252,0.000581,...,-176,4.544,0.044,-0.176,0.868,0.233,-0.078,297.00482,48.134129,15.436
2,FALSE POSITIVE,0,1,0,0,1.736952,2.63e-07,-2.63e-07,170.307565,0.000115,...,-174,4.564,0.053,-0.168,0.791,0.201,-0.067,285.53461,48.28521,15.597
3,CONFIRMED,0,0,0,0,2.525592,3.76e-06,-3.76e-06,171.59555,0.00113,...,-211,4.438,0.07,-0.21,1.046,0.334,-0.133,288.75488,48.2262,15.509
4,CONFIRMED,0,0,0,0,4.134435,1.05e-05,-1.05e-05,172.97937,0.0019,...,-232,4.486,0.054,-0.229,0.972,0.315,-0.105,296.28613,48.22467,15.714
5,CONFIRMED,0,0,0,0,2.566589,1.78e-05,-1.78e-05,179.55437,0.00461,...,-232,4.486,0.054,-0.229,0.972,0.315,-0.105,296.28613,48.22467,15.714
6,CONFIRMED,0,0,0,0,16.068647,1.09e-05,-1.09e-05,173.621937,0.000517,...,-83,4.485,0.083,-0.028,0.848,0.033,-0.072,286.99948,48.37579,15.841
7,CONFIRMED,0,0,0,0,2.470613,2.7e-08,-2.7e-08,122.763305,9e-06,...,-78,4.457,0.024,-0.024,0.964,0.038,-0.038,286.80847,49.316399,11.338
8,CONFIRMED,0,1,0,0,2.204735,4.3e-08,-4.3e-08,121.358542,1.6e-05,...,-89,4.019,0.033,-0.027,1.952,0.099,-0.11,292.24728,47.969521,10.463
9,CONFIRMED,0,0,0,0,3.522498,1.98e-07,-1.98e-07,121.119423,4.7e-05,...,-137,4.169,0.055,-0.045,1.451,0.11,-0.11,281.28812,42.45108,13.563


In [14]:
df.shape

(6991, 41)

In [15]:
df.columns

Index(['koi_disposition', 'koi_fpflag_nt', 'koi_fpflag_ss', 'koi_fpflag_co',
       'koi_fpflag_ec', 'koi_period', 'koi_period_err1', 'koi_period_err2',
       'koi_time0bk', 'koi_time0bk_err1', 'koi_time0bk_err2', 'koi_impact',
       'koi_impact_err1', 'koi_impact_err2', 'koi_duration',
       'koi_duration_err1', 'koi_duration_err2', 'koi_depth', 'koi_depth_err1',
       'koi_depth_err2', 'koi_prad', 'koi_prad_err1', 'koi_prad_err2',
       'koi_teq', 'koi_insol', 'koi_insol_err1', 'koi_insol_err2',
       'koi_model_snr', 'koi_tce_plnt_num', 'koi_steff', 'koi_steff_err1',
       'koi_steff_err2', 'koi_slogg', 'koi_slogg_err1', 'koi_slogg_err2',
       'koi_srad', 'koi_srad_err1', 'koi_srad_err2', 'ra', 'dec',
       'koi_kepmag'],
      dtype='object')

# Determine Useful Columns

In [16]:
# target is the 'y'
target = df["koi_disposition"]
target_names = ["CONFIRMED", "CANDIDATE", "FALSE POSITIVE"]

In [17]:
# data is the 'X'
data = df.drop("koi_disposition", axis=1)
feature_names = data.columns
data.head()

Unnamed: 0,koi_fpflag_nt,koi_fpflag_ss,koi_fpflag_co,koi_fpflag_ec,koi_period,koi_period_err1,koi_period_err2,koi_time0bk,koi_time0bk_err1,koi_time0bk_err2,...,koi_steff_err2,koi_slogg,koi_slogg_err1,koi_slogg_err2,koi_srad,koi_srad_err1,koi_srad_err2,ra,dec,koi_kepmag
0,0,0,0,0,54.418383,0.0002479,-0.0002479,162.51384,0.00352,-0.00352,...,-81,4.467,0.064,-0.096,0.927,0.105,-0.061,291.93423,48.141651,15.347
1,0,1,0,0,19.89914,1.49e-05,-1.49e-05,175.850252,0.000581,-0.000581,...,-176,4.544,0.044,-0.176,0.868,0.233,-0.078,297.00482,48.134129,15.436
2,0,1,0,0,1.736952,2.63e-07,-2.63e-07,170.307565,0.000115,-0.000115,...,-174,4.564,0.053,-0.168,0.791,0.201,-0.067,285.53461,48.28521,15.597
3,0,0,0,0,2.525592,3.76e-06,-3.76e-06,171.59555,0.00113,-0.00113,...,-211,4.438,0.07,-0.21,1.046,0.334,-0.133,288.75488,48.2262,15.509
4,0,0,0,0,4.134435,1.05e-05,-1.05e-05,172.97937,0.0019,-0.0019,...,-232,4.486,0.054,-0.229,0.972,0.315,-0.105,296.28613,48.22467,15.714


In [18]:
# Split the data using train_test_split
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(data, target)

In [19]:
# Create a Decision Tree Classifier
clf = tree.DecisionTreeClassifier()
clf = clf.fit(X_train, y_train)
clf.score(X_train, y_train)
#clf.score(X_test, y_test)

1.0

In [20]:
clf.score(X_train, y_train)

1.0

In [21]:
# Fit the classifier to the data
from sklearn.ensemble import RandomForestClassifier
rf = RandomForestClassifier(n_estimators=400)
rf = rf.fit(X_train, y_train)
rf.score(X_test, y_test)

0.8930205949656751

In [22]:
rf.score(X_train, y_train)

1.0

In [23]:
# Create, fit, and score a Random Forest Classifier
sorted(zip(rf.feature_importances_, feature_names), reverse=True)

[(0.10716153331459054, 'koi_fpflag_co'),
 (0.09800585786193831, 'koi_fpflag_nt'),
 (0.07023704232009358, 'koi_fpflag_ss'),
 (0.055224280505168685, 'koi_model_snr'),
 (0.050041210178684396, 'koi_prad'),
 (0.03659123242395773, 'koi_fpflag_ec'),
 (0.03592077938950474, 'koi_duration_err2'),
 (0.03388288997624427, 'koi_steff_err1'),
 (0.03320394675036982, 'koi_duration_err1'),
 (0.030214887579611056, 'koi_prad_err2'),
 (0.030096766457400698, 'koi_prad_err1'),
 (0.027930037634625114, 'koi_steff_err2'),
 (0.023724204186495883, 'koi_time0bk_err2'),
 (0.023298785073811695, 'koi_duration'),
 (0.022745237219290438, 'koi_time0bk_err1'),
 (0.020398687056090582, 'koi_period'),
 (0.01959054450110027, 'koi_insol_err1'),
 (0.018518704707323266, 'koi_depth'),
 (0.01829198404487362, 'koi_impact'),
 (0.01668901244869575, 'koi_period_err2'),
 (0.016580293299370694, 'koi_teq'),
 (0.01590118059310141, 'koi_period_err1'),
 (0.014767790400818217, 'koi_insol'),
 (0.014641199958124747, 'koi_insol_err2'),
 (0.013

# Pre processing ALL FEATURES

In [24]:
# Scale your data
from sklearn.preprocessing import MinMaxScaler

In [25]:
# Scale your data
scaler = MinMaxScaler().fit(X_train)

X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [26]:
#picking the random forest model again
rf1 = RandomForestClassifier(n_estimators=500)
rf_for_scaled = rf1.fit(X_train_scaled, y_train)

In [27]:
print(f"Training Data Score: {rf_for_scaled.score(X_train_scaled, y_train)}")
print(f"Testing Data Score: {rf_for_scaled.score(X_test_scaled, y_test)}")

Training Data Score: 1.0
Testing Data Score: 0.8941647597254004


# Deep Learning on all columns

In [28]:
from keras.models import Sequential
from keras.layers import Dense

Using TensorFlow backend.
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


In [29]:
from keras.utils import to_categorical
from keras.layers import Dense
from sklearn.preprocessing import LabelEncoder
from keras.callbacks import EarlyStopping
label_encoder = LabelEncoder()
label_encoder.fit(y_train)
encoded_y_train = label_encoder.transform(y_train)
encoded_y_test = label_encoder.transform(y_test)
y_train_categorical = to_categorical(encoded_y_train)
y_test_categorical = to_categorical(encoded_y_test)

model = Sequential()
model.add(Dense(units=100, activation='relu', input_dim=40))
model.add(Dense(units=100, activation='relu'))
model.add(Dense(units=100, activation='relu'))
model.add(Dense(units=100, activation='relu'))
model.add(Dense(units=3, activation='softmax'))

model.compile(optimizer='adam',
             loss='categorical_crossentropy',
             metrics=['accuracy'])








In [30]:
# set early stopping as callback
callbacks = [EarlyStopping(monitor='val_loss', patience=2)]
model.fit(
   X_train_scaled,
   y_train_categorical,
   callbacks=callbacks,
   epochs=100,
   shuffle=True,
   verbose=2
)

Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where

Epoch 1/100
 - 2s - loss: 0.4881 - acc: 0.7500
Epoch 2/100




 - 1s - loss: 0.3663 - acc: 0.8047
Epoch 3/100
 - 1s - loss: 0.3554 - acc: 0.8158
Epoch 4/100
 - 1s - loss: 0.3370 - acc: 0.8291
Epoch 5/100
 - 1s - loss: 0.3233 - acc: 0.8417
Epoch 6/100
 - 1s - loss: 0.3202 - acc: 0.8491
Epoch 7/100
 - 1s - loss: 0.3117 - acc: 0.8503
Epoch 8/100
 - 1s - loss: 0.3076 - acc: 0.8583
Epoch 9/100
 - 1s - loss: 0.3159 - acc: 0.8438
Epoch 10/100
 - 1s - loss: 0.2950 - acc: 0.8648
Epoch 11/100
 - 1s - loss: 0.2889 - acc: 0.8718
Epoch 12/100
 - 1s - loss: 0.2861 - acc: 0.8701
Epoch 13/100
 - 1s - loss: 0.2859 - acc: 0.8703
Epoch 14/100
 - 1s - loss: 0.2766 - acc: 0.8838
Epoch 15/100
 - 1s - loss: 0.2787 - acc: 0.8779
Epoch 16/100
 - 1s - loss: 0.2821 - acc: 0.8741
Epoch 17/100
 - 1s - loss: 0.2779 - acc: 0.8772
Epoch 18/100
 - 1s - loss: 0.2789 - acc: 0.8793
Epoch 19/100
 - 1s - loss: 0.2707 - acc: 0.8768
Epoch 20/100
 - 1s - loss: 0.2704 - acc: 0.8772
Epoch 21/100
 - 1s - loss: 0.2706 - acc: 0.8774
Epoch 22/100
 - 1s - loss: 0.2719 - acc: 0.8783
Epoch 23/100

<keras.callbacks.History at 0x1a3f7e45f8>

# THE ABOVE IS THE BEST MODEL IS DEEP LEARNING ON ALL COLUMNS!

# Hyper Parameter Tuning  ALL COLUMNS

In [31]:
from sklearn.svm import SVC 
model0 = SVC(kernel='linear')
model0

SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma='auto_deprecated',
    kernel='linear', max_iter=-1, probability=False, random_state=None,
    shrinking=True, tol=0.001, verbose=False)

In [32]:
from sklearn.model_selection import GridSearchCV
param_grid0 = {'C': [1, 5, 10, 50],
              'gamma': [0.0001, 0.0005, 0.001, 0.005]}
grid0 = GridSearchCV(model0, param_grid0, verbose=3)

In [33]:
# Train the model with GridSearch
grid0.fit(X_train_scaled, y_train)

Fitting 3 folds for each of 16 candidates, totalling 48 fits
[CV] C=1, gamma=0.0001 ...............................................


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


[CV] ................... C=1, gamma=0.0001, score=0.837, total=   0.4s
[CV] C=1, gamma=0.0001 ...............................................


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.4s remaining:    0.0s


[CV] ................... C=1, gamma=0.0001, score=0.833, total=   0.3s
[CV] C=1, gamma=0.0001 ...............................................


[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:    0.7s remaining:    0.0s


[CV] ................... C=1, gamma=0.0001, score=0.842, total=   0.3s
[CV] C=1, gamma=0.0005 ...............................................
[CV] ................... C=1, gamma=0.0005, score=0.837, total=   0.3s
[CV] C=1, gamma=0.0005 ...............................................
[CV] ................... C=1, gamma=0.0005, score=0.833, total=   0.3s
[CV] C=1, gamma=0.0005 ...............................................
[CV] ................... C=1, gamma=0.0005, score=0.842, total=   0.3s
[CV] C=1, gamma=0.001 ................................................
[CV] .................... C=1, gamma=0.001, score=0.837, total=   0.3s
[CV] C=1, gamma=0.001 ................................................
[CV] .................... C=1, gamma=0.001, score=0.833, total=   0.3s
[CV] C=1, gamma=0.001 ................................................
[CV] .................... C=1, gamma=0.001, score=0.842, total=   0.3s
[CV] C=1, gamma=0.005 ................................................
[CV] .

[Parallel(n_jobs=1)]: Done  48 out of  48 | elapsed:   17.1s finished


GridSearchCV(cv='warn', error_score='raise-deprecating',
             estimator=SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
                           decision_function_shape='ovr', degree=3,
                           gamma='auto_deprecated', kernel='linear',
                           max_iter=-1, probability=False, random_state=None,
                           shrinking=True, tol=0.001, verbose=False),
             iid='warn', n_jobs=None,
             param_grid={'C': [1, 5, 10, 50],
                         'gamma': [0.0001, 0.0005, 0.001, 0.005]},
             pre_dispatch='2*n_jobs', refit=True, return_train_score=False,
             scoring=None, verbose=3)

In [34]:
# replace grid2 with grid
print(grid0.best_params_)
print(grid0.best_score_)

{'C': 50, 'gamma': 0.0001}
0.8834636658401679


In [35]:
predictions0 = grid0.predict(X_test_scaled)

In [36]:
from sklearn.metrics import classification_report
print(classification_report(y_test, predictions0,
                            target_names=["CONFIRMED", "CANDIDATE", "FALSE POSITIVE"]))

                precision    recall  f1-score   support

     CONFIRMED       0.86      0.66      0.74       441
     CANDIDATE       0.72      0.85      0.78       427
FALSE POSITIVE       0.98      1.00      0.99       880

      accuracy                           0.88      1748
     macro avg       0.85      0.84      0.84      1748
  weighted avg       0.88      0.88      0.87      1748



# Select your features (columns)

In [37]:
# Set features. This will also be used as your x values.
selected_features = df[['koi_fpflag_co', 'koi_fpflag_nt', 'koi_fpflag_ss', 'koi_model_snr', 'koi_prad']]
selected_features.head(10)

Unnamed: 0,koi_fpflag_co,koi_fpflag_nt,koi_fpflag_ss,koi_model_snr,koi_prad
0,0,0,0,25.8,2.83
1,0,0,1,76.3,14.6
2,0,0,1,505.6,33.46
3,0,0,0,40.9,2.75
4,0,0,0,40.2,2.77
5,0,0,0,15.0,1.59
6,0,0,0,161.9,5.76
7,0,0,0,4304.3,13.04
8,0,0,1,5945.9,16.1
9,0,0,0,1741.5,14.59


# Create a Train Test Split

Use `koi_disposition` for the y values

In [38]:
# Split the data using train_test_split
from sklearn.model_selection import train_test_split
XX_train, XX_test, yy_train, yy_test = train_test_split(selected_features, target, random_state=42)

In [39]:
XX_train.head()

Unnamed: 0,koi_fpflag_co,koi_fpflag_nt,koi_fpflag_ss,koi_model_snr,koi_prad
6122,0,0,0,10.8,1.24
6370,0,0,1,13.8,0.86
2879,0,1,0,254.3,3.21
107,0,0,0,38.4,2.25
29,0,0,0,696.5,12.21


In [40]:
# Create a Decision Tree Classifier
clf = tree.DecisionTreeClassifier()
clf = clf.fit(XX_train, yy_train)
clf.score(XX_test, yy_test)

0.8278032036613272

In [41]:
clf.score(XX_train, yy_train)

0.9986648865153538

In [42]:
rf0 = RandomForestClassifier(n_estimators=500)
rf0 = rf0.fit(XX_train, yy_train)

In [43]:
print(f"Training Data Score: {rf0.score(XX_train, yy_train)}")
print(f"Testing Data Score: {rf0.score(XX_test, yy_test)}")

Training Data Score: 0.9986648865153538
Testing Data Score: 0.8541189931350115


# Pre-processing

Scale the data using the MinMaxScaler and perform some feature selection

In [44]:
# Scale your data
from sklearn.preprocessing import MinMaxScaler

In [45]:
# Scale your data
scaler = MinMaxScaler().fit(XX_train)

XX_train_scaled = scaler.transform(XX_train)
XX_test_scaled = scaler.transform(XX_test)

# Train the Model



In [46]:
#picking the random forest model again
rf2 = RandomForestClassifier(n_estimators=500)
rf2 = rf2.fit(XX_train_scaled, yy_train)

In [47]:
#what is model2
print(f"Training Data Score: {rf2.score(XX_train_scaled, yy_train)}")
print(f"Testing Data Score: {rf2.score(XX_test_scaled, yy_test)}")

Training Data Score: 0.989509822620637
Testing Data Score: 0.8609839816933639


# Hyperparameter Tuning

Use `GridSearchCV` to tune the model's parameters

In [48]:
# Create the GridSearchCV model
from sklearn.svm import SVC 
model = SVC(kernel='linear')
model

SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma='auto_deprecated',
    kernel='linear', max_iter=-1, probability=False, random_state=None,
    shrinking=True, tol=0.001, verbose=False)

In [49]:
from sklearn.model_selection import GridSearchCV
param_grid = {'C': [1, 5, 10, 50],
              'gamma': [0.0001, 0.0005, 0.001, 0.005]}
grid = GridSearchCV(model, param_grid, verbose=3)

In [50]:
# Train the model with GridSearch
grid.fit(XX_train_scaled, yy_train)

Fitting 3 folds for each of 16 candidates, totalling 48 fits


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.1s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:    0.2s remaining:    0.0s


[CV] C=1, gamma=0.0001 ...............................................
[CV] ................... C=1, gamma=0.0001, score=0.739, total=   0.1s
[CV] C=1, gamma=0.0001 ...............................................
[CV] ................... C=1, gamma=0.0001, score=0.737, total=   0.1s
[CV] C=1, gamma=0.0001 ...............................................
[CV] ................... C=1, gamma=0.0001, score=0.738, total=   0.1s
[CV] C=1, gamma=0.0005 ...............................................
[CV] ................... C=1, gamma=0.0005, score=0.739, total=   0.1s
[CV] C=1, gamma=0.0005 ...............................................
[CV] ................... C=1, gamma=0.0005, score=0.737, total=   0.1s
[CV] C=1, gamma=0.0005 ...............................................
[CV] ................... C=1, gamma=0.0005, score=0.738, total=   0.1s
[CV] C=1, gamma=0.001 ................................................
[CV] .................... C=1, gamma=0.001, score=0.739, total=   0.1s
[CV] C

[Parallel(n_jobs=1)]: Done  48 out of  48 | elapsed:    4.9s finished


GridSearchCV(cv='warn', error_score='raise-deprecating',
             estimator=SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
                           decision_function_shape='ovr', degree=3,
                           gamma='auto_deprecated', kernel='linear',
                           max_iter=-1, probability=False, random_state=None,
                           shrinking=True, tol=0.001, verbose=False),
             iid='warn', n_jobs=None,
             param_grid={'C': [1, 5, 10, 50],
                         'gamma': [0.0001, 0.0005, 0.001, 0.005]},
             pre_dispatch='2*n_jobs', refit=True, return_train_score=False,
             scoring=None, verbose=3)

In [51]:
# replace grid2 with grid
print(grid.best_params_)
print(grid.best_score_)

{'C': 10, 'gamma': 0.0001}
0.7663551401869159


In [52]:
predictions = grid.predict(XX_test_scaled)

In [53]:
from sklearn.metrics import classification_report
print(classification_report(yy_test, predictions,
                            target_names=["CONFIRMED", "CANDIDATE", "FALSE POSITIVE"]))

                precision    recall  f1-score   support

     CONFIRMED       0.45      0.96      0.61       411
     CANDIDATE       0.64      0.03      0.06       484
FALSE POSITIVE       0.98      0.97      0.97       853

      accuracy                           0.71      1748
     macro avg       0.69      0.66      0.55      1748
  weighted avg       0.76      0.71      0.64      1748



In [54]:
from keras.models import Sequential
from keras.layers import Dense


In [55]:
# def build_model():
#     model = Sequential()
#     model.add(Dense(20, input_dim=5, activation='relu'))
#     model.add(Dense(40, activation='relu'))
#     model.add(Dense(1, activation='sigmoid'))
#     # Compile model   categorical_crossentropy
#     model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
#     return model

# from keras.wrappers.scikit_learn import KerasClassifier
# keras_model = build_model()
# keras_model.fit(XX_train, yy_train, epochs=5, batch_size=100, verbose=3)


from keras.utils import to_categorical
from keras.layers import Dense
from sklearn.preprocessing import LabelEncoder
from keras.callbacks import EarlyStopping
label_encoder = LabelEncoder()
label_encoder.fit(yy_train)
encoded_y_train = label_encoder.transform(yy_train)
encoded_y_test = label_encoder.transform(yy_test)
y_train_categorical = to_categorical(encoded_y_train)
y_test_categorical = to_categorical(encoded_y_test)

model = Sequential()
model.add(Dense(units=100, activation='relu', input_dim=5))
model.add(Dense(units=100, activation='relu'))
model.add(Dense(units=100, activation='relu'))
model.add(Dense(units=3, activation='softmax'))

model.compile(optimizer='adam',
             loss='categorical_crossentropy',
             metrics=['accuracy'])



In [56]:
# set early stopping as callback
callbacks = [EarlyStopping(monitor='val_loss', patience=2)]
model.fit(
   XX_train_scaled,
   y_train_categorical,
   callbacks=callbacks,
   epochs=100,
   shuffle=True,
   verbose=2
)

Epoch 1/100
 - 3s - loss: 0.5526 - acc: 0.6874
Epoch 2/100




 - 1s - loss: 0.4438 - acc: 0.7482
Epoch 3/100
 - 1s - loss: 0.4437 - acc: 0.7402
Epoch 4/100
 - 1s - loss: 0.4435 - acc: 0.7360
Epoch 5/100
 - 1s - loss: 0.4422 - acc: 0.7355
Epoch 6/100
 - 1s - loss: 0.4414 - acc: 0.7490
Epoch 7/100
 - 1s - loss: 0.4412 - acc: 0.7536
Epoch 8/100
 - 1s - loss: 0.4411 - acc: 0.7519
Epoch 9/100
 - 1s - loss: 0.4401 - acc: 0.7574
Epoch 10/100
 - 1s - loss: 0.4387 - acc: 0.7532
Epoch 11/100
 - 1s - loss: 0.4373 - acc: 0.7744
Epoch 12/100
 - 1s - loss: 0.4370 - acc: 0.7677
Epoch 13/100
 - 1s - loss: 0.4359 - acc: 0.7665
Epoch 14/100
 - 1s - loss: 0.4360 - acc: 0.7704
Epoch 15/100
 - 1s - loss: 0.4346 - acc: 0.7744
Epoch 16/100
 - 1s - loss: 0.4326 - acc: 0.7828
Epoch 17/100
 - 1s - loss: 0.4306 - acc: 0.7858
Epoch 18/100
 - 1s - loss: 0.4285 - acc: 0.7961
Epoch 19/100
 - 1s - loss: 0.4245 - acc: 0.7950
Epoch 20/100
 - 1s - loss: 0.4191 - acc: 0.8093
Epoch 21/100
 - 1s - loss: 0.4146 - acc: 0.8205
Epoch 22/100
 - 1s - loss: 0.4057 - acc: 0.8261
Epoch 23/100

<keras.callbacks.History at 0x1a46f4ec18>

# Save the Model

In [58]:
# save your model by updating "your_name" with your name
# and "your_model" with your model variable
# be sure to turn this in to BCS
#if joblib fails to import, try running the command to install in terminal/git-bash
import joblib
filename = 'Jam_Paydavousi.sav'
joblib.dump(RandomForestClassifier, filename)

['Jam_Paydavousi.sav']