# Neural Network with Keras

In [1]:
# Update sklearn to prevent version mismatches

!pip install sklearn --upgrade

Requirement already up-to-date: sklearn in c:\users\cindy\anaconda3\lib\site-packages (0.0)


In [2]:
# install joblib. This will be used to save your model. 
# Restart your kernel after installing 
!pip install joblib



In [3]:
import pandas as pd

# Read the CSV and Perform Basic Data Cleaning

In [4]:
# Load the exoplanet table and clean it in a single pass.
df = (
    pd.read_csv("exoplanet_data.csv")
    # Discard columns in which every value is null
    .dropna(axis='columns', how='all')
    # Then discard any row that still contains a null
    .dropna()
)
df.head()

Unnamed: 0,koi_disposition,koi_fpflag_nt,koi_fpflag_ss,koi_fpflag_co,koi_fpflag_ec,koi_period,koi_period_err1,koi_period_err2,koi_time0bk,koi_time0bk_err1,...,koi_steff_err2,koi_slogg,koi_slogg_err1,koi_slogg_err2,koi_srad,koi_srad_err1,koi_srad_err2,ra,dec,koi_kepmag
0,CONFIRMED,0,0,0,0,54.418383,0.0002479,-0.0002479,162.51384,0.00352,...,-81,4.467,0.064,-0.096,0.927,0.105,-0.061,291.93423,48.141651,15.347
1,FALSE POSITIVE,0,1,0,0,19.89914,1.49e-05,-1.49e-05,175.850252,0.000581,...,-176,4.544,0.044,-0.176,0.868,0.233,-0.078,297.00482,48.134129,15.436
2,FALSE POSITIVE,0,1,0,0,1.736952,2.63e-07,-2.63e-07,170.307565,0.000115,...,-174,4.564,0.053,-0.168,0.791,0.201,-0.067,285.53461,48.28521,15.597
3,CONFIRMED,0,0,0,0,2.525592,3.76e-06,-3.76e-06,171.59555,0.00113,...,-211,4.438,0.07,-0.21,1.046,0.334,-0.133,288.75488,48.2262,15.509
4,CONFIRMED,0,0,0,0,4.134435,1.05e-05,-1.05e-05,172.97937,0.0019,...,-232,4.486,0.054,-0.229,0.972,0.315,-0.105,296.28613,48.22467,15.714


In [5]:
# List the column labels that remain after the cleaning step.
df.columns

Index(['koi_disposition', 'koi_fpflag_nt', 'koi_fpflag_ss', 'koi_fpflag_co',
       'koi_fpflag_ec', 'koi_period', 'koi_period_err1', 'koi_period_err2',
       'koi_time0bk', 'koi_time0bk_err1', 'koi_time0bk_err2', 'koi_impact',
       'koi_impact_err1', 'koi_impact_err2', 'koi_duration',
       'koi_duration_err1', 'koi_duration_err2', 'koi_depth', 'koi_depth_err1',
       'koi_depth_err2', 'koi_prad', 'koi_prad_err1', 'koi_prad_err2',
       'koi_teq', 'koi_insol', 'koi_insol_err1', 'koi_insol_err2',
       'koi_model_snr', 'koi_tce_plnt_num', 'koi_steff', 'koi_steff_err1',
       'koi_steff_err2', 'koi_slogg', 'koi_slogg_err1', 'koi_slogg_err2',
       'koi_srad', 'koi_srad_err1', 'koi_srad_err2', 'ra', 'dec',
       'koi_kepmag'],
      dtype='object')

# Select your features (columns)

In [6]:
# Columns used as the model's X values: every column of the cleaned table
# except the target (koi_disposition) and koi_tce_plnt_num.
feature_columns = [
    # false-positive flags
    'koi_fpflag_nt', 'koi_fpflag_ss', 'koi_fpflag_co', 'koi_fpflag_ec',
    'koi_period', 'koi_period_err1', 'koi_period_err2',
    'koi_time0bk', 'koi_time0bk_err1', 'koi_time0bk_err2',
    'koi_impact', 'koi_impact_err1', 'koi_impact_err2',
    'koi_duration', 'koi_duration_err1', 'koi_duration_err2',
    'koi_depth', 'koi_depth_err1', 'koi_depth_err2',
    'koi_prad', 'koi_prad_err1', 'koi_prad_err2',
    'koi_teq',
    'koi_insol', 'koi_insol_err1', 'koi_insol_err2',
    'koi_model_snr',
    'koi_steff', 'koi_steff_err1', 'koi_steff_err2',
    'koi_slogg', 'koi_slogg_err1', 'koi_slogg_err2',
    'koi_srad', 'koi_srad_err1', 'koi_srad_err2',
    'ra', 'dec', 'koi_kepmag',
]

# Set features. This will also be used as your X values.
selected_features = df[feature_columns]

# Create a Train Test Split

Use `koi_disposition` for the y values.

In [7]:
from sklearn.model_selection import train_test_split

# Features are the selected columns; the target is the disposition label.
X, y = selected_features, df["koi_disposition"]

# A fixed random_state keeps the split identical from run to run.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

In [8]:
# Peek at the first rows of the training labels.
y_train.head()

6122         CANDIDATE
6370    FALSE POSITIVE
2879    FALSE POSITIVE
107          CONFIRMED
29           CANDIDATE
Name: koi_disposition, dtype: object

In [9]:
# Peek at the first rows of the training features (not yet scaled).
X_train.head()

Unnamed: 0,koi_fpflag_nt,koi_fpflag_ss,koi_fpflag_co,koi_fpflag_ec,koi_period,koi_period_err1,koi_period_err2,koi_time0bk,koi_time0bk_err1,koi_time0bk_err2,...,koi_steff_err2,koi_slogg,koi_slogg_err1,koi_slogg_err2,koi_srad,koi_srad_err1,koi_srad_err2,ra,dec,koi_kepmag
6122,0,0,0,0,6.768901,7.38e-05,-7.38e-05,133.07724,0.00844,-0.00844,...,-171,4.327,0.153,-0.187,1.125,0.31,-0.207,294.40472,39.351681,14.725
6370,0,1,0,1,0.733726,6.06e-06,-6.06e-06,132.02005,0.00795,-0.00795,...,-175,4.578,0.033,-0.187,0.797,0.211,-0.056,284.50391,42.46386,15.77
2879,1,0,0,0,7.652707,6.54e-05,-6.54e-05,134.46038,0.00619,-0.00619,...,-189,4.481,0.05,-0.2,0.963,0.29,-0.097,295.50211,38.98354,13.099
107,0,0,0,0,7.953547,1.91e-05,-1.91e-05,174.66224,0.00182,-0.00182,...,-85,4.536,0.056,-0.016,0.779,0.023,-0.049,291.15878,40.750271,15.66
29,0,0,0,0,4.959319,5.15e-07,-5.15e-07,172.258529,8.3e-05,-8.3e-05,...,-77,4.359,0.11,-0.11,1.082,0.173,-0.13,292.16705,48.727589,15.263


# Pre-processing

Scale the features using the MinMaxScaler, then label-encode and one-hot encode the target classes.

In [10]:
# Scale the features and encode the target labels for the network.
from sklearn.preprocessing import MinMaxScaler, LabelEncoder
from tensorflow.keras.utils import to_categorical

# Fit the scaler on the training split only, then apply that same scaling to
# both splits so nothing about the test data influences the preprocessing.
X_scaler = MinMaxScaler().fit(X_train)
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

# Label encoding converts the class names to integers.
label_encoder = LabelEncoder()
label_encoder.fit(y_train)
encoded_y_train = label_encoder.transform(y_train)
encoded_y_test = label_encoder.transform(y_test)

# One-hot encoding turns each integer label into an indicator row.
# Pin the width to the number of classes the encoder learned: left to itself,
# to_categorical sizes the array from the largest label it is given, so a
# split that happened to lack the last class would come out one column short.
n_label_classes = len(label_encoder.classes_)
y_train_cat = to_categorical(encoded_y_train, num_classes=n_label_classes)
y_test_cat = to_categorical(encoded_y_test, num_classes=n_label_classes)


In [11]:
# Show the integer labels of both splits, then the one-hot training targets.
for encoded in (encoded_y_train, encoded_y_test, y_train_cat):
    print(encoded)

[0 2 2 ... 2 2 2]
[2 0 2 ... 1 1 1]
[[1. 0. 0.]
 [0. 0. 1.]
 [0. 0. 1.]
 ...
 [0. 0. 1.]
 [0. 0. 1.]
 [0. 0. 1.]]


In [16]:
# Create a sequential model
# This only instantiates an empty Sequential container; its layers are added
# in the following cell.

from tensorflow.keras.models import Sequential
model = Sequential()
# Bare expression so the notebook displays the model object's repr.
model

<tensorflow.python.keras.engine.sequential.Sequential at 0x20cd6e2b508>

In [17]:
# Build the network: one hidden ReLU layer followed by a softmax output layer.

from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Sequential

# Start from an empty model so that re-running this cell replaces the network
# instead of stacking another pair of Dense layers onto the existing one.
model = Sequential()

# Take the layer sizes from the prepared arrays rather than hard-coding them,
# so they stay correct if the feature list or the set of classes changes.
number_inputs = X_train_scaled.shape[1]
number_hidden_nodes = 6
model.add(Dense(units=number_hidden_nodes,
                activation='relu', input_dim=number_inputs))

# One output unit per one-hot column of the target.
number_classes = y_train_cat.shape[1]
model.add(Dense(units=number_classes, activation='softmax'))

model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_4 (Dense)              (None, 6)                 240       
_________________________________________________________________
dense_5 (Dense)              (None, 3)                 21        
Total params: 261
Trainable params: 261
Non-trainable params: 0
_________________________________________________________________


# Train the Model



In [18]:
# Configure the optimizer, loss and reported metric, then train the network.
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

# verbose=2 prints one summary line per epoch. The call is left as the final
# expression so the returned History object is shown as the cell's output.
model.fit(
    x=X_train_scaled,
    y=y_train_cat,
    epochs=1000,
    shuffle=True,
    verbose=2,
)

Train on 5243 samples
Epoch 1/1000
5243/5243 - 1s - loss: 1.0098 - accuracy: 0.4766
Epoch 2/1000
5243/5243 - 0s - loss: 0.8651 - accuracy: 0.5131
Epoch 3/1000
5243/5243 - 0s - loss: 0.7075 - accuracy: 0.6874
Epoch 4/1000
5243/5243 - 0s - loss: 0.5861 - accuracy: 0.7164
Epoch 5/1000
5243/5243 - 0s - loss: 0.5106 - accuracy: 0.7044
Epoch 6/1000
5243/5243 - 0s - loss: 0.4658 - accuracy: 0.7217
Epoch 7/1000
5243/5243 - 0s - loss: 0.4387 - accuracy: 0.7355
Epoch 8/1000
5243/5243 - 0s - loss: 0.4215 - accuracy: 0.7480
Epoch 9/1000
5243/5243 - 0s - loss: 0.4106 - accuracy: 0.7612
Epoch 10/1000
5243/5243 - 0s - loss: 0.4030 - accuracy: 0.7633
Epoch 11/1000
5243/5243 - 0s - loss: 0.3970 - accuracy: 0.7776
Epoch 12/1000
5243/5243 - 0s - loss: 0.3928 - accuracy: 0.7864
Epoch 13/1000
5243/5243 - 0s - loss: 0.3895 - accuracy: 0.7858
Epoch 14/1000
5243/5243 - 0s - loss: 0.3864 - accuracy: 0.7917
Epoch 15/1000
5243/5243 - 0s - loss: 0.3835 - accuracy: 0.8022
Epoch 16/1000
5243/5243 - 0s - loss: 0.380

Epoch 131/1000
5243/5243 - 0s - loss: 0.2817 - accuracy: 0.8808
Epoch 132/1000
5243/5243 - 0s - loss: 0.2841 - accuracy: 0.8795
Epoch 133/1000
5243/5243 - 0s - loss: 0.2814 - accuracy: 0.8808
Epoch 134/1000
5243/5243 - 0s - loss: 0.2808 - accuracy: 0.8821
Epoch 135/1000
5243/5243 - 0s - loss: 0.2813 - accuracy: 0.8791
Epoch 136/1000
5243/5243 - 0s - loss: 0.2823 - accuracy: 0.8785
Epoch 137/1000
5243/5243 - 0s - loss: 0.2817 - accuracy: 0.8829
Epoch 138/1000
5243/5243 - 0s - loss: 0.2808 - accuracy: 0.8802
Epoch 139/1000
5243/5243 - 0s - loss: 0.2806 - accuracy: 0.8798
Epoch 140/1000
5243/5243 - 0s - loss: 0.2806 - accuracy: 0.8798
Epoch 141/1000
5243/5243 - 0s - loss: 0.2816 - accuracy: 0.8793
Epoch 142/1000
5243/5243 - 0s - loss: 0.2817 - accuracy: 0.8817
Epoch 143/1000
5243/5243 - 0s - loss: 0.2812 - accuracy: 0.8808
Epoch 144/1000
5243/5243 - 0s - loss: 0.2804 - accuracy: 0.8844
Epoch 145/1000
5243/5243 - 0s - loss: 0.2805 - accuracy: 0.8823
Epoch 146/1000
5243/5243 - 0s - loss: 0.

5243/5243 - 0s - loss: 0.2712 - accuracy: 0.8859
Epoch 260/1000
5243/5243 - 0s - loss: 0.2703 - accuracy: 0.8886
Epoch 261/1000
5243/5243 - 0s - loss: 0.2701 - accuracy: 0.8882
Epoch 262/1000
5243/5243 - 0s - loss: 0.2702 - accuracy: 0.8850
Epoch 263/1000
5243/5243 - 0s - loss: 0.2721 - accuracy: 0.8840
Epoch 264/1000
5243/5243 - 0s - loss: 0.2698 - accuracy: 0.8886
Epoch 265/1000
5243/5243 - 0s - loss: 0.2697 - accuracy: 0.8867
Epoch 266/1000
5243/5243 - 0s - loss: 0.2699 - accuracy: 0.8877
Epoch 267/1000
5243/5243 - 0s - loss: 0.2699 - accuracy: 0.8880
Epoch 268/1000
5243/5243 - 0s - loss: 0.2701 - accuracy: 0.8861
Epoch 269/1000
5243/5243 - 0s - loss: 0.2689 - accuracy: 0.8892
Epoch 270/1000
5243/5243 - 0s - loss: 0.2698 - accuracy: 0.8894
Epoch 271/1000
5243/5243 - 0s - loss: 0.2698 - accuracy: 0.8877
Epoch 272/1000
5243/5243 - 0s - loss: 0.2710 - accuracy: 0.8856
Epoch 273/1000
5243/5243 - 0s - loss: 0.2681 - accuracy: 0.8899
Epoch 274/1000
5243/5243 - 0s - loss: 0.2695 - accuracy

Epoch 388/1000
5243/5243 - 0s - loss: 0.2572 - accuracy: 0.8911
Epoch 389/1000
5243/5243 - 0s - loss: 0.2582 - accuracy: 0.8924
Epoch 390/1000
5243/5243 - 0s - loss: 0.2578 - accuracy: 0.8940
Epoch 391/1000
5243/5243 - 0s - loss: 0.2570 - accuracy: 0.8917
Epoch 392/1000
5243/5243 - 0s - loss: 0.2565 - accuracy: 0.8941
Epoch 393/1000
5243/5243 - 0s - loss: 0.2577 - accuracy: 0.8926
Epoch 394/1000
5243/5243 - 0s - loss: 0.2573 - accuracy: 0.8951
Epoch 395/1000
5243/5243 - 0s - loss: 0.2571 - accuracy: 0.8920
Epoch 396/1000
5243/5243 - 0s - loss: 0.2570 - accuracy: 0.8917
Epoch 397/1000
5243/5243 - 0s - loss: 0.2575 - accuracy: 0.8924
Epoch 398/1000
5243/5243 - 0s - loss: 0.2587 - accuracy: 0.8934
Epoch 399/1000
5243/5243 - 0s - loss: 0.2587 - accuracy: 0.8905
Epoch 400/1000
5243/5243 - 0s - loss: 0.2570 - accuracy: 0.8930
Epoch 401/1000
5243/5243 - 0s - loss: 0.2563 - accuracy: 0.8934
Epoch 402/1000
5243/5243 - 0s - loss: 0.2564 - accuracy: 0.8941
Epoch 403/1000
5243/5243 - 0s - loss: 0.

5243/5243 - 0s - loss: 0.2517 - accuracy: 0.8961
Epoch 517/1000
5243/5243 - 0s - loss: 0.2500 - accuracy: 0.8949
Epoch 518/1000
5243/5243 - 0s - loss: 0.2520 - accuracy: 0.8930
Epoch 519/1000
5243/5243 - 0s - loss: 0.2506 - accuracy: 0.8970
Epoch 520/1000
5243/5243 - 0s - loss: 0.2503 - accuracy: 0.8957
Epoch 521/1000
5243/5243 - 0s - loss: 0.2504 - accuracy: 0.8976
Epoch 522/1000
5243/5243 - 0s - loss: 0.2521 - accuracy: 0.8961
Epoch 523/1000
5243/5243 - 0s - loss: 0.2512 - accuracy: 0.8941
Epoch 524/1000
5243/5243 - 0s - loss: 0.2536 - accuracy: 0.8962
Epoch 525/1000
5243/5243 - 0s - loss: 0.2507 - accuracy: 0.8964
Epoch 526/1000
5243/5243 - 0s - loss: 0.2503 - accuracy: 0.8966
Epoch 527/1000
5243/5243 - 0s - loss: 0.2489 - accuracy: 0.8983
Epoch 528/1000
5243/5243 - 0s - loss: 0.2499 - accuracy: 0.8962
Epoch 529/1000
5243/5243 - 0s - loss: 0.2509 - accuracy: 0.8981
Epoch 530/1000
5243/5243 - 0s - loss: 0.2492 - accuracy: 0.8953
Epoch 531/1000
5243/5243 - 0s - loss: 0.2495 - accuracy

Epoch 645/1000
5243/5243 - 0s - loss: 0.2475 - accuracy: 0.8972
Epoch 646/1000
5243/5243 - 0s - loss: 0.2503 - accuracy: 0.8968
Epoch 647/1000
5243/5243 - 0s - loss: 0.2475 - accuracy: 0.8972
Epoch 648/1000
5243/5243 - 0s - loss: 0.2494 - accuracy: 0.8951
Epoch 649/1000
5243/5243 - 0s - loss: 0.2475 - accuracy: 0.8980
Epoch 650/1000
5243/5243 - 0s - loss: 0.2476 - accuracy: 0.8951
Epoch 651/1000
5243/5243 - 0s - loss: 0.2470 - accuracy: 0.8976
Epoch 652/1000
5243/5243 - 0s - loss: 0.2489 - accuracy: 0.8981
Epoch 653/1000
5243/5243 - 0s - loss: 0.2505 - accuracy: 0.8957
Epoch 654/1000
5243/5243 - 0s - loss: 0.2480 - accuracy: 0.8957
Epoch 655/1000
5243/5243 - 0s - loss: 0.2474 - accuracy: 0.8955
Epoch 656/1000
5243/5243 - 0s - loss: 0.2468 - accuracy: 0.8993
Epoch 657/1000
5243/5243 - 0s - loss: 0.2461 - accuracy: 0.8999
Epoch 658/1000
5243/5243 - 0s - loss: 0.2501 - accuracy: 0.8957
Epoch 659/1000
5243/5243 - 0s - loss: 0.2480 - accuracy: 0.8957
Epoch 660/1000
5243/5243 - 0s - loss: 0.

5243/5243 - 0s - loss: 0.2447 - accuracy: 0.8974
Epoch 774/1000
5243/5243 - 0s - loss: 0.2454 - accuracy: 0.8983
Epoch 775/1000
5243/5243 - 0s - loss: 0.2467 - accuracy: 0.8972
Epoch 776/1000
5243/5243 - 0s - loss: 0.2455 - accuracy: 0.8995
Epoch 777/1000
5243/5243 - 0s - loss: 0.2456 - accuracy: 0.8966
Epoch 778/1000
5243/5243 - 0s - loss: 0.2458 - accuracy: 0.8966
Epoch 779/1000
5243/5243 - 0s - loss: 0.2471 - accuracy: 0.8949
Epoch 780/1000
5243/5243 - 0s - loss: 0.2452 - accuracy: 0.8983
Epoch 781/1000
5243/5243 - 0s - loss: 0.2463 - accuracy: 0.8974
Epoch 782/1000
5243/5243 - 0s - loss: 0.2472 - accuracy: 0.8962
Epoch 783/1000
5243/5243 - 0s - loss: 0.2454 - accuracy: 0.8989
Epoch 784/1000
5243/5243 - 0s - loss: 0.2465 - accuracy: 0.8980
Epoch 785/1000
5243/5243 - 0s - loss: 0.2459 - accuracy: 0.9001
Epoch 786/1000
5243/5243 - 0s - loss: 0.2451 - accuracy: 0.8995
Epoch 787/1000
5243/5243 - 0s - loss: 0.2457 - accuracy: 0.8943
Epoch 788/1000
5243/5243 - 0s - loss: 0.2465 - accuracy

Epoch 902/1000
5243/5243 - 0s - loss: 0.2461 - accuracy: 0.8962
Epoch 903/1000
5243/5243 - 0s - loss: 0.2479 - accuracy: 0.8972
Epoch 904/1000
5243/5243 - 0s - loss: 0.2450 - accuracy: 0.8974
Epoch 905/1000
5243/5243 - 0s - loss: 0.2468 - accuracy: 0.8949
Epoch 906/1000
5243/5243 - 0s - loss: 0.2458 - accuracy: 0.8966
Epoch 907/1000
5243/5243 - 0s - loss: 0.2445 - accuracy: 0.8981
Epoch 908/1000
5243/5243 - 0s - loss: 0.2447 - accuracy: 0.8987
Epoch 909/1000
5243/5243 - 0s - loss: 0.2446 - accuracy: 0.8991
Epoch 910/1000
5243/5243 - 0s - loss: 0.2442 - accuracy: 0.9002
Epoch 911/1000
5243/5243 - 0s - loss: 0.2438 - accuracy: 0.8976
Epoch 912/1000
5243/5243 - 0s - loss: 0.2456 - accuracy: 0.8983
Epoch 913/1000
5243/5243 - 0s - loss: 0.2460 - accuracy: 0.8981
Epoch 914/1000
5243/5243 - 0s - loss: 0.2452 - accuracy: 0.8978
Epoch 915/1000
5243/5243 - 0s - loss: 0.2453 - accuracy: 0.8966
Epoch 916/1000
5243/5243 - 0s - loss: 0.2451 - accuracy: 0.8987
Epoch 917/1000
5243/5243 - 0s - loss: 0.

<tensorflow.python.keras.callbacks.History at 0x20cd756f788>

In [19]:
# Score the trained network on the held-out test split.
test_metrics = model.evaluate(X_test_scaled, y_test_cat, verbose=2)

# evaluate() returns the loss followed by each compiled metric (accuracy here).
model_loss, model_accuracy = test_metrics
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

1748/1 - 0s - loss: 0.2822 - accuracy: 0.8896
Loss: 0.2643268640308686, Accuracy: 0.8895881175994873


# Save the Model

In [23]:
# Persist the trained network to disk.
# The assignment template's joblib snippet
# (joblib.dump(model, 'creznikov_nn.sav')) is intentionally not used here;
# the model is written with Keras' own `model.save` instead.
# NOTE(review): the ".h5" suffix presumably selects the HDF5 format --
# confirm against the installed Keras version.
# The fitted X_scaler and label_encoder are not saved by this cell; anyone
# reloading the model must recreate or persist them separately.
# Be sure to turn the saved file in to BCS.

model.save("creznikov_nn.h5")