## Deep Learning Model

In [1]:
# Libraries
import pandas as pd

### Read the CSV and Basic Data Cleaning

In [2]:
# Load the exoplanet table, then clean it in a single chain:
#   1. discard columns in which every value is null
#   2. discard any remaining row that contains a null
df = (
    pd.read_csv("../Resources/exoplanet_data.csv")
    .dropna(axis="columns", how="all")
    .dropna()
)
df.head()

Unnamed: 0,koi_disposition,koi_fpflag_nt,koi_fpflag_ss,koi_fpflag_co,koi_fpflag_ec,koi_period,koi_period_err1,koi_period_err2,koi_time0bk,koi_time0bk_err1,...,koi_steff_err2,koi_slogg,koi_slogg_err1,koi_slogg_err2,koi_srad,koi_srad_err1,koi_srad_err2,ra,dec,koi_kepmag
0,CONFIRMED,0,0,0,0,54.418383,0.0002479,-0.0002479,162.51384,0.00352,...,-81,4.467,0.064,-0.096,0.927,0.105,-0.061,291.93423,48.141651,15.347
1,FALSE POSITIVE,0,1,0,0,19.89914,1.49e-05,-1.49e-05,175.850252,0.000581,...,-176,4.544,0.044,-0.176,0.868,0.233,-0.078,297.00482,48.134129,15.436
2,FALSE POSITIVE,0,1,0,0,1.736952,2.63e-07,-2.63e-07,170.307565,0.000115,...,-174,4.564,0.053,-0.168,0.791,0.201,-0.067,285.53461,48.28521,15.597
3,CONFIRMED,0,0,0,0,2.525592,3.76e-06,-3.76e-06,171.59555,0.00113,...,-211,4.438,0.07,-0.21,1.046,0.334,-0.133,288.75488,48.2262,15.509
4,CONFIRMED,0,0,0,0,4.134435,1.05e-05,-1.05e-05,172.97937,0.0019,...,-232,4.486,0.054,-0.229,0.972,0.315,-0.105,296.28613,48.22467,15.714


### Select features

In [3]:
# One-hot encode the label column: every distinct koi_disposition value
# becomes its own indicator column, all other columns are carried over.
data_multiclass = pd.get_dummies(data=df, columns=["koi_disposition"])
data_multiclass.head()

Unnamed: 0,koi_fpflag_nt,koi_fpflag_ss,koi_fpflag_co,koi_fpflag_ec,koi_period,koi_period_err1,koi_period_err2,koi_time0bk,koi_time0bk_err1,koi_time0bk_err2,...,koi_slogg_err2,koi_srad,koi_srad_err1,koi_srad_err2,ra,dec,koi_kepmag,koi_disposition_CANDIDATE,koi_disposition_CONFIRMED,koi_disposition_FALSE POSITIVE
0,0,0,0,0,54.418383,0.0002479,-0.0002479,162.51384,0.00352,-0.00352,...,-0.096,0.927,0.105,-0.061,291.93423,48.141651,15.347,0,1,0
1,0,1,0,0,19.89914,1.49e-05,-1.49e-05,175.850252,0.000581,-0.000581,...,-0.176,0.868,0.233,-0.078,297.00482,48.134129,15.436,0,0,1
2,0,1,0,0,1.736952,2.63e-07,-2.63e-07,170.307565,0.000115,-0.000115,...,-0.168,0.791,0.201,-0.067,285.53461,48.28521,15.597,0,0,1
3,0,0,0,0,2.525592,3.76e-06,-3.76e-06,171.59555,0.00113,-0.00113,...,-0.21,1.046,0.334,-0.133,288.75488,48.2262,15.509,0,1,0
4,0,0,0,0,4.134435,1.05e-05,-1.05e-05,172.97937,0.0019,-0.0019,...,-0.229,0.972,0.315,-0.105,296.28613,48.22467,15.714,0,1,0


In [4]:
# Feature matrix (these are the model's X values).
# We are choosing some parameters related to the light curve when a planet
# passes in front of a star, and its period.
feature_columns = [
    'koi_fpflag_ss',
    'koi_fpflag_co',
    'koi_fpflag_nt',
    'koi_period',
    'koi_time0bk',
    'koi_depth',
]
selected_features = data_multiclass[feature_columns]

In [5]:
# Target matrix: the three one-hot indicator columns produced from
# koi_disposition, one per class to predict.
target_columns = [
    'koi_disposition_CANDIDATE',
    'koi_disposition_CONFIRMED',
    'koi_disposition_FALSE POSITIVE',
]
y = data_multiclass[target_columns]

### Create a Train Test Split

In [7]:
from sklearn.model_selection import train_test_split

# Hold out part of the data for testing; random_state pins the shuffle so the
# same rows land in each partition on every run.
splits = train_test_split(selected_features, y, random_state=1)
X_train, X_test, y_train, y_test = splits

### Pre-processing

Scale data

In [9]:
# Bring in the scaler used in the next cells and pin the NumPy and TensorFlow
# random number generators to one shared seed value.
from sklearn.preprocessing import StandardScaler
from numpy.random import seed
from tensorflow import random

RANDOM_SEED = 1
seed(RANDOM_SEED)
random.set_seed(RANDOM_SEED)

In [10]:
# Fit the scaler on the training features only, so the test split does not
# influence the scaling statistics.
X_scaler = StandardScaler()
X_scaler.fit(X_train)

In [11]:
# Apply the already-fitted scaler to both partitions, training first.
X_train_scaled, X_test_scaled = (
    X_scaler.transform(features) for features in (X_train, X_test)
)

### Train the Model

In [12]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

# Fully connected classifier: two ReLU hidden layers (100 and 50 units)
# followed by a 3-unit softmax output, one unit per disposition class.
# The input width is taken from the number of scaled feature columns.
n_features = X_train_scaled.shape[1]
model = Sequential([
    Dense(units=100, activation='relu', input_dim=n_features),
    Dense(units=50, activation='relu'),
    Dense(units=3, activation='softmax'),
])

In [13]:
# Print the layer-by-layer architecture and parameter counts of `model`.
# NOTE(review): the output stored under this cell lists only two Dense layers
# (1,003 params in total), whereas the model built in the preceding cell has
# three Dense layers -- the stored output looks stale; re-run to refresh it.
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 100)               700       
_________________________________________________________________
dense_1 (Dense)              (None, 3)                 303       
Total params: 1,003
Trainable params: 1,003
Non-trainable params: 0
_________________________________________________________________


In [14]:
# Configure training: Adam optimizer, categorical cross-entropy loss
# (the targets are one-hot encoded), and accuracy reported as the metric.
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [15]:
# Train the network on the scaled training features for 200 epochs.
# `shuffle` must be a real boolean (Keras also accepts the string 'batch');
# passing the string 'True' only works by accident because any non-empty
# string is truthy.
# verbose=2 prints one summary line per epoch.
model.fit(X_train_scaled, y_train, epochs=200, shuffle=True, verbose=2)

Train on 5243 samples
Epoch 1/200
5243/5243 - 0s - loss: 0.6479 - accuracy: 0.7194
Epoch 2/200
5243/5243 - 0s - loss: 0.4443 - accuracy: 0.7692
Epoch 3/200
5243/5243 - 0s - loss: 0.4337 - accuracy: 0.7660
Epoch 4/200
5243/5243 - 0s - loss: 0.4299 - accuracy: 0.7679
Epoch 5/200
5243/5243 - 0s - loss: 0.4265 - accuracy: 0.7692
Epoch 6/200
5243/5243 - 0s - loss: 0.4244 - accuracy: 0.7707
Epoch 7/200
5243/5243 - 0s - loss: 0.4226 - accuracy: 0.7753
Epoch 8/200
5243/5243 - 0s - loss: 0.4213 - accuracy: 0.7774
Epoch 9/200
5243/5243 - 0s - loss: 0.4202 - accuracy: 0.7725
Epoch 10/200
5243/5243 - 0s - loss: 0.4193 - accuracy: 0.7740
Epoch 11/200
5243/5243 - 0s - loss: 0.4189 - accuracy: 0.7746
Epoch 12/200
5243/5243 - 0s - loss: 0.4180 - accuracy: 0.7774
Epoch 13/200
5243/5243 - 0s - loss: 0.4175 - accuracy: 0.7725
Epoch 14/200
5243/5243 - 0s - loss: 0.4177 - accuracy: 0.7759
Epoch 15/200
5243/5243 - 0s - loss: 0.4179 - accuracy: 0.7711
Epoch 16/200
5243/5243 - 0s - loss: 0.4157 - accuracy: 0.

Epoch 133/200
5243/5243 - 0s - loss: 0.4021 - accuracy: 0.7810
Epoch 134/200
5243/5243 - 0s - loss: 0.4030 - accuracy: 0.7822
Epoch 135/200
5243/5243 - 0s - loss: 0.4027 - accuracy: 0.7826
Epoch 136/200
5243/5243 - 0s - loss: 0.4025 - accuracy: 0.7809
Epoch 137/200
5243/5243 - 0s - loss: 0.4031 - accuracy: 0.7810
Epoch 138/200
5243/5243 - 0s - loss: 0.4021 - accuracy: 0.7759
Epoch 139/200
5243/5243 - 0s - loss: 0.4029 - accuracy: 0.7837
Epoch 140/200
5243/5243 - 0s - loss: 0.4023 - accuracy: 0.7818
Epoch 141/200
5243/5243 - 0s - loss: 0.4034 - accuracy: 0.7810
Epoch 142/200
5243/5243 - 0s - loss: 0.4010 - accuracy: 0.7810
Epoch 143/200
5243/5243 - 0s - loss: 0.4030 - accuracy: 0.7835
Epoch 144/200
5243/5243 - 0s - loss: 0.4021 - accuracy: 0.7818
Epoch 145/200
5243/5243 - 0s - loss: 0.4022 - accuracy: 0.7763
Epoch 146/200
5243/5243 - 0s - loss: 0.4033 - accuracy: 0.7768
Epoch 147/200
5243/5243 - 0s - loss: 0.4022 - accuracy: 0.7847
Epoch 148/200
5243/5243 - 0s - loss: 0.4024 - accuracy:

<tensorflow.python.keras.callbacks.History at 0x7faa2c003550>

### Evaluate the Model

In [17]:
# Score the trained model on each partition; evaluate() returns
# [loss, accuracy] given the loss and metric configured at compile time.
train_score = model.evaluate(X_train_scaled, y_train)
print(f"Training Data Score: {train_score}")
test_score = model.evaluate(X_test_scaled, y_test)
print(f"Testing Data Score: {test_score}")

Training Data Score: [0.3976842041816417, 0.78638184]
Testing Data Score: [0.3867416693498668, 0.7957666]


### Hyperparameter Tuning

Use `GridSearchCV` to tune the model's parameters

In [18]:
# Create the GridSearchCV model

In [19]:
# Train the model with GridSearch

In [None]:
# Report the best hyperparameter combination found by the grid search and
# its score.
# NOTE(review): `grid2` is not defined anywhere in the visible notebook -- the
# two GridSearchCV cells above contain only placeholder comments -- so this
# cell raises NameError until the search is actually created and fitted.
print(grid2.best_params_)
print(grid2.best_score_)

### Save the Model

In [None]:
# Save the trained Keras network (`model`, built and fitted above) to disk.
# The template referenced an undefined placeholder variable (`your_model`),
# which raises NameError; the trained model lives in `model`.
# Keras models are persisted with their own save() method rather than being
# pickled through joblib; the ".h5" suffix selects the single-file HDF5 format.
# TODO: update "your_name" with your name, and be sure to turn this in to BCS.
filename = 'your_name.h5'
model.save(filename)