In [1]:
#Import innitial dependencies
import warnings
warnings.simplefilter('ignore')

%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

In [2]:
# Read the csv file into a pandas DataFrame
dataset = pd.read_csv('../data/cumulative.csv')
dataset.head()

Unnamed: 0,rowid,kepid,kepoi_name,kepler_name,koi_disposition,koi_pdisposition,koi_score,koi_fpflag_nt,koi_fpflag_ss,koi_fpflag_co,...,koi_steff_err2,koi_slogg,koi_slogg_err1,koi_slogg_err2,koi_srad,koi_srad_err1,koi_srad_err2,ra,dec,koi_kepmag
0,1,10797460,K00752.01,Kepler-227 b,CONFIRMED,CANDIDATE,1.0,0,0,0,...,-81.0,4.467,0.064,-0.096,0.927,0.105,-0.061,291.93423,48.141651,15.347
1,2,10797460,K00752.02,Kepler-227 c,CONFIRMED,CANDIDATE,0.969,0,0,0,...,-81.0,4.467,0.064,-0.096,0.927,0.105,-0.061,291.93423,48.141651,15.347
2,3,10811496,K00753.01,,FALSE POSITIVE,FALSE POSITIVE,0.0,0,1,0,...,-176.0,4.544,0.044,-0.176,0.868,0.233,-0.078,297.00482,48.134129,15.436
3,4,10848459,K00754.01,,FALSE POSITIVE,FALSE POSITIVE,0.0,0,1,0,...,-174.0,4.564,0.053,-0.168,0.791,0.201,-0.067,285.53461,48.28521,15.597
4,5,10854555,K00755.01,Kepler-664 b,CONFIRMED,CANDIDATE,1.0,0,0,0,...,-211.0,4.438,0.07,-0.21,1.046,0.334,-0.133,288.75488,48.2262,15.509


In [3]:
# Create a new DataFrame object specific columns(numeric values)
NewDataset = pd.DataFrame(dataset, columns=['koi_disposition','koi_score', 'koi_fpflag_nt', 'koi_fpflag_ss', 'koi_fpflag_co', 'koi_steff_err2', 'koi_slogg', 'koi_slogg_err1', 'koi_slogg_err2', 'koi_srad', 'koi_srad_err1', 'koi_srad_err2', 'ra', 'dec' ,'koi_kepmag'])
NewDataset.head()

Unnamed: 0,koi_disposition,koi_score,koi_fpflag_nt,koi_fpflag_ss,koi_fpflag_co,koi_steff_err2,koi_slogg,koi_slogg_err1,koi_slogg_err2,koi_srad,koi_srad_err1,koi_srad_err2,ra,dec,koi_kepmag
0,CONFIRMED,1.0,0,0,0,-81.0,4.467,0.064,-0.096,0.927,0.105,-0.061,291.93423,48.141651,15.347
1,CONFIRMED,0.969,0,0,0,-81.0,4.467,0.064,-0.096,0.927,0.105,-0.061,291.93423,48.141651,15.347
2,FALSE POSITIVE,0.0,0,1,0,-176.0,4.544,0.044,-0.176,0.868,0.233,-0.078,297.00482,48.134129,15.436
3,FALSE POSITIVE,0.0,0,1,0,-174.0,4.564,0.053,-0.168,0.791,0.201,-0.067,285.53461,48.28521,15.597
4,CONFIRMED,1.0,0,0,0,-211.0,4.438,0.07,-0.21,1.046,0.334,-0.133,288.75488,48.2262,15.509


In [4]:
#Drop NaN results 
NewDataset=NewDataset.dropna()
NewDataset.head()

Unnamed: 0,koi_disposition,koi_score,koi_fpflag_nt,koi_fpflag_ss,koi_fpflag_co,koi_steff_err2,koi_slogg,koi_slogg_err1,koi_slogg_err2,koi_srad,koi_srad_err1,koi_srad_err2,ra,dec,koi_kepmag
0,CONFIRMED,1.0,0,0,0,-81.0,4.467,0.064,-0.096,0.927,0.105,-0.061,291.93423,48.141651,15.347
1,CONFIRMED,0.969,0,0,0,-81.0,4.467,0.064,-0.096,0.927,0.105,-0.061,291.93423,48.141651,15.347
2,FALSE POSITIVE,0.0,0,1,0,-176.0,4.544,0.044,-0.176,0.868,0.233,-0.078,297.00482,48.134129,15.436
3,FALSE POSITIVE,0.0,0,1,0,-174.0,4.564,0.053,-0.168,0.791,0.201,-0.067,285.53461,48.28521,15.597
4,CONFIRMED,1.0,0,0,0,-211.0,4.438,0.07,-0.21,1.046,0.334,-0.133,288.75488,48.2262,15.509


In [5]:
# Assign X (data) and y (target) - And review 
X = NewDataset.drop("koi_disposition", axis=1)
y = NewDataset["koi_disposition"]
print(X.shape, y.shape)

(7892, 14) (7892,)


In [6]:
# Split data into training and testing
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1, stratify=y)

In [7]:
#Scale our data before using multilayer perceptron model
from sklearn.preprocessing import MinMaxScaler
X_scaler = MinMaxScaler().fit(X_train)
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

In [8]:
import tensorflow as tf
from keras.models import Sequential
from keras.utils import to_categorical
from keras.layers import Dense
from sklearn.preprocessing import LabelEncoder
from keras.callbacks import EarlyStopping

Using TensorFlow backend.


In [9]:
#create a sequential model
label_encoder = LabelEncoder()
label_encoder.fit(y_train)
encoded_y_train = label_encoder.transform(y_train)
encoded_y_test = label_encoder.transform(y_test)
y_train_categorical = to_categorical(encoded_y_train)
y_test_categorical = to_categorical(encoded_y_test)

In [10]:
y_train_categorical

array([[1., 0., 0.],
       [0., 0., 1.],
       [0., 0., 1.],
       ...,
       [0., 1., 0.],
       [0., 0., 1.],
       [0., 1., 0.]], dtype=float32)

In [11]:
#Specify the layers Input, Hidden and Output - Add number of units we are trying to predict (3).
model = Sequential()
model.add(Dense(units=100, activation='relu', input_dim=14))
model.add(Dense(units=100, activation='relu'))
model.add(Dense(units=100, activation='relu'))
model.add(Dense(units=100, activation='relu'))
model.add(Dense(units=3, activation='softmax'))






In [12]:
#compile the model using a loss function and optimizer and specify additional training metrics such as accuracy.
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])





In [13]:
#Display model summary
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_1 (Dense)              (None, 100)               1500      
_________________________________________________________________
dense_2 (Dense)              (None, 100)               10100     
_________________________________________________________________
dense_3 (Dense)              (None, 100)               10100     
_________________________________________________________________
dense_4 (Dense)              (None, 100)               10100     
_________________________________________________________________
dense_5 (Dense)              (None, 3)                 303       
Total params: 32,103
Trainable params: 32,103
Non-trainable params: 0
_________________________________________________________________


In [14]:
# Fit (train) the model
callbacks = [EarlyStopping(monitor='val_loss', patience=2)]
model.fit(
    X_train_scaled,
    y_train_categorical,
    callbacks=callbacks,
    epochs=60,
    shuffle=True,
    verbose=2
)

Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where

Epoch 1/60
 - 5s - loss: 0.4455 - acc: 0.7745
Epoch 2/60
 - 1s - loss: 0.3654 - acc: 0.8032
Epoch 3/60
 - 1s - loss: 0.3665 - acc: 0.8015
Epoch 4/60
 - 1s - loss: 0.3578 - acc: 0.8109
Epoch 5/60
 - 2s - loss: 0.3543 - acc: 0.8157
Epoch 6/60
 - 1s - loss: 0.3569 - acc: 0.8108
Epoch 7/60
 - 1s - loss: 0.3492 - acc: 0.8189
Epoch 8/60
 - 1s - loss: 0.3508 - acc: 0.8138
Epoch 9/60
 - 1s - loss: 0.3480 - acc: 0.8202
Epoch 10/60
 - 1s - loss: 0.3486 - acc: 0.8150
Epoch 11/60
 - 1s - loss: 0.3486 - acc: 0.8201
Epoch 12/60
 - 1s - loss: 0.3442 - acc: 0.8182
Epoch 13/60
 - 1s - loss: 0.3477 - acc: 0.8233
Epoch 14/60
 - 1s - loss: 0.3430 - acc: 0.8246
Epoch 15/60
 - 1s - loss: 0.3399 - acc: 0.8256
Epoch 16/60
 - 1s - loss: 0.3441 - acc: 0.8245
Epoch 17/60
 - 1s - loss: 0.3398 - acc: 0.8243
Epoch 18/60
 - 1s - loss: 0.3391 - acc: 0.8251
Epoch 19/60
 - 1s - loss: 0.3430 - acc: 0.8204
Epoch 20/60
 - 1s - l

<keras.callbacks.History at 0x1a3bae0358>

In [15]:
# Evaluate the model using the testing data
model_loss, model_accuracy = model.evaluate(
    X_test_scaled, y_test_categorical, verbose=2)
print(
    f"Normal Neural Network - Loss: {model_loss}, Accuracy: {model_accuracy}")

Normal Neural Network - Loss: 0.3527214423811381, Accuracy: 0.8236188547779205


In [16]:
#Load the model
encoded_predictions = model.predict_classes(X_test_scaled[:5])
prediction_labels = label_encoder.inverse_transform(encoded_predictions)

In [17]:
# Evaluate the model using the testing data
print(f"Predicted classes: {prediction_labels}")
print(f"Actual Labels: {list(y_test[:5])}")

Predicted classes: ['FALSE POSITIVE' 'CONFIRMED' 'FALSE POSITIVE' 'CONFIRMED' 'CONFIRMED']
Actual Labels: ['FALSE POSITIVE', 'CONFIRMED', 'FALSE POSITIVE', 'CONFIRMED', 'CONFIRMED']


In [18]:
import joblib
filename = 'deep_learning_model.sav'
joblib.dump(model, filename)

['deep_learning_model.sav']