# Deep Neural Network

In [1]:
import pandas as pd

# ML libraries
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import GridSearchCV

from sklearn.preprocessing import LabelEncoder
from keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

import warnings 
warnings.filterwarnings("ignore")

Using TensorFlow backend.


### Read the CSV and Perform Basic Data Cleaning

In [2]:
df = pd.read_csv("exoplanet_data.csv")
# Drop the null columns where all values are null
df = df.dropna(axis='columns', how='all')
# Drop the null rows
df = df.dropna()

In [3]:
# Create a test train split
y = df['koi_disposition']
X = df.drop(columns=['koi_disposition'], axis=1)

X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42, stratify=y)

### Pre-Processing

Scale the data using the MinMaxScaler and perform some feature selection

In [4]:
# Apply label encoding because y is categorical data
label_encoder = LabelEncoder()
label_encoder.fit(y)
encoded_y_train = label_encoder.transform(y_train)
encoded_y_test = label_encoder.transform(y_test)

# for label, original_class in zip(encoded_y_train, y):
#     print('Original Class: ' + str(original_class))
#     print('Encoded Label: ' + str(label))
#     print('-' * 12)

In [5]:
# Apply one-hot encoding
categorical_y_train = to_categorical(encoded_y_train)
categorical_y_test = to_categorical(encoded_y_test)

In [6]:
# Scale your data
X_scaler = MinMaxScaler().fit(X_train)

X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

### Train the Model

In [7]:
# normal neural network
model_nnn = Sequential()

model_nnn.add(Dense(units=160, activation='relu', input_dim=40))
model_nnn.add(Dense(units=3, activation='softmax'))

model_nnn.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
model_nnn.fit(X_train_scaled, categorical_y_train, epochs=100, shuffle=True, verbose=2)

Train on 5243 samples
Epoch 1/100
5243/5243 - 1s - loss: 0.6784 - accuracy: 0.6798
Epoch 2/100
5243/5243 - 0s - loss: 0.4060 - accuracy: 0.8013
Epoch 3/100
5243/5243 - 0s - loss: 0.3675 - accuracy: 0.8194
Epoch 4/100
5243/5243 - 0s - loss: 0.3572 - accuracy: 0.8184
Epoch 5/100
5243/5243 - 0s - loss: 0.3490 - accuracy: 0.8264
Epoch 6/100
5243/5243 - 0s - loss: 0.3402 - accuracy: 0.8390
Epoch 7/100
5243/5243 - 0s - loss: 0.3329 - accuracy: 0.8465
Epoch 8/100
5243/5243 - 0s - loss: 0.3338 - accuracy: 0.8367
Epoch 9/100
5243/5243 - 0s - loss: 0.3261 - accuracy: 0.8510
Epoch 10/100
5243/5243 - 0s - loss: 0.3219 - accuracy: 0.8508
Epoch 11/100
5243/5243 - 0s - loss: 0.3207 - accuracy: 0.8510
Epoch 12/100
5243/5243 - 0s - loss: 0.3165 - accuracy: 0.8545
Epoch 13/100
5243/5243 - 0s - loss: 0.3168 - accuracy: 0.8558
Epoch 14/100
5243/5243 - 0s - loss: 0.3093 - accuracy: 0.8610
Epoch 15/100
5243/5243 - 0s - loss: 0.3069 - accuracy: 0.8659
Epoch 16/100
5243/5243 - 0s - loss: 0.3062 - accuracy: 0.

<tensorflow.python.keras.callbacks.History at 0x23371cd17c8>

In [8]:
# evaluate the normal model
model_nnn_loss, model_nnn_accuracy = model_nnn.evaluate(X_train_scaled, categorical_y_train, verbose=3)
print(f"Normal Neural Network - Loss: {model_nnn_loss}, Accuracy: {model_nnn_accuracy}")

Normal Neural Network - Loss: 0.24563201407587712, Accuracy: 0.8971962332725525


In [9]:
# deep neural network
model_dnn = Sequential()

model_dnn.add(Dense(units=160, activation='relu', input_dim=40))
model_dnn.add(Dense(units=160, activation='relu'))
model_dnn.add(Dense(units=160, activation='relu'))
model_dnn.add(Dense(units=3, activation='softmax'))

model_dnn.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
model_dnn.fit(X_train_scaled, categorical_y_train, epochs=100, shuffle=True, verbose=2)

Train on 5243 samples
Epoch 1/100
5243/5243 - 1s - loss: 0.4702 - accuracy: 0.7526
Epoch 2/100
5243/5243 - 0s - loss: 0.3746 - accuracy: 0.8064
Epoch 3/100
5243/5243 - 0s - loss: 0.3628 - accuracy: 0.8152
Epoch 4/100
5243/5243 - 0s - loss: 0.3515 - accuracy: 0.8249
Epoch 5/100
5243/5243 - 0s - loss: 0.3414 - accuracy: 0.8356
Epoch 6/100
5243/5243 - 0s - loss: 0.3316 - accuracy: 0.8463
Epoch 7/100
5243/5243 - 0s - loss: 0.3244 - accuracy: 0.8459
Epoch 8/100
5243/5243 - 0s - loss: 0.3195 - accuracy: 0.8507
Epoch 9/100
5243/5243 - 0s - loss: 0.3079 - accuracy: 0.8615
Epoch 10/100
5243/5243 - 0s - loss: 0.3090 - accuracy: 0.8581
Epoch 11/100
5243/5243 - 0s - loss: 0.3030 - accuracy: 0.8631
Epoch 12/100
5243/5243 - 0s - loss: 0.2997 - accuracy: 0.8680
Epoch 13/100
5243/5243 - 0s - loss: 0.2966 - accuracy: 0.8707
Epoch 14/100
5243/5243 - 0s - loss: 0.2880 - accuracy: 0.8735
Epoch 15/100
5243/5243 - 0s - loss: 0.2888 - accuracy: 0.8692
Epoch 16/100
5243/5243 - 0s - loss: 0.2886 - accuracy: 0.

<tensorflow.python.keras.callbacks.History at 0x23373771bc8>

In [10]:
# evaluate the deep model
model_dnn_loss, model_dnn_accuracy = model_dnn.evaluate(X_train_scaled, categorical_y_train, verbose=3)
print(f"Deep Neural Network - Loss: {model_dnn_loss}, Accuracy: {model_dnn_accuracy}")

Deep Neural Network - Loss: 0.19392021073143195, Accuracy: 0.9166507720947266


In [11]:
# DNN predictions
encoded_dnn_predictions = model_dnn.predict_classes(X_test_scaled)
prediction_labels_dnn = label_encoder.inverse_transform(encoded_dnn_predictions)

dnn_df = pd.DataFrame({
    "DNN Predicted Classes": prediction_labels_dnn, 
    "DNN Actual Labels": list(y_test)
})
dnn_df.head(20)

Unnamed: 0,DNN Predicted Classes,DNN Actual Labels
0,CANDIDATE,CANDIDATE
1,FALSE POSITIVE,FALSE POSITIVE
2,FALSE POSITIVE,FALSE POSITIVE
3,FALSE POSITIVE,FALSE POSITIVE
4,CANDIDATE,CANDIDATE
5,FALSE POSITIVE,FALSE POSITIVE
6,CANDIDATE,CANDIDATE
7,FALSE POSITIVE,FALSE POSITIVE
8,FALSE POSITIVE,FALSE POSITIVE
9,FALSE POSITIVE,FALSE POSITIVE


### Save the Model

In [12]:
# be sure to turn this in to BCS

model_dnn.save('deep_neural_network_model.h5')