# Neural Network

In [1]:
# Imports
# %matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow

from numpy.random import seed
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Sequential
from tensorflow.keras.utils import to_categorical

In [2]:
# Set the seed values for the notebook so the results are reproducible.
# NumPy's seed alone does not cover TensorFlow's own random generator (used,
# for example, when Keras initialises layer weights), so both are seeded.
seed(42)
tensorflow.random.set_seed(42)

In [3]:
# Load the exoplanet CSV into a pandas DataFrame and preview the first rows
csv_path = 'exoplanet_complete_kNN.csv'
exoplanet_complete_kNN = pd.read_csv(csv_path)
exoplanet_complete_kNN.head()

Unnamed: 0,koi_disposition,koi_score,koi_period,koi_time0bk,koi_impact,koi_duration,koi_depth,koi_prad,koi_teq,koi_insol,koi_steff,koi_slogg,koi_srad,ra,dec,koi_kepmag
0,1,1.0,9.488036,170.53875,0.146,2.9575,615.8,2.26,793,93.59,5455,4.467,0.927,291.93423,48.141651,15.347
1,1,0.969,54.418383,162.51384,0.586,4.507,874.8,2.83,443,9.11,5455,4.467,0.927,291.93423,48.141651,15.347
2,3,0.0,19.89914,175.850252,0.969,1.7822,10829.0,14.6,638,39.3,5853,4.544,0.868,297.00482,48.134129,15.436
3,3,0.0,1.736952,170.307565,1.276,2.40641,8079.2,33.46,1395,891.96,5805,4.564,0.791,285.53461,48.28521,15.597
4,1,1.0,2.525592,171.59555,0.701,1.6545,603.3,2.75,1406,926.16,6031,4.438,1.046,288.75488,48.2262,15.509


In [4]:
# Check tensorflow.keras version
# (displayed as the cell output so the library version used for this run is recorded)
tensorflow.keras.__version__

'2.2.4-tf'

## Data Preprocessing

In [5]:
# Separate the target from the features: koi_disposition becomes the dependent
# variable y, and every remaining column stays in X.
# NOTE(review): the frame preview shows an "Unnamed: 0" column that looks like a
# saved row index; it is not dropped here, so it is used as a feature — confirm
# that this is intended.
target_column = "koi_disposition"
y = exoplanet_complete_kNN[target_column]
X = exoplanet_complete_kNN.drop(columns=[target_column])
print(X.shape, y.shape)

(7994, 15) (7994,)


In [6]:
# Split into train and test sets. Stratifying on y keeps the class proportions
# the same in both parts; random_state pins the shuffle for repeatability.
split_options = {"random_state": 1, "stratify": y}
X_train, X_test, y_train, y_test = train_test_split(X, y, **split_options)

In [7]:
# Min-max scale the features. The scaler learns its ranges from the training
# split only and is then reused for the test split, so no test-set statistics
# influence the transformation.
X_scaler = MinMaxScaler()
X_train_scaled = X_scaler.fit_transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

  return self.partial_fit(X, y)


In [8]:
# Step 1: Label-encode data set.
# The encoder is fitted on the training labels and then applied to both splits.
label_encoder = LabelEncoder().fit(y_train)
encoded_y_train, encoded_y_test = (
    label_encoder.transform(labels) for labels in (y_train, y_test)
)

In [9]:
# Step 2: Convert encoded labels to one-hot-encoding.
# The number of classes is passed explicitly. Left to its default,
# to_categorical infers the width from the largest label present in each array,
# so the train and test matrices could end up with different numbers of columns
# if a class were missing from one split.
n_label_classes = len(label_encoder.classes_)
y_train_categorical = to_categorical(
    encoded_y_train, num_classes=n_label_classes)
y_test_categorical = to_categorical(
    encoded_y_test, num_classes=n_label_classes)
y_train_categorical


array([[0., 1., 0.],
       [0., 1., 0.],
       [1., 0., 0.],
       ...,
       [0., 0., 1.],
       [1., 0., 0.],
       [1., 0., 0.]], dtype=float32)

## Create a Deep Learning Model

In [10]:
# Create model and add layers
# Start from an empty Sequential model; layers are appended to it with model.add()
model = Sequential()

In [11]:
# Hidden layer: 45 ReLU units.
# The input width is read from the scaled training matrix rather than
# hard-coded, so the layer stays in sync with the data if the feature set changes.
number_inputs = X_train_scaled.shape[1]
number_hidden_nodes = 45
model.add(Dense(units=number_hidden_nodes,
                activation='relu', input_dim=number_inputs))

In [12]:
# Output layer: one softmax unit per class.
# The class count is taken from the width of the one-hot label matrix rather
# than hard-coded, so it always matches the targets the model is trained on.
number_classes = y_train_categorical.shape[1]
model.add(Dense(units=number_classes, activation='softmax'))

In [13]:
# Compile the model: Adam optimizer, categorical cross-entropy loss (the
# targets are one-hot encoded) and accuracy as the reported metric.
compile_settings = {
    'loss': 'categorical_crossentropy',
    'optimizer': 'adam',
    'metrics': ['accuracy'],
}
model.compile(**compile_settings)

In [14]:
# Print the layer-by-layer architecture with output shapes and parameter counts
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 45)                720       
_________________________________________________________________
dense_1 (Dense)              (None, 3)                 138       
Total params: 858
Trainable params: 858
Non-trainable params: 0
_________________________________________________________________


In [15]:
# Fit (train) the model on the scaled training features and one-hot labels.
# verbose=2 prints one summary line per epoch.
fit_options = {"epochs": 100, "shuffle": True, "verbose": 2}
model.fit(X_train_scaled, y_train_categorical, **fit_options)

Train on 5995 samples
Epoch 1/100
5995/5995 - 1s - loss: 0.7530 - accuracy: 0.7133
Epoch 2/100
5995/5995 - 0s - loss: 0.4560 - accuracy: 0.7738
Epoch 3/100
5995/5995 - 0s - loss: 0.4064 - accuracy: 0.7908
Epoch 4/100
5995/5995 - 0s - loss: 0.3956 - accuracy: 0.7943
Epoch 5/100
5995/5995 - 0s - loss: 0.3905 - accuracy: 0.7975
Epoch 6/100
5995/5995 - 0s - loss: 0.3880 - accuracy: 0.8002
Epoch 7/100
5995/5995 - 0s - loss: 0.3860 - accuracy: 0.8008
Epoch 8/100
5995/5995 - 0s - loss: 0.3864 - accuracy: 0.7987
Epoch 9/100
5995/5995 - 0s - loss: 0.3835 - accuracy: 0.8023
Epoch 10/100
5995/5995 - 0s - loss: 0.3818 - accuracy: 0.8030
Epoch 11/100
5995/5995 - 0s - loss: 0.3806 - accuracy: 0.8040
Epoch 12/100
5995/5995 - 0s - loss: 0.3799 - accuracy: 0.8097
Epoch 13/100
5995/5995 - 0s - loss: 0.3782 - accuracy: 0.8038
Epoch 14/100
5995/5995 - 0s - loss: 0.3776 - accuracy: 0.8102
Epoch 15/100
5995/5995 - 0s - loss: 0.3766 - accuracy: 0.8035
Epoch 16/100
5995/5995 - 0s - loss: 0.3759 - accuracy: 0.

<tensorflow.python.keras.callbacks.History at 0x113746278>

## Quantify the trained model

In [16]:
# Score the trained network on the scaled test split and report loss and accuracy
evaluation = model.evaluate(X_test_scaled, y_test_categorical, verbose=2)
model_loss, model_accuracy = evaluation
print(f"Normal Neural Network - Loss: {model_loss}, Accuracy: {model_accuracy}")

1999/1999 - 0s - loss: 0.3553 - accuracy: 0.8244
Normal Neural Network - Loss: 0.355268792026099, Accuracy: 0.8244122266769409


# Summary:

### Neural Network: 82.4% accuracy on the held-out test set