# Neural Network (NN) Model

In [1]:
# Update sklearn to prevent version mismatches
!pip install sklearn --upgrade

Requirement already up-to-date: sklearn in c:\users\bartc\.conda\envs\pythonadv\lib\site-packages (0.0)


In [2]:
# Update sklearn to prevent version mismatches
!pip install tensorflow==2.2 --upgrade
!pip install keras --upgrade

Requirement already up-to-date: tensorflow==2.2 in c:\users\bartc\.conda\envs\pythonadv\lib\site-packages (2.2.0)
Requirement already up-to-date: keras in c:\users\bartc\.conda\envs\pythonadv\lib\site-packages (2.4.3)


In [3]:
# Install joblib. This will be used to save your model. 
# Restart your kernel after installing 
!pip install joblib



In [4]:
import pandas as pd

# Read the CSV and Perform Basic Data Cleaning

In [5]:
df = pd.read_csv("exoplanet_data.csv")
# Drop the null columns where all values are null
df = df.dropna(axis='columns', how='all')
# Drop the null rows
df = df.dropna()
df.head()

Unnamed: 0,koi_disposition,koi_fpflag_nt,koi_fpflag_ss,koi_fpflag_co,koi_fpflag_ec,koi_period,koi_period_err1,koi_period_err2,koi_time0bk,koi_time0bk_err1,...,koi_steff_err2,koi_slogg,koi_slogg_err1,koi_slogg_err2,koi_srad,koi_srad_err1,koi_srad_err2,ra,dec,koi_kepmag
0,CONFIRMED,0,0,0,0,54.418383,0.0002479,-0.0002479,162.51384,0.00352,...,-81,4.467,0.064,-0.096,0.927,0.105,-0.061,291.93423,48.141651,15.347
1,FALSE POSITIVE,0,1,0,0,19.89914,1.49e-05,-1.49e-05,175.850252,0.000581,...,-176,4.544,0.044,-0.176,0.868,0.233,-0.078,297.00482,48.134129,15.436
2,FALSE POSITIVE,0,1,0,0,1.736952,2.63e-07,-2.63e-07,170.307565,0.000115,...,-174,4.564,0.053,-0.168,0.791,0.201,-0.067,285.53461,48.28521,15.597
3,CONFIRMED,0,0,0,0,2.525592,3.76e-06,-3.76e-06,171.59555,0.00113,...,-211,4.438,0.07,-0.21,1.046,0.334,-0.133,288.75488,48.2262,15.509
4,CONFIRMED,0,0,0,0,4.134435,1.05e-05,-1.05e-05,172.97937,0.0019,...,-232,4.486,0.054,-0.229,0.972,0.315,-0.105,296.28613,48.22467,15.714


# Select your features (columns)

In [6]:
# Set features. This will also be used as your x values.
#data = df.drop("koi_disposition", axis=1)
#selected_features = data.columns
selected_features = df[['koi_fpflag_nt','koi_fpflag_ss','koi_fpflag_co','koi_fpflag_ec',
                        'koi_period','koi_period_err1','koi_period_err2',
                        'koi_time0bk','koi_time0bk_err1','koi_time0bk_err2',
                        'koi_impact','koi_impact_err1','koi_impact_err2',
                        'koi_duration','koi_duration_err1','koi_duration_err2',
                        'koi_depth','koi_depth_err1','koi_depth_err2',
                        'koi_prad','koi_prad_err1','koi_prad_err2',
                        'koi_teq','koi_insol','koi_insol_err1','koi_insol_err2',
                        'koi_model_snr','koi_steff','koi_steff_err1','koi_steff_err2',
                        'koi_slogg','koi_slogg_err1','koi_slogg_err2',
                        'koi_srad','koi_srad_err1','koi_srad_err2',
                        'ra','dec','koi_kepmag']]
selected_features.head()

Unnamed: 0,koi_fpflag_nt,koi_fpflag_ss,koi_fpflag_co,koi_fpflag_ec,koi_period,koi_period_err1,koi_period_err2,koi_time0bk,koi_time0bk_err1,koi_time0bk_err2,...,koi_steff_err2,koi_slogg,koi_slogg_err1,koi_slogg_err2,koi_srad,koi_srad_err1,koi_srad_err2,ra,dec,koi_kepmag
0,0,0,0,0,54.418383,0.0002479,-0.0002479,162.51384,0.00352,-0.00352,...,-81,4.467,0.064,-0.096,0.927,0.105,-0.061,291.93423,48.141651,15.347
1,0,1,0,0,19.89914,1.49e-05,-1.49e-05,175.850252,0.000581,-0.000581,...,-176,4.544,0.044,-0.176,0.868,0.233,-0.078,297.00482,48.134129,15.436
2,0,1,0,0,1.736952,2.63e-07,-2.63e-07,170.307565,0.000115,-0.000115,...,-174,4.564,0.053,-0.168,0.791,0.201,-0.067,285.53461,48.28521,15.597
3,0,0,0,0,2.525592,3.76e-06,-3.76e-06,171.59555,0.00113,-0.00113,...,-211,4.438,0.07,-0.21,1.046,0.334,-0.133,288.75488,48.2262,15.509
4,0,0,0,0,4.134435,1.05e-05,-1.05e-05,172.97937,0.0019,-0.0019,...,-232,4.486,0.054,-0.229,0.972,0.315,-0.105,296.28613,48.22467,15.714


# Create a Train Test Split

Use `koi_disposition` for the y values

In [7]:
# Define target dataframe, target_names array, and X and y variables
target = df["koi_disposition"]
target_names = ["Confirmed", "False Positive", "Candidate"]
X = selected_features
y = target

# Derive X and y training and testing variables
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42, stratify=y)

In [8]:
X_train.head()

Unnamed: 0,koi_fpflag_nt,koi_fpflag_ss,koi_fpflag_co,koi_fpflag_ec,koi_period,koi_period_err1,koi_period_err2,koi_time0bk,koi_time0bk_err1,koi_time0bk_err2,...,koi_steff_err2,koi_slogg,koi_slogg_err1,koi_slogg_err2,koi_srad,koi_srad_err1,koi_srad_err2,ra,dec,koi_kepmag
6080,1,0,0,0,12.496435,0.0002213,-0.0002213,132.0358,0.0143,-0.0143,...,-286,3.805,0.39,-0.13,2.73,0.535,-1.248,289.2308,44.412483,13.054
3001,0,0,0,0,11.615625,0.0001528,-0.0001528,131.96843,0.00823,-0.00823,...,-72,4.083,0.368,-0.092,1.453,0.218,-0.51,293.52756,41.111439,15.162
570,0,1,0,0,10.980246,6.93e-07,-6.93e-07,137.137607,5.3e-05,-5.3e-05,...,-159,4.462,0.098,-0.182,0.897,0.238,-0.119,282.79764,43.578129,14.212
4897,1,0,0,0,466.90824,0.01194,-0.01194,136.3731,0.019,-0.019,...,-146,4.456,0.102,-0.361,0.867,0.448,-0.103,297.65436,43.178551,15.202
625,0,1,1,1,1.061933,1.25e-06,-1.25e-06,133.850441,0.000978,-0.000978,...,-167,3.975,0.259,-0.111,1.851,0.383,-0.575,288.90253,44.632992,12.953


# Pre-processing

Scale the data using the MinMaxScaler and perform some feature selection

In [9]:
# Scale your data
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
from tensorflow.keras.utils import to_categorical
X_scaler = MinMaxScaler().fit(X_train)
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

# Label-encode data set and print the encoded_y_test
label_encoder = LabelEncoder()
label_encoder.fit(y_train)
encoded_y_train = label_encoder.transform(y_train)
encoded_y_test = label_encoder.transform(y_test)
y_train_categorical = to_categorical(encoded_y_train)
y_test_categorical = to_categorical(encoded_y_test)
print(encoded_y_test)

[0 2 2 ... 2 2 1]


# Train the Model



In [14]:
from tensorflow.keras.models import Sequential
model5 = Sequential()

from tensorflow.keras.layers import Dense
number_inputs = 39
number_hidden_nodes = 6
model5.add(Dense(units=number_hidden_nodes, activation='relu', input_dim=number_inputs))
number_classes = 3
model5.add(Dense(units=number_classes, activation='softmax'))
model5.summary()

Model: "sequential_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_6 (Dense)              (None, 6)                 240       
_________________________________________________________________
dense_7 (Dense)              (None, 3)                 21        
Total params: 261
Trainable params: 261
Non-trainable params: 0
_________________________________________________________________


In [15]:
# Fit the data and print Training Data Scores
model5.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
model5.fit(X_train_scaled, y_train_categorical, epochs=1000, shuffle=True, verbose=2)
#print(f"Training Data Score: {model5.score(X_train_scaled, encoded_y_train)}")
#print(f"Testing Data Score: {model5.score(X_test_scaled, encoded_y_test)}")

Train on 5243 samples
Epoch 1/1000
5243/5243 - 0s - loss: 0.9389 - accuracy: 0.4793
Epoch 2/1000
5243/5243 - 0s - loss: 0.7382 - accuracy: 0.6838
Epoch 3/1000
5243/5243 - 0s - loss: 0.5980 - accuracy: 0.7738
Epoch 4/1000
5243/5243 - 0s - loss: 0.5127 - accuracy: 0.7908
Epoch 5/1000
5243/5243 - 0s - loss: 0.4620 - accuracy: 0.8079
Epoch 6/1000
5243/5243 - 0s - loss: 0.4321 - accuracy: 0.8058
Epoch 7/1000
5243/5243 - 0s - loss: 0.4125 - accuracy: 0.8091
Epoch 8/1000
5243/5243 - 0s - loss: 0.3994 - accuracy: 0.8097
Epoch 9/1000
5243/5243 - 0s - loss: 0.3897 - accuracy: 0.8171
Epoch 10/1000
5243/5243 - 0s - loss: 0.3825 - accuracy: 0.8158
Epoch 11/1000
5243/5243 - 0s - loss: 0.3763 - accuracy: 0.8247
Epoch 12/1000
5243/5243 - 0s - loss: 0.3708 - accuracy: 0.8230
Epoch 13/1000
5243/5243 - 0s - loss: 0.3671 - accuracy: 0.8255
Epoch 14/1000
5243/5243 - 0s - loss: 0.3627 - accuracy: 0.8299
Epoch 15/1000
5243/5243 - 0s - loss: 0.3595 - accuracy: 0.8297
Epoch 16/1000
5243/5243 - 0s - loss: 0.356

Epoch 131/1000
5243/5243 - 0s - loss: 0.2912 - accuracy: 0.8798
Epoch 132/1000
5243/5243 - 0s - loss: 0.2900 - accuracy: 0.8770
Epoch 133/1000
5243/5243 - 0s - loss: 0.2912 - accuracy: 0.8783
Epoch 134/1000
5243/5243 - 0s - loss: 0.2893 - accuracy: 0.8796
Epoch 135/1000
5243/5243 - 0s - loss: 0.2903 - accuracy: 0.8800
Epoch 136/1000
5243/5243 - 0s - loss: 0.2898 - accuracy: 0.8810
Epoch 137/1000
5243/5243 - 0s - loss: 0.2902 - accuracy: 0.8798
Epoch 138/1000
5243/5243 - 0s - loss: 0.2895 - accuracy: 0.8770
Epoch 139/1000
5243/5243 - 0s - loss: 0.2898 - accuracy: 0.8802
Epoch 140/1000
5243/5243 - 0s - loss: 0.2899 - accuracy: 0.8783
Epoch 141/1000
5243/5243 - 0s - loss: 0.2892 - accuracy: 0.8777
Epoch 142/1000
5243/5243 - 0s - loss: 0.2885 - accuracy: 0.8840
Epoch 143/1000
5243/5243 - 0s - loss: 0.2893 - accuracy: 0.8804
Epoch 144/1000
5243/5243 - 0s - loss: 0.2889 - accuracy: 0.8804
Epoch 145/1000
5243/5243 - 0s - loss: 0.2889 - accuracy: 0.8791
Epoch 146/1000
5243/5243 - 0s - loss: 0.

5243/5243 - 0s - loss: 0.2762 - accuracy: 0.8858
Epoch 260/1000
5243/5243 - 0s - loss: 0.2766 - accuracy: 0.8858
Epoch 261/1000
5243/5243 - 0s - loss: 0.2769 - accuracy: 0.8837
Epoch 262/1000
5243/5243 - 0s - loss: 0.2759 - accuracy: 0.8844
Epoch 263/1000
5243/5243 - 0s - loss: 0.2762 - accuracy: 0.8817
Epoch 264/1000
5243/5243 - 0s - loss: 0.2752 - accuracy: 0.8875
Epoch 265/1000
5243/5243 - 0s - loss: 0.2760 - accuracy: 0.8852
Epoch 266/1000
5243/5243 - 0s - loss: 0.2769 - accuracy: 0.8831
Epoch 267/1000
5243/5243 - 0s - loss: 0.2768 - accuracy: 0.8823
Epoch 268/1000
5243/5243 - 0s - loss: 0.2768 - accuracy: 0.8867
Epoch 269/1000
5243/5243 - 0s - loss: 0.2758 - accuracy: 0.8848
Epoch 270/1000
5243/5243 - 0s - loss: 0.2758 - accuracy: 0.8852
Epoch 271/1000
5243/5243 - 0s - loss: 0.2748 - accuracy: 0.8856
Epoch 272/1000
5243/5243 - 0s - loss: 0.2757 - accuracy: 0.8844
Epoch 273/1000
5243/5243 - 0s - loss: 0.2755 - accuracy: 0.8858
Epoch 274/1000
5243/5243 - 0s - loss: 0.2757 - accuracy

Epoch 388/1000
5243/5243 - 0s - loss: 0.2662 - accuracy: 0.8896
Epoch 389/1000
5243/5243 - 0s - loss: 0.2664 - accuracy: 0.8890
Epoch 390/1000
5243/5243 - 0s - loss: 0.2671 - accuracy: 0.8907
Epoch 391/1000
5243/5243 - 0s - loss: 0.2665 - accuracy: 0.8894
Epoch 392/1000
5243/5243 - 0s - loss: 0.2656 - accuracy: 0.8884
Epoch 393/1000
5243/5243 - 0s - loss: 0.2669 - accuracy: 0.8913
Epoch 394/1000
5243/5243 - 0s - loss: 0.2664 - accuracy: 0.8877
Epoch 395/1000
5243/5243 - 0s - loss: 0.2661 - accuracy: 0.8894
Epoch 396/1000
5243/5243 - 0s - loss: 0.2656 - accuracy: 0.8919
Epoch 397/1000
5243/5243 - 0s - loss: 0.2650 - accuracy: 0.8894
Epoch 398/1000
5243/5243 - 0s - loss: 0.2666 - accuracy: 0.8880
Epoch 399/1000
5243/5243 - 0s - loss: 0.2658 - accuracy: 0.8896
Epoch 400/1000
5243/5243 - 0s - loss: 0.2651 - accuracy: 0.8901
Epoch 401/1000
5243/5243 - 0s - loss: 0.2660 - accuracy: 0.8873
Epoch 402/1000
5243/5243 - 0s - loss: 0.2660 - accuracy: 0.8888
Epoch 403/1000
5243/5243 - 0s - loss: 0.

5243/5243 - 0s - loss: 0.2591 - accuracy: 0.8909
Epoch 517/1000
5243/5243 - 0s - loss: 0.2580 - accuracy: 0.8940
Epoch 518/1000
5243/5243 - 0s - loss: 0.2583 - accuracy: 0.8945
Epoch 519/1000
5243/5243 - 0s - loss: 0.2585 - accuracy: 0.8959
Epoch 520/1000
5243/5243 - 0s - loss: 0.2578 - accuracy: 0.8930
Epoch 521/1000
5243/5243 - 0s - loss: 0.2606 - accuracy: 0.8919
Epoch 522/1000
5243/5243 - 0s - loss: 0.2587 - accuracy: 0.8917
Epoch 523/1000
5243/5243 - 0s - loss: 0.2586 - accuracy: 0.8922
Epoch 524/1000
5243/5243 - 0s - loss: 0.2583 - accuracy: 0.8936
Epoch 525/1000
5243/5243 - 0s - loss: 0.2589 - accuracy: 0.8917
Epoch 526/1000
5243/5243 - 0s - loss: 0.2592 - accuracy: 0.8934
Epoch 527/1000
5243/5243 - 0s - loss: 0.2585 - accuracy: 0.8949
Epoch 528/1000
5243/5243 - 0s - loss: 0.2577 - accuracy: 0.8938
Epoch 529/1000
5243/5243 - 0s - loss: 0.2582 - accuracy: 0.8949
Epoch 530/1000
5243/5243 - 0s - loss: 0.2577 - accuracy: 0.8949
Epoch 531/1000
5243/5243 - 0s - loss: 0.2578 - accuracy

Epoch 645/1000
5243/5243 - 0s - loss: 0.2559 - accuracy: 0.8920
Epoch 646/1000
5243/5243 - 0s - loss: 0.2553 - accuracy: 0.8930
Epoch 647/1000
5243/5243 - 0s - loss: 0.2541 - accuracy: 0.8959
Epoch 648/1000
5243/5243 - 0s - loss: 0.2544 - accuracy: 0.8940
Epoch 649/1000
5243/5243 - 0s - loss: 0.2554 - accuracy: 0.8943
Epoch 650/1000
5243/5243 - 0s - loss: 0.2541 - accuracy: 0.8945
Epoch 651/1000
5243/5243 - 0s - loss: 0.2542 - accuracy: 0.8920
Epoch 652/1000
5243/5243 - 0s - loss: 0.2539 - accuracy: 0.8941
Epoch 653/1000
5243/5243 - 0s - loss: 0.2552 - accuracy: 0.8930
Epoch 654/1000
5243/5243 - 0s - loss: 0.2564 - accuracy: 0.8928
Epoch 655/1000
5243/5243 - 0s - loss: 0.2529 - accuracy: 0.8955
Epoch 656/1000
5243/5243 - 0s - loss: 0.2542 - accuracy: 0.8959
Epoch 657/1000
5243/5243 - 0s - loss: 0.2550 - accuracy: 0.8928
Epoch 658/1000
5243/5243 - 0s - loss: 0.2536 - accuracy: 0.8938
Epoch 659/1000
5243/5243 - 0s - loss: 0.2557 - accuracy: 0.8924
Epoch 660/1000
5243/5243 - 0s - loss: 0.

5243/5243 - 0s - loss: 0.2515 - accuracy: 0.8962
Epoch 774/1000
5243/5243 - 0s - loss: 0.2512 - accuracy: 0.8953
Epoch 775/1000
5243/5243 - 0s - loss: 0.2512 - accuracy: 0.8930
Epoch 776/1000
5243/5243 - 0s - loss: 0.2516 - accuracy: 0.8941
Epoch 777/1000
5243/5243 - 0s - loss: 0.2529 - accuracy: 0.8957
Epoch 778/1000
5243/5243 - 0s - loss: 0.2504 - accuracy: 0.8959
Epoch 779/1000
5243/5243 - 0s - loss: 0.2504 - accuracy: 0.8957
Epoch 780/1000
5243/5243 - 0s - loss: 0.2526 - accuracy: 0.8953
Epoch 781/1000
5243/5243 - 0s - loss: 0.2505 - accuracy: 0.8985
Epoch 782/1000
5243/5243 - 0s - loss: 0.2530 - accuracy: 0.8934
Epoch 783/1000
5243/5243 - 0s - loss: 0.2507 - accuracy: 0.8978
Epoch 784/1000
5243/5243 - 0s - loss: 0.2508 - accuracy: 0.8955
Epoch 785/1000
5243/5243 - 0s - loss: 0.2523 - accuracy: 0.8932
Epoch 786/1000
5243/5243 - 0s - loss: 0.2492 - accuracy: 0.8947
Epoch 787/1000
5243/5243 - 0s - loss: 0.2503 - accuracy: 0.8966
Epoch 788/1000
5243/5243 - 0s - loss: 0.2502 - accuracy

Epoch 902/1000
5243/5243 - 0s - loss: 0.2473 - accuracy: 0.8972
Epoch 903/1000
5243/5243 - 0s - loss: 0.2475 - accuracy: 0.8957
Epoch 904/1000
5243/5243 - 0s - loss: 0.2483 - accuracy: 0.8953
Epoch 905/1000
5243/5243 - 0s - loss: 0.2463 - accuracy: 0.8974
Epoch 906/1000
5243/5243 - 0s - loss: 0.2476 - accuracy: 0.8983
Epoch 907/1000
5243/5243 - 0s - loss: 0.2472 - accuracy: 0.8980
Epoch 908/1000
5243/5243 - 0s - loss: 0.2493 - accuracy: 0.8978
Epoch 909/1000
5243/5243 - 0s - loss: 0.2468 - accuracy: 0.8964
Epoch 910/1000
5243/5243 - 0s - loss: 0.2455 - accuracy: 0.8980
Epoch 911/1000
5243/5243 - 0s - loss: 0.2481 - accuracy: 0.8953
Epoch 912/1000
5243/5243 - 0s - loss: 0.2471 - accuracy: 0.8970
Epoch 913/1000
5243/5243 - 0s - loss: 0.2468 - accuracy: 0.8955
Epoch 914/1000
5243/5243 - 0s - loss: 0.2465 - accuracy: 0.8980
Epoch 915/1000
5243/5243 - 0s - loss: 0.2469 - accuracy: 0.8938
Epoch 916/1000
5243/5243 - 0s - loss: 0.2472 - accuracy: 0.8966
Epoch 917/1000
5243/5243 - 0s - loss: 0.

<tensorflow.python.keras.callbacks.History at 0x2681534eb48>

# Test NN Model

In [16]:
model_loss, model_accuracy = model5.evaluate(
    X_test_scaled, y_test_categorical, verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

1748/1 - 0s - loss: 0.2811 - accuracy: 0.8982
Loss: 0.24155302847004592, Accuracy: 0.8981693387031555
