In [1]:
# Update sklearn to prevent version mismatches
#!pip install scikit-learn --upgrade
#!pip install tensorflow --upgrade

In [2]:
# install joblib. This will be used to save your model. 
# Restart your kernel after installing 
#!pip install joblib

In [3]:
import pandas as pd

# Read the CSV and Perform Basic Data Cleaning

In [4]:
# Load the raw table and prune it in a single chain: first discard columns
# that hold no data at all, then discard every row with a missing value.
df = (
    pd.read_csv("exoplanet_data.csv")
    .dropna(axis='columns', how='all')
    .dropna()
)
df.head()

Unnamed: 0,koi_disposition,koi_fpflag_nt,koi_fpflag_ss,koi_fpflag_co,koi_fpflag_ec,koi_period,koi_period_err1,koi_period_err2,koi_time0bk,koi_time0bk_err1,...,koi_steff_err2,koi_slogg,koi_slogg_err1,koi_slogg_err2,koi_srad,koi_srad_err1,koi_srad_err2,ra,dec,koi_kepmag
0,CONFIRMED,0,0,0,0,54.418383,0.0002479,-0.0002479,162.51384,0.00352,...,-81,4.467,0.064,-0.096,0.927,0.105,-0.061,291.93423,48.141651,15.347
1,FALSE POSITIVE,0,1,0,0,19.89914,1.49e-05,-1.49e-05,175.850252,0.000581,...,-176,4.544,0.044,-0.176,0.868,0.233,-0.078,297.00482,48.134129,15.436
2,FALSE POSITIVE,0,1,0,0,1.736952,2.63e-07,-2.63e-07,170.307565,0.000115,...,-174,4.564,0.053,-0.168,0.791,0.201,-0.067,285.53461,48.28521,15.597
3,CONFIRMED,0,0,0,0,2.525592,3.76e-06,-3.76e-06,171.59555,0.00113,...,-211,4.438,0.07,-0.21,1.046,0.334,-0.133,288.75488,48.2262,15.509
4,CONFIRMED,0,0,0,0,4.134435,1.05e-05,-1.05e-05,172.97937,0.0019,...,-232,4.486,0.054,-0.229,0.972,0.315,-0.105,296.28613,48.22467,15.714


# Create a Train Test Split

Use `koi_disposition` for the y values

In [5]:
# Feature columns fed to the network; the target is the disposition label.
fields_to_use = [
    'koi_fpflag_co',
    'koi_model_snr',
    'koi_prad',
    'koi_duration_err1',
    'koi_teq',
    'koi_impact',
]
X = df[fields_to_use]
y = df['koi_disposition']

In [6]:
from sklearn.preprocessing import LabelEncoder

# Step 1: Label-encode data set
# fit_transform learns the distinct labels in y and returns their integer
# codes in one call; the fitted encoder is kept for later use.
label_encoder = LabelEncoder()
encoded_y = label_encoder.fit_transform(y)

In [7]:
from sklearn.model_selection import train_test_split

# Split features and encoded labels into train and test parts. No test_size
# is passed, so the library default proportion applies; random_state pins the
# shuffle so the same partition is produced on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    encoded_y,
    random_state=1,
)

In [8]:
# Peek at the first training rows. The row labels are the original df index
# values, so they appear out of order after the shuffled split.
X_train.head()

Unnamed: 0,koi_fpflag_co,koi_model_snr,koi_prad,koi_duration_err1,koi_teq,koi_impact
3563,0,11.7,3.89,0.143,899,1.017
4099,0,18.0,2.1,0.153,491,0.709
5460,0,476.0,14.59,0.0152,1276,0.262
1091,0,34.7,2.28,0.165,300,0.001
5999,0,8.7,2.27,0.939,568,0.2136


# Pre-processing

Scale the data using the MinMaxScaler and perform some feature selection

In [9]:
# Import the scaler and fit it on the training features only; the test
# features are transformed later with these same fitted parameters.
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler()
# Left as the cell's final expression, so the fitted scaler's repr is displayed.
scaler.fit(X_train)

MinMaxScaler()

In [10]:
# Run both splits through the already-fitted scaler (train first, then test)
# and show the scaled training matrix as the cell output.
X_train_scaled, X_test_scaled = (
    scaler.transform(features) for features in (X_train, X_test)
)
X_train_scaled

array([[0.00000000e+00, 1.11563995e-03, 1.87176316e-05, 7.07920792e-03,
        5.53687822e-02, 1.00886852e-02],
       [0.00000000e+00, 1.81153417e-03, 9.78308212e-06, 7.57425743e-03,
        2.73756432e-02, 7.03331151e-03],
       [0.00000000e+00, 5.24019397e-02, 7.21252738e-05, 7.52475248e-04,
        8.12349914e-02, 2.59905164e-03],
       ...,
       [1.00000000e+00, 3.11937347e-02, 1.42703223e-04, 9.70297030e-04,
        7.76672384e-02, 1.25885364e-02],
       [0.00000000e+00, 2.77253096e-03, 1.45248821e-05, 6.28712871e-03,
        4.07547170e-02, 5.03938258e-03],
       [0.00000000e+00, 1.99931515e-03, 8.68498106e-06, 7.87128713e-03,
        3.52658662e-02, 8.25347698e-03]])

In [11]:
from tensorflow.keras.utils import to_categorical
# One-hot encoding
# Pin the number of columns to the classes the label encoder saw on the full
# target. Without num_classes, to_categorical sizes each array from the
# largest code present in that array, so a split that happens to miss the
# highest-coded class would come out one column short and no longer match
# the model's output layer.
n_label_classes = len(label_encoder.classes_)
y_train_categorical = to_categorical(y_train, num_classes=n_label_classes)
y_test_categorical = to_categorical(y_test, num_classes=n_label_classes)
y_train_categorical.shape

(5243, 3)

In [12]:
# Sanity check: (rows, feature columns) of the scaled training matrix.
X_train_scaled.shape

(5243, 6)

# Create / Train Neural Net Model

In [13]:
import tensorflow as tf
from tensorflow.keras.models import Sequential

# Seed TensorFlow's global random generator before any layer is created, so
# the randomly initialised weights are the same on every "Restart & Run All".
# The value matches the random_state used for the train/test split.
# NOTE(review): bit-for-bit identical training may additionally depend on
# hardware and other RNG sources - confirm if exact reproducibility matters.
tf.random.set_seed(1)
model = Sequential()

In [14]:
from tensorflow.keras.layers import Dense
# Hidden layer: the input width is tied to the number of selected feature
# columns, and the layer is built first, then appended to the model.
number_inputs = len(fields_to_use)
number_hidden_nodes = 5
hidden_layer = Dense(
    units=number_hidden_nodes,
    activation='relu',
    input_dim=number_inputs,
)
model.add(hidden_layer)

In [15]:
# Output layer - one softmax unit per disposition class.
# The count is read from the fitted label encoder instead of being
# hard-coded, so it cannot drift from the labels actually present in the data.
# LabelEncoder keeps classes_ in sorted order, so for this dataset the codes
# 0, 1, 2 correspond to CANDIDATE, CONFIRMED, FALSE POSITIVE.
number_classes = len(label_encoder.classes_)
model.add(Dense(units=number_classes, activation='softmax'))

In [16]:
# Model Summary: prints each layer's output shape and parameter count
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 5)                 35        
_________________________________________________________________
dense_1 (Dense)              (None, 3)                 18        
Total params: 53
Trainable params: 53
Non-trainable params: 0
_________________________________________________________________


In [17]:
# Compile Model: categorical cross-entropy loss (the targets are one-hot
# encoded), Adam as the optimizer, and accuracy as the reported metric.
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [18]:
# Fit (train) the model
# Keep the History object that fit() returns instead of letting it fall
# through as the cell's repr: history.history holds the per-epoch loss and
# accuracy, which can then be plotted or inspected without re-training.
history = model.fit(
    X_train_scaled,
    y_train_categorical,
    epochs=1000,
    shuffle=True,
    verbose=2
)

Epoch 1/1000
164/164 - 1s - loss: 1.0394 - accuracy: 0.4536
Epoch 2/1000
164/164 - 0s - loss: 0.9613 - accuracy: 0.4949
Epoch 3/1000
164/164 - 0s - loss: 0.9039 - accuracy: 0.4949
Epoch 4/1000
164/164 - 0s - loss: 0.8634 - accuracy: 0.4946
Epoch 5/1000
164/164 - 0s - loss: 0.8364 - accuracy: 0.5085
Epoch 6/1000
164/164 - 0s - loss: 0.8157 - accuracy: 0.5764
Epoch 7/1000
164/164 - 0s - loss: 0.8013 - accuracy: 0.6002
Epoch 8/1000
164/164 - 0s - loss: 0.7909 - accuracy: 0.6157
Epoch 9/1000
164/164 - 0s - loss: 0.7832 - accuracy: 0.6197
Epoch 10/1000
164/164 - 0s - loss: 0.7774 - accuracy: 0.6206
Epoch 11/1000
164/164 - 0s - loss: 0.7727 - accuracy: 0.6206
Epoch 12/1000
164/164 - 0s - loss: 0.7693 - accuracy: 0.6256
Epoch 13/1000
164/164 - 0s - loss: 0.7661 - accuracy: 0.6311
Epoch 14/1000
164/164 - 0s - loss: 0.7636 - accuracy: 0.6250
Epoch 15/1000
164/164 - 0s - loss: 0.7610 - accuracy: 0.6250
Epoch 16/1000
164/164 - 0s - loss: 0.7588 - accuracy: 0.6325
Epoch 17/1000
164/164 - 0s - loss

Epoch 135/1000
164/164 - 0s - loss: 0.6103 - accuracy: 0.7305
Epoch 136/1000
164/164 - 0s - loss: 0.6098 - accuracy: 0.7353
Epoch 137/1000
164/164 - 0s - loss: 0.6098 - accuracy: 0.7316
Epoch 138/1000
164/164 - 0s - loss: 0.6092 - accuracy: 0.7351
Epoch 139/1000
164/164 - 0s - loss: 0.6092 - accuracy: 0.7320
Epoch 140/1000
164/164 - 0s - loss: 0.6087 - accuracy: 0.7345
Epoch 141/1000
164/164 - 0s - loss: 0.6082 - accuracy: 0.7332
Epoch 142/1000
164/164 - 0s - loss: 0.6082 - accuracy: 0.7337
Epoch 143/1000
164/164 - 0s - loss: 0.6078 - accuracy: 0.7330
Epoch 144/1000
164/164 - 0s - loss: 0.6075 - accuracy: 0.7328
Epoch 145/1000
164/164 - 0s - loss: 0.6074 - accuracy: 0.7318
Epoch 146/1000
164/164 - 0s - loss: 0.6074 - accuracy: 0.7335
Epoch 147/1000
164/164 - 0s - loss: 0.6069 - accuracy: 0.7351
Epoch 148/1000
164/164 - 0s - loss: 0.6066 - accuracy: 0.7356
Epoch 149/1000
164/164 - 0s - loss: 0.6067 - accuracy: 0.7347
Epoch 150/1000
164/164 - 0s - loss: 0.6062 - accuracy: 0.7345
Epoch 15

164/164 - 0s - loss: 0.5971 - accuracy: 0.7368
Epoch 268/1000
164/164 - 0s - loss: 0.5973 - accuracy: 0.7355
Epoch 269/1000
164/164 - 1s - loss: 0.5967 - accuracy: 0.7376
Epoch 270/1000
164/164 - 0s - loss: 0.5969 - accuracy: 0.7391
Epoch 271/1000
164/164 - 0s - loss: 0.5968 - accuracy: 0.7372
Epoch 272/1000
164/164 - 0s - loss: 0.5967 - accuracy: 0.7377
Epoch 273/1000
164/164 - 0s - loss: 0.5967 - accuracy: 0.7362
Epoch 274/1000
164/164 - 0s - loss: 0.5967 - accuracy: 0.7355
Epoch 275/1000
164/164 - 0s - loss: 0.5968 - accuracy: 0.7368
Epoch 276/1000
164/164 - 0s - loss: 0.5968 - accuracy: 0.7366
Epoch 277/1000
164/164 - 0s - loss: 0.5968 - accuracy: 0.7381
Epoch 278/1000
164/164 - 0s - loss: 0.5962 - accuracy: 0.7387
Epoch 279/1000
164/164 - 0s - loss: 0.5968 - accuracy: 0.7366
Epoch 280/1000
164/164 - 0s - loss: 0.5965 - accuracy: 0.7391
Epoch 281/1000
164/164 - 0s - loss: 0.5968 - accuracy: 0.7397
Epoch 282/1000
164/164 - 0s - loss: 0.5963 - accuracy: 0.7379
Epoch 283/1000
164/164 

Epoch 400/1000
164/164 - 0s - loss: 0.5937 - accuracy: 0.7391
Epoch 401/1000
164/164 - 0s - loss: 0.5934 - accuracy: 0.7423
Epoch 402/1000
164/164 - 0s - loss: 0.5938 - accuracy: 0.7404
Epoch 403/1000
164/164 - 0s - loss: 0.5944 - accuracy: 0.7406
Epoch 404/1000
164/164 - 0s - loss: 0.5936 - accuracy: 0.7397
Epoch 405/1000
164/164 - 0s - loss: 0.5944 - accuracy: 0.7397
Epoch 406/1000
164/164 - 0s - loss: 0.5938 - accuracy: 0.7395
Epoch 407/1000
164/164 - 0s - loss: 0.5936 - accuracy: 0.7398
Epoch 408/1000
164/164 - 0s - loss: 0.5941 - accuracy: 0.7406
Epoch 409/1000
164/164 - 0s - loss: 0.5936 - accuracy: 0.7400
Epoch 410/1000
164/164 - 0s - loss: 0.5936 - accuracy: 0.7389
Epoch 411/1000
164/164 - 0s - loss: 0.5936 - accuracy: 0.7385
Epoch 412/1000
164/164 - 0s - loss: 0.5936 - accuracy: 0.7404
Epoch 413/1000
164/164 - 0s - loss: 0.5936 - accuracy: 0.7412
Epoch 414/1000
164/164 - 0s - loss: 0.5936 - accuracy: 0.7374
Epoch 415/1000
164/164 - 0s - loss: 0.5936 - accuracy: 0.7391
Epoch 41

164/164 - 0s - loss: 0.5915 - accuracy: 0.7427
Epoch 533/1000
164/164 - 0s - loss: 0.5917 - accuracy: 0.7372
Epoch 534/1000
164/164 - 0s - loss: 0.5921 - accuracy: 0.7418
Epoch 535/1000
164/164 - 0s - loss: 0.5922 - accuracy: 0.7410
Epoch 536/1000
164/164 - 0s - loss: 0.5922 - accuracy: 0.7395
Epoch 537/1000
164/164 - 0s - loss: 0.5925 - accuracy: 0.7400
Epoch 538/1000
164/164 - 0s - loss: 0.5920 - accuracy: 0.7406
Epoch 539/1000
164/164 - 0s - loss: 0.5917 - accuracy: 0.7431
Epoch 540/1000
164/164 - 0s - loss: 0.5921 - accuracy: 0.7416
Epoch 541/1000
164/164 - 0s - loss: 0.5919 - accuracy: 0.7404
Epoch 542/1000
164/164 - 0s - loss: 0.5919 - accuracy: 0.7404
Epoch 543/1000
164/164 - 1s - loss: 0.5916 - accuracy: 0.7404
Epoch 544/1000
164/164 - 0s - loss: 0.5914 - accuracy: 0.7438
Epoch 545/1000
164/164 - 0s - loss: 0.5915 - accuracy: 0.7412
Epoch 546/1000
164/164 - 0s - loss: 0.5918 - accuracy: 0.7410
Epoch 547/1000
164/164 - 0s - loss: 0.5922 - accuracy: 0.7418
Epoch 548/1000
164/164 

Epoch 665/1000
164/164 - 0s - loss: 0.5905 - accuracy: 0.7400
Epoch 666/1000
164/164 - 0s - loss: 0.5902 - accuracy: 0.7438
Epoch 667/1000
164/164 - 0s - loss: 0.5902 - accuracy: 0.7418
Epoch 668/1000
164/164 - 0s - loss: 0.5902 - accuracy: 0.7423
Epoch 669/1000
164/164 - 0s - loss: 0.5902 - accuracy: 0.7410
Epoch 670/1000
164/164 - 0s - loss: 0.5902 - accuracy: 0.7398
Epoch 671/1000
164/164 - 0s - loss: 0.5902 - accuracy: 0.7425
Epoch 672/1000
164/164 - 0s - loss: 0.5900 - accuracy: 0.7402
Epoch 673/1000
164/164 - 0s - loss: 0.5898 - accuracy: 0.7414
Epoch 674/1000
164/164 - 1s - loss: 0.5902 - accuracy: 0.7438
Epoch 675/1000
164/164 - 0s - loss: 0.5904 - accuracy: 0.7404
Epoch 676/1000
164/164 - 0s - loss: 0.5903 - accuracy: 0.7452
Epoch 677/1000
164/164 - 0s - loss: 0.5901 - accuracy: 0.7408
Epoch 678/1000
164/164 - 0s - loss: 0.5899 - accuracy: 0.7435
Epoch 679/1000
164/164 - 0s - loss: 0.5898 - accuracy: 0.7418
Epoch 680/1000
164/164 - 0s - loss: 0.5900 - accuracy: 0.7444
Epoch 68

164/164 - 0s - loss: 0.5883 - accuracy: 0.7440
Epoch 798/1000
164/164 - 0s - loss: 0.5884 - accuracy: 0.7440
Epoch 799/1000
164/164 - 0s - loss: 0.5883 - accuracy: 0.7433
Epoch 800/1000
164/164 - 0s - loss: 0.5886 - accuracy: 0.7421
Epoch 801/1000
164/164 - 0s - loss: 0.5884 - accuracy: 0.7423
Epoch 802/1000
164/164 - 0s - loss: 0.5885 - accuracy: 0.7423
Epoch 803/1000
164/164 - 0s - loss: 0.5885 - accuracy: 0.7477
Epoch 804/1000
164/164 - 0s - loss: 0.5883 - accuracy: 0.7446
Epoch 805/1000
164/164 - 0s - loss: 0.5886 - accuracy: 0.7431
Epoch 806/1000
164/164 - 0s - loss: 0.5880 - accuracy: 0.7406
Epoch 807/1000
164/164 - 0s - loss: 0.5881 - accuracy: 0.7452
Epoch 808/1000
164/164 - 0s - loss: 0.5884 - accuracy: 0.7440
Epoch 809/1000
164/164 - 0s - loss: 0.5884 - accuracy: 0.7433
Epoch 810/1000
164/164 - 0s - loss: 0.5885 - accuracy: 0.7435
Epoch 811/1000
164/164 - 0s - loss: 0.5883 - accuracy: 0.7425
Epoch 812/1000
164/164 - 0s - loss: 0.5882 - accuracy: 0.7421
Epoch 813/1000
164/164 

Epoch 930/1000
164/164 - 0s - loss: 0.5863 - accuracy: 0.7442
Epoch 931/1000
164/164 - 0s - loss: 0.5868 - accuracy: 0.7448
Epoch 932/1000
164/164 - 0s - loss: 0.5865 - accuracy: 0.7448
Epoch 933/1000
164/164 - 0s - loss: 0.5869 - accuracy: 0.7463
Epoch 934/1000
164/164 - 0s - loss: 0.5862 - accuracy: 0.7458
Epoch 935/1000
164/164 - 0s - loss: 0.5864 - accuracy: 0.7452
Epoch 936/1000
164/164 - 0s - loss: 0.5862 - accuracy: 0.7459
Epoch 937/1000
164/164 - 0s - loss: 0.5866 - accuracy: 0.7458
Epoch 938/1000
164/164 - 0s - loss: 0.5862 - accuracy: 0.7431
Epoch 939/1000
164/164 - 0s - loss: 0.5865 - accuracy: 0.7459
Epoch 940/1000
164/164 - 0s - loss: 0.5859 - accuracy: 0.7454
Epoch 941/1000
164/164 - 0s - loss: 0.5860 - accuracy: 0.7450
Epoch 942/1000
164/164 - 0s - loss: 0.5863 - accuracy: 0.7442
Epoch 943/1000
164/164 - 0s - loss: 0.5865 - accuracy: 0.7425
Epoch 944/1000
164/164 - 0s - loss: 0.5867 - accuracy: 0.7435
Epoch 945/1000
164/164 - 0s - loss: 0.5863 - accuracy: 0.7454
Epoch 94

<keras.callbacks.History at 0x144e30580>

In [19]:
# Evaluate the model using the testing data
# evaluate() returns the loss followed by the compiled metric (accuracy here);
# unpack the pair and report both values.
test_scores = model.evaluate(X_test_scaled, y_test_categorical, verbose=2)
model_loss, model_accuracy = test_scores
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

55/55 - 0s - loss: 0.5451 - accuracy: 0.7408
Loss: 0.5450627207756042, Accuracy: 0.7408466935157776
