In [1]:
# Update sklearn to prevent version mismatches
!pip install sklearn --upgrade
!pip install tensorflow --upgrade

Requirement already up-to-date: sklearn in /opt/anaconda3/lib/python3.8/site-packages (0.0)
Requirement already up-to-date: tensorflow in /opt/anaconda3/lib/python3.8/site-packages (2.6.0)


In [2]:
# install joblib. This will be used to save your model. 
# Restart your kernel after installing 
!pip install joblib



In [3]:
import pandas as pd

# Read the CSV and Perform Basic Data Cleaning

In [4]:
df = pd.read_csv("exoplanet_data.csv")
# Drop the null columns where all values are null
df = df.dropna(axis='columns', how='all')
# Drop the null rows
df = df.dropna()
df.head()

Unnamed: 0,koi_disposition,koi_fpflag_nt,koi_fpflag_ss,koi_fpflag_co,koi_fpflag_ec,koi_period,koi_period_err1,koi_period_err2,koi_time0bk,koi_time0bk_err1,...,koi_steff_err2,koi_slogg,koi_slogg_err1,koi_slogg_err2,koi_srad,koi_srad_err1,koi_srad_err2,ra,dec,koi_kepmag
0,CONFIRMED,0,0,0,0,54.418383,0.0002479,-0.0002479,162.51384,0.00352,...,-81,4.467,0.064,-0.096,0.927,0.105,-0.061,291.93423,48.141651,15.347
1,FALSE POSITIVE,0,1,0,0,19.89914,1.49e-05,-1.49e-05,175.850252,0.000581,...,-176,4.544,0.044,-0.176,0.868,0.233,-0.078,297.00482,48.134129,15.436
2,FALSE POSITIVE,0,1,0,0,1.736952,2.63e-07,-2.63e-07,170.307565,0.000115,...,-174,4.564,0.053,-0.168,0.791,0.201,-0.067,285.53461,48.28521,15.597
3,CONFIRMED,0,0,0,0,2.525592,3.76e-06,-3.76e-06,171.59555,0.00113,...,-211,4.438,0.07,-0.21,1.046,0.334,-0.133,288.75488,48.2262,15.509
4,CONFIRMED,0,0,0,0,4.134435,1.05e-05,-1.05e-05,172.97937,0.0019,...,-232,4.486,0.054,-0.229,0.972,0.315,-0.105,296.28613,48.22467,15.714


# Create a Train Test Split

Use `koi_disposition` for the y values

In [5]:
X = df.drop('koi_disposition',axis=1)
y = df.loc[:,'koi_disposition']

In [6]:
from sklearn.preprocessing import LabelEncoder

# Step 1: Label-encode data set
label_encoder = LabelEncoder()
label_encoder.fit(y)
encoded_y = label_encoder.transform(y)

In [7]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, encoded_y, random_state=1) # split

In [8]:
X_train.head()

Unnamed: 0,koi_fpflag_nt,koi_fpflag_ss,koi_fpflag_co,koi_fpflag_ec,koi_period,koi_period_err1,koi_period_err2,koi_time0bk,koi_time0bk_err1,koi_time0bk_err2,...,koi_steff_err2,koi_slogg,koi_slogg_err1,koi_slogg_err2,koi_srad,koi_srad_err1,koi_srad_err2,ra,dec,koi_kepmag
3563,0,0,0,0,10.548413,5.47e-05,-5.47e-05,139.06402,0.00411,-0.00411,...,-133,4.387,0.066,-0.123,1.092,0.181,-0.097,298.09543,44.737061,13.204
4099,0,0,0,0,24.754385,0.0001365,-0.0001365,140.20732,0.00446,-0.00446,...,-144,4.519,0.078,-0.052,0.804,0.056,-0.076,295.73535,42.576248,15.514
5460,0,0,0,0,1.057336,1.23e-07,-1.23e-07,131.792007,9.6e-05,-9.6e-05,...,-140,4.594,0.054,-0.027,0.683,0.054,-0.06,292.18417,49.31004,15.414
1091,0,0,0,0,201.118319,0.001461,-0.001461,187.56986,0.00529,-0.00529,...,-112,4.447,0.072,-0.108,0.954,0.135,-0.083,283.11377,48.13139,13.328
5999,0,0,0,0,91.649983,0.003181,-0.003181,175.7156,0.0286,-0.0286,...,-233,4.145,0.164,-0.164,1.608,0.905,-0.383,294.93198,39.81242,12.964


# Pre-processing

Scale the data using the MinMaxScaler and perform some feature selection

In [9]:
# Import Scaler and train it
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler()
scaler.fit(X_train)

MinMaxScaler()

In [10]:
# Create scaled test and train
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [11]:
from tensorflow.keras.utils import to_categorical
# One-hot encoding
y_train_categorical = to_categorical(y_train)
y_test_categorical = to_categorical(y_test)
y_train_categorical.shape

(5243, 3)

In [12]:
X_train_scaled.shape

(5243, 40)

# Create / Train Neural Net Model

In [13]:
from tensorflow.keras.models import Sequential
model = Sequential()

In [14]:
from tensorflow.keras.layers import Dense
number_inputs = 40
number_hidden_nodes = 10
model.add(Dense(units=number_hidden_nodes,
                activation='relu', input_dim=number_inputs))
model.add(Dense(units=number_hidden_nodes,
                activation='relu', input_dim=number_hidden_nodes))

In [15]:
#Output layer - 3 Options
# FALSE POSITIVE, CONFIRMED, CANDIDATE
number_classes = 3
model.add(Dense(units=number_classes, activation='softmax'))

In [16]:
# Model Summary
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 10)                410       
_________________________________________________________________
dense_1 (Dense)              (None, 10)                110       
_________________________________________________________________
dense_2 (Dense)              (None, 10)                110       
_________________________________________________________________
dense_3 (Dense)              (None, 10)                110       
_________________________________________________________________
dense_4 (Dense)              (None, 3)                 33        
Total params: 773
Trainable params: 773
Non-trainable params: 0
_________________________________________________________________


In [17]:
#Compile Model
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])

In [18]:
# Fit (train) the model
model.fit(
    X_train_scaled,
    y_train_categorical,
    epochs=1000,
    shuffle=True,
    verbose=2
)

Epoch 1/1000
164/164 - 2s - loss: 0.8773 - accuracy: 0.6344
Epoch 2/1000
164/164 - 0s - loss: 0.4138 - accuracy: 0.7719
Epoch 3/1000
164/164 - 0s - loss: 0.3911 - accuracy: 0.7898
Epoch 4/1000
164/164 - 0s - loss: 0.3787 - accuracy: 0.8114
Epoch 5/1000
164/164 - 0s - loss: 0.3748 - accuracy: 0.8110
Epoch 6/1000
164/164 - 0s - loss: 0.3687 - accuracy: 0.8167
Epoch 7/1000
164/164 - 0s - loss: 0.3633 - accuracy: 0.8167
Epoch 8/1000
164/164 - 1s - loss: 0.3598 - accuracy: 0.8175
Epoch 9/1000
164/164 - 0s - loss: 0.3548 - accuracy: 0.8232
Epoch 10/1000
164/164 - 0s - loss: 0.3520 - accuracy: 0.8266
Epoch 11/1000
164/164 - 0s - loss: 0.3513 - accuracy: 0.8257
Epoch 12/1000
164/164 - 0s - loss: 0.3555 - accuracy: 0.8219
Epoch 13/1000
164/164 - 0s - loss: 0.3460 - accuracy: 0.8325
Epoch 14/1000
164/164 - 0s - loss: 0.3450 - accuracy: 0.8325
Epoch 15/1000
164/164 - 0s - loss: 0.3424 - accuracy: 0.8356
Epoch 16/1000
164/164 - 0s - loss: 0.3380 - accuracy: 0.8358
Epoch 17/1000
164/164 - 0s - loss

Epoch 135/1000
164/164 - 0s - loss: 0.2538 - accuracy: 0.8934
Epoch 136/1000
164/164 - 0s - loss: 0.2618 - accuracy: 0.8890
Epoch 137/1000
164/164 - 0s - loss: 0.2520 - accuracy: 0.8919
Epoch 138/1000
164/164 - 0s - loss: 0.2645 - accuracy: 0.8875
Epoch 139/1000
164/164 - 0s - loss: 0.2523 - accuracy: 0.8930
Epoch 140/1000
164/164 - 0s - loss: 0.2549 - accuracy: 0.8917
Epoch 141/1000
164/164 - 0s - loss: 0.2499 - accuracy: 0.8907
Epoch 142/1000
164/164 - 0s - loss: 0.2501 - accuracy: 0.8928
Epoch 143/1000
164/164 - 0s - loss: 0.2539 - accuracy: 0.8890
Epoch 144/1000
164/164 - 0s - loss: 0.2529 - accuracy: 0.8888
Epoch 145/1000
164/164 - 0s - loss: 0.2543 - accuracy: 0.8894
Epoch 146/1000
164/164 - 0s - loss: 0.2497 - accuracy: 0.8899
Epoch 147/1000
164/164 - 0s - loss: 0.2510 - accuracy: 0.8936
Epoch 148/1000
164/164 - 0s - loss: 0.2508 - accuracy: 0.8920
Epoch 149/1000
164/164 - 0s - loss: 0.2493 - accuracy: 0.8936
Epoch 150/1000
164/164 - 0s - loss: 0.2552 - accuracy: 0.8922
Epoch 15

164/164 - 0s - loss: 0.2393 - accuracy: 0.8968
Epoch 268/1000
164/164 - 1s - loss: 0.2395 - accuracy: 0.8983
Epoch 269/1000
164/164 - 1s - loss: 0.2364 - accuracy: 0.9001
Epoch 270/1000
164/164 - 1s - loss: 0.2330 - accuracy: 0.9022
Epoch 271/1000
164/164 - 1s - loss: 0.2355 - accuracy: 0.9022
Epoch 272/1000
164/164 - 1s - loss: 0.2346 - accuracy: 0.8989
Epoch 273/1000
164/164 - 0s - loss: 0.2386 - accuracy: 0.8966
Epoch 274/1000
164/164 - 0s - loss: 0.2352 - accuracy: 0.9018
Epoch 275/1000
164/164 - 0s - loss: 0.2400 - accuracy: 0.8951
Epoch 276/1000
164/164 - 0s - loss: 0.2369 - accuracy: 0.8987
Epoch 277/1000
164/164 - 0s - loss: 0.2409 - accuracy: 0.8951
Epoch 278/1000
164/164 - 0s - loss: 0.2336 - accuracy: 0.8993
Epoch 279/1000
164/164 - 0s - loss: 0.2410 - accuracy: 0.9012
Epoch 280/1000
164/164 - 0s - loss: 0.2385 - accuracy: 0.8970
Epoch 281/1000
164/164 - 1s - loss: 0.2392 - accuracy: 0.8976
Epoch 282/1000
164/164 - 1s - loss: 0.2348 - accuracy: 0.8983
Epoch 283/1000
164/164 

Epoch 400/1000
164/164 - 0s - loss: 0.2291 - accuracy: 0.9008
Epoch 401/1000
164/164 - 0s - loss: 0.2248 - accuracy: 0.9033
Epoch 402/1000
164/164 - 0s - loss: 0.2277 - accuracy: 0.9014
Epoch 403/1000
164/164 - 0s - loss: 0.2257 - accuracy: 0.9010
Epoch 404/1000
164/164 - 0s - loss: 0.2240 - accuracy: 0.9065
Epoch 405/1000
164/164 - 0s - loss: 0.2267 - accuracy: 0.9020
Epoch 406/1000
164/164 - 0s - loss: 0.2308 - accuracy: 0.9046
Epoch 407/1000
164/164 - 0s - loss: 0.2264 - accuracy: 0.9010
Epoch 408/1000
164/164 - 0s - loss: 0.2260 - accuracy: 0.9023
Epoch 409/1000
164/164 - 0s - loss: 0.2233 - accuracy: 0.9062
Epoch 410/1000
164/164 - 0s - loss: 0.2276 - accuracy: 0.9004
Epoch 411/1000
164/164 - 0s - loss: 0.2267 - accuracy: 0.9022
Epoch 412/1000
164/164 - 0s - loss: 0.2323 - accuracy: 0.9008
Epoch 413/1000
164/164 - 0s - loss: 0.2249 - accuracy: 0.9022
Epoch 414/1000
164/164 - 0s - loss: 0.2246 - accuracy: 0.9035
Epoch 415/1000
164/164 - 0s - loss: 0.2325 - accuracy: 0.8985
Epoch 41

164/164 - 0s - loss: 0.2188 - accuracy: 0.9035
Epoch 533/1000
164/164 - 0s - loss: 0.2160 - accuracy: 0.9083
Epoch 534/1000
164/164 - 0s - loss: 0.2210 - accuracy: 0.9037
Epoch 535/1000
164/164 - 0s - loss: 0.2244 - accuracy: 0.9014
Epoch 536/1000
164/164 - 0s - loss: 0.2157 - accuracy: 0.9058
Epoch 537/1000
164/164 - 0s - loss: 0.2171 - accuracy: 0.9025
Epoch 538/1000
164/164 - 0s - loss: 0.2160 - accuracy: 0.9088
Epoch 539/1000
164/164 - 0s - loss: 0.2177 - accuracy: 0.9060
Epoch 540/1000
164/164 - 0s - loss: 0.2219 - accuracy: 0.9056
Epoch 541/1000
164/164 - 0s - loss: 0.2146 - accuracy: 0.9060
Epoch 542/1000
164/164 - 0s - loss: 0.2175 - accuracy: 0.9052
Epoch 543/1000
164/164 - 0s - loss: 0.2189 - accuracy: 0.9056
Epoch 544/1000
164/164 - 0s - loss: 0.2221 - accuracy: 0.9062
Epoch 545/1000
164/164 - 0s - loss: 0.2216 - accuracy: 0.9044
Epoch 546/1000
164/164 - 0s - loss: 0.2169 - accuracy: 0.9075
Epoch 547/1000
164/164 - 0s - loss: 0.2222 - accuracy: 0.9044
Epoch 548/1000
164/164 

Epoch 665/1000
164/164 - 0s - loss: 0.2202 - accuracy: 0.9060
Epoch 666/1000
164/164 - 0s - loss: 0.2114 - accuracy: 0.9100
Epoch 667/1000
164/164 - 0s - loss: 0.2122 - accuracy: 0.9088
Epoch 668/1000
164/164 - 0s - loss: 0.2128 - accuracy: 0.9056
Epoch 669/1000
164/164 - 0s - loss: 0.2142 - accuracy: 0.9029
Epoch 670/1000
164/164 - 0s - loss: 0.2123 - accuracy: 0.9079
Epoch 671/1000
164/164 - 0s - loss: 0.2125 - accuracy: 0.9048
Epoch 672/1000
164/164 - 0s - loss: 0.2156 - accuracy: 0.9046
Epoch 673/1000
164/164 - 0s - loss: 0.2108 - accuracy: 0.9096
Epoch 674/1000
164/164 - 0s - loss: 0.2166 - accuracy: 0.9064
Epoch 675/1000
164/164 - 0s - loss: 0.2156 - accuracy: 0.9037
Epoch 676/1000
164/164 - 0s - loss: 0.2122 - accuracy: 0.9090
Epoch 677/1000
164/164 - 0s - loss: 0.2149 - accuracy: 0.9048
Epoch 678/1000
164/164 - 0s - loss: 0.2132 - accuracy: 0.9062
Epoch 679/1000
164/164 - 0s - loss: 0.2136 - accuracy: 0.9098
Epoch 680/1000
164/164 - 0s - loss: 0.2158 - accuracy: 0.9069
Epoch 68

164/164 - 0s - loss: 0.2104 - accuracy: 0.9075
Epoch 798/1000
164/164 - 0s - loss: 0.2156 - accuracy: 0.9064
Epoch 799/1000
164/164 - 0s - loss: 0.2065 - accuracy: 0.9050
Epoch 800/1000
164/164 - 0s - loss: 0.2090 - accuracy: 0.9079
Epoch 801/1000
164/164 - 0s - loss: 0.2064 - accuracy: 0.9092
Epoch 802/1000
164/164 - 0s - loss: 0.2035 - accuracy: 0.9109
Epoch 803/1000
164/164 - 0s - loss: 0.2154 - accuracy: 0.9052
Epoch 804/1000
164/164 - 0s - loss: 0.2099 - accuracy: 0.9081
Epoch 805/1000
164/164 - 0s - loss: 0.2083 - accuracy: 0.9083
Epoch 806/1000
164/164 - 0s - loss: 0.2083 - accuracy: 0.9060
Epoch 807/1000
164/164 - 0s - loss: 0.2090 - accuracy: 0.9075
Epoch 808/1000
164/164 - 0s - loss: 0.2042 - accuracy: 0.9113
Epoch 809/1000
164/164 - 0s - loss: 0.2090 - accuracy: 0.9107
Epoch 810/1000
164/164 - 0s - loss: 0.2078 - accuracy: 0.9062
Epoch 811/1000
164/164 - 0s - loss: 0.2114 - accuracy: 0.9088
Epoch 812/1000
164/164 - 0s - loss: 0.2054 - accuracy: 0.9100
Epoch 813/1000
164/164 

Epoch 930/1000
164/164 - 0s - loss: 0.2041 - accuracy: 0.9069
Epoch 931/1000
164/164 - 0s - loss: 0.2003 - accuracy: 0.9128
Epoch 932/1000
164/164 - 0s - loss: 0.2013 - accuracy: 0.9092
Epoch 933/1000
164/164 - 0s - loss: 0.2023 - accuracy: 0.9126
Epoch 934/1000
164/164 - 0s - loss: 0.2070 - accuracy: 0.9105
Epoch 935/1000
164/164 - 0s - loss: 0.2010 - accuracy: 0.9107
Epoch 936/1000
164/164 - 0s - loss: 0.1994 - accuracy: 0.9138
Epoch 937/1000
164/164 - 0s - loss: 0.2003 - accuracy: 0.9117
Epoch 938/1000
164/164 - 0s - loss: 0.2003 - accuracy: 0.9113
Epoch 939/1000
164/164 - 0s - loss: 0.2052 - accuracy: 0.9077
Epoch 940/1000
164/164 - 0s - loss: 0.2064 - accuracy: 0.9083
Epoch 941/1000
164/164 - 0s - loss: 0.2053 - accuracy: 0.9105
Epoch 942/1000
164/164 - 0s - loss: 0.2049 - accuracy: 0.9119
Epoch 943/1000
164/164 - 0s - loss: 0.2066 - accuracy: 0.9138
Epoch 944/1000
164/164 - 0s - loss: 0.2093 - accuracy: 0.9100
Epoch 945/1000
164/164 - 0s - loss: 0.2072 - accuracy: 0.9123
Epoch 94

<keras.callbacks.History at 0x142765220>

In [19]:
# Evaluate the model using the testing data
model_loss, model_accuracy = model.evaluate(
    X_test_scaled, y_test_categorical, verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

55/55 - 0s - loss: 0.3841 - accuracy: 0.9016
Loss: 0.3840610384941101, Accuracy: 0.9016018509864807


### Save The Model

In [22]:
import joblib
filename = 'Anastasiades.h5'
model.save(filename)