In [215]:
# Update sklearn to prevent version mismatches
!pip install sklearn --upgrade



In [216]:
# install joblib. This will be used to save your model. 
# Restart your kernel after installing 
!pip install joblib



In [217]:
import pandas as pd
import numpy as np
import tensorflow
tensorflow.keras.__version__

'2.5.0'

# Read the CSV and Perform Basic Data Cleaning

In [218]:
df = pd.read_csv("exoplanet_data.csv")
# Drop the null columns where all values are null
#df = df.dropna(axis='columns', how='all')
# Drop the null rows
#df = df.dropna()
df.head()

Unnamed: 0,rowid,kepid,kepoi_name,kepler_name,koi_disposition,koi_pdisposition,koi_score,koi_fpflag_nt,koi_fpflag_ss,koi_fpflag_co,...,koi_steff_err2,koi_slogg,koi_slogg_err1,koi_slogg_err2,koi_srad,koi_srad_err1,koi_srad_err2,ra,dec,koi_kepmag
0,1,10797460,K00752.01,Kepler-227 b,CONFIRMED,CANDIDATE,1.0,0,0,0,...,-81.0,4.467,0.064,-0.096,0.927,0.105,-0.061,291.93423,48.141651,15.347
1,2,10797460,K00752.02,Kepler-227 c,CONFIRMED,CANDIDATE,0.969,0,0,0,...,-81.0,4.467,0.064,-0.096,0.927,0.105,-0.061,291.93423,48.141651,15.347
2,3,10811496,K00753.01,,FALSE POSITIVE,FALSE POSITIVE,0.0,0,1,0,...,-176.0,4.544,0.044,-0.176,0.868,0.233,-0.078,297.00482,48.134129,15.436
3,4,10848459,K00754.01,,FALSE POSITIVE,FALSE POSITIVE,0.0,0,1,0,...,-174.0,4.564,0.053,-0.168,0.791,0.201,-0.067,285.53461,48.28521,15.597
4,5,10854555,K00755.01,Kepler-664 b,CONFIRMED,CANDIDATE,1.0,0,0,0,...,-211.0,4.438,0.07,-0.21,1.046,0.334,-0.133,288.75488,48.2262,15.509


# Select your features (columns)

In [237]:
# Set features. This will also be used as your x values.
selected_features = df[['koi_fpflag_nt','koi_fpflag_ss','koi_fpflag_co','koi_fpflag_ec','koi_period','koi_time0bk','koi_impact','koi_duration','koi_depth','koi_prad','koi_teq','koi_insol','koi_model_snr','koi_steff','koi_slogg','koi_srad','ra','dec','koi_kepmag'
]]
y = df['koi_disposition']

print(selected_features.shape,y.shape)

(9564, 19) (9564,)


# Create a Train Test Split

Use `koi_disposition` for the y values

In [238]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
from tensorflow.keras.utils import to_categorical


In [239]:
X_train, X_test, y_train, y_test = train_test_split(selected_features, y, random_state=1)

In [240]:
np.unique(y_train)

array(['CANDIDATE', 'CONFIRMED', 'FALSE POSITIVE'], dtype=object)

In [241]:
np.unique(y_test)

array(['CANDIDATE', 'CONFIRMED', 'FALSE POSITIVE'], dtype=object)

In [242]:
# Step 1: Label-encode data set
label_encoder = LabelEncoder()
label_encoder.fit_transform(y)
encoded_y_train = label_encoder.transform(y_train)
encoded_y_test = label_encoder.transform(y_test)

In [243]:
# Step 2: Convert encoded labels to one-hot-encoding
y_train_categorical = to_categorical(encoded_y_train)
y_test_categorical = to_categorical(encoded_y_test)

In [244]:
X_train.head()

Unnamed: 0,koi_fpflag_nt,koi_fpflag_ss,koi_fpflag_co,koi_fpflag_ec,koi_period,koi_time0bk,koi_impact,koi_duration,koi_depth,koi_prad,koi_teq,koi_insol,koi_model_snr,koi_steff,koi_slogg,koi_srad,ra,dec,koi_kepmag
9310,1,0,0,0,25.34287,132.6844,0.554,1.51,230.5,1.85,706.0,58.81,7.5,6311.0,4.398,1.172,292.9418,46.193691,15.0
8119,0,1,0,0,40.069675,153.018227,0.767,9.3564,41865.0,27.38,588.0,28.2,1304.8,5911.0,4.321,1.207,296.4682,46.85178,14.458
2858,0,0,1,0,0.575884,132.00778,1.258,2.0734,272.2,31.17,2397.0,7790.32,43.2,6290.0,4.442,1.044,300.71927,45.09605,15.024
5585,0,1,0,0,91.056419,204.836321,1.112,6.7282,57608.0,53.1,431.0,8.13,701.1,6142.0,4.456,1.046,287.66364,49.141499,15.959
4344,0,0,0,0,4.8435,134.81488,0.906,3.045,50.2,0.9,1121.0,373.1,10.4,5762.0,4.348,1.056,282.58282,42.346352,13.59


# Pre-processing

Scale the data using the MinMaxScaler and perform some feature selection

In [245]:
# Scale your data
X_scaler = MinMaxScaler().fit(X_train)
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

# Train the Model



In [246]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

In [255]:
# Create model and add layers
model = Sequential()
model.add(Dense(units=100, activation='relu', input_dim=19))
model.add(Dense(units=100, activation='relu'))
model.add(Dense(units=3, activation='softmax'))

In [256]:
# Compile and fit the model
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])

In [257]:
model.summary()

Model: "sequential_28"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_63 (Dense)             (None, 100)               2000      
_________________________________________________________________
dense_64 (Dense)             (None, 100)               10100     
_________________________________________________________________
dense_65 (Dense)             (None, 3)                 303       
Total params: 12,403
Trainable params: 12,403
Non-trainable params: 0
_________________________________________________________________


In [258]:
model.fit(
    X_train_scaled,
    y_train_categorical,
    epochs=60,
    shuffle=True,
    verbose=2
)

Epoch 1/60
225/225 - 0s - loss: nan - accuracy: 0.2360
Epoch 2/60
225/225 - 0s - loss: nan - accuracy: 0.2360
Epoch 3/60
225/225 - 0s - loss: nan - accuracy: 0.2360
Epoch 4/60
225/225 - 0s - loss: nan - accuracy: 0.2360
Epoch 5/60
225/225 - 0s - loss: nan - accuracy: 0.2360
Epoch 6/60
225/225 - 0s - loss: nan - accuracy: 0.2360
Epoch 7/60
225/225 - 0s - loss: nan - accuracy: 0.2360
Epoch 8/60
225/225 - 0s - loss: nan - accuracy: 0.2360
Epoch 9/60
225/225 - 0s - loss: nan - accuracy: 0.2360
Epoch 10/60
225/225 - 0s - loss: nan - accuracy: 0.2360
Epoch 11/60
225/225 - 0s - loss: nan - accuracy: 0.2360
Epoch 12/60
225/225 - 0s - loss: nan - accuracy: 0.2360
Epoch 13/60
225/225 - 0s - loss: nan - accuracy: 0.2360
Epoch 14/60
225/225 - 0s - loss: nan - accuracy: 0.2360
Epoch 15/60
225/225 - 0s - loss: nan - accuracy: 0.2360
Epoch 16/60
225/225 - 0s - loss: nan - accuracy: 0.2360
Epoch 17/60
225/225 - 0s - loss: nan - accuracy: 0.2360
Epoch 18/60
225/225 - 0s - loss: nan - accuracy: 0.2360
E

<tensorflow.python.keras.callbacks.History at 0x1c4b8ee89b0>

In [259]:
model_loss, model_accuracy = model.evaluate(
    X_test_scaled, y_test_categorical, verbose=2)
print(
    f"Normal Neural Network - Loss: {model_loss}, Accuracy: {model_accuracy}")

75/75 - 0s - loss: nan - accuracy: 0.2321
Normal Neural Network - Loss: nan, Accuracy: 0.23212045431137085


In [260]:
#Make predictions
encoded_predictions = model.predict_classes(X_test_scaled>0.5)
prediction_labels = label_encoder.inverse_transform(encoded_predictions)


In [253]:
print(f"Predicted classes: {prediction_labels}")
print(f"Actual Labels: {list(y_test[:5])}")

Predicted classes: ['CANDIDATE' 'CANDIDATE' 'CANDIDATE' ... 'CANDIDATE' 'CANDIDATE'
 'CANDIDATE']
Actual Labels: ['FALSE POSITIVE', 'CANDIDATE', 'FALSE POSITIVE', 'FALSE POSITIVE', 'CONFIRMED']


# Save the Model

In [254]:
# save your model by updating "your_name" with your name
# and "your_model" with your model variable
# be sure to turn this in to BCS
# if joblib fails to import, try running the command to install in terminal/git-bash
import joblib
model.save('model2.h5')