In [None]:
# Update sklearn to prevent version mismatches
!pip install sklearn --upgrade

In [None]:
# install joblib. This will be used to save your model. 
# Restart your kernel after installing 
!pip install joblib

In [None]:
import pandas as pd

# Read the CSV and Perform Basic Data Cleaning

In [None]:
# Load the CSV, then drop columns that are entirely null followed by
# every row that still contains a null value.
df = (
    pd.read_csv("exoplanet_data.csv")
    .dropna(axis="columns", how="all")
    .dropna()
)
df.head()

# Select your features (columns)

In [None]:
# Columns used as model inputs (the X values).
feature_columns = [
    'koi_fpflag_nt', 'koi_fpflag_ss', 'koi_fpflag_co', 'koi_fpflag_ec',
    'koi_period', 'koi_time0bk', 'koi_impact', 'koi_duration',
    'koi_depth', 'koi_prad', 'koi_teq', 'koi_insol',
    'koi_model_snr', 'koi_tce_plnt_num', 'koi_steff', 'koi_slogg',
    'koi_srad', 'ra', 'dec', 'koi_kepmag',
]
selected_features = df[feature_columns]

In [None]:
# Display the feature frame to check which columns were selected.
selected_features

In [7]:
# Target values: keep koi_disposition as a one-column DataFrame.
y = df.loc[:, ["koi_disposition"]]

In [8]:
# Flatten the target to a 1-D array of labels.
# Reading straight from df (instead of reassigning y from itself) keeps this
# cell re-runnable: once y is a NumPy array it no longer has a .values attribute.
y = df["koi_disposition"].values
y.shape

(6991,)

# Create a Train Test Split

Use `koi_disposition` for the y values

In [9]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for testing. Fixing random_state makes the split,
# and every score derived from it, identical after a kernel restart.
X_train, X_test, y_train, y_test = train_test_split(
    selected_features, y, test_size=0.20, random_state=42
)

In [10]:
# Preview the first rows of the training features.
X_train.head()

Unnamed: 0,koi_fpflag_nt,koi_fpflag_ss,koi_fpflag_co,koi_fpflag_ec,koi_period,koi_time0bk,koi_impact,koi_duration,koi_depth,koi_prad,koi_teq,koi_insol,koi_model_snr,koi_tce_plnt_num,koi_steff,koi_slogg,koi_srad,ra,dec,koi_kepmag
1322,0,1,1,1,5.776759,143.09616,1.272,6.789,3522.5,32.77,954,195.79,141.4,1,5544,4.435,0.875,298.80206,44.420349,14.572
718,0,0,0,0,13.351512,363.086742,0.573,3.6045,3308.8,4.44,585,27.77,107.9,1,4843,4.562,0.754,290.1127,48.577782,15.825
6128,0,1,0,0,1.827559,132.26554,0.093,4.72808,314680.0,118.28,2178,5331.71,920.8,1,5977,3.854,2.077,301.3035,44.99966,13.661
3126,0,0,0,0,3.708586,134.47308,0.115,1.94,305.4,1.33,995,231.84,11.3,1,5420,4.596,0.771,291.64294,37.632408,15.947
5676,0,1,0,0,63.681929,167.547736,0.583,12.6047,341720.0,71.45,486,13.15,963.5,1,5979,4.386,1.054,288.53741,39.83408,14.995


# Pre-processing

Scale the data using the MinMaxScaler and perform some feature selection

In [11]:
# Scale your data
from sklearn.preprocessing import MinMaxScaler

X_scaler = MinMaxScaler().fit(X_train)

X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

In [None]:
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.utils import to_categorical

# Convert the string dispositions to integer codes, fitting on the training labels.
label_encoder = LabelEncoder()
label_encoder.fit(y_train)
encoded_y_train = label_encoder.transform(y_train)
encoded_y_test = label_encoder.transform(y_test)

# One-hot encode with an explicit class count. Without num_classes,
# to_categorical infers the width from the largest code present in each array,
# so a split missing the highest class would get fewer columns than the other.
num_classes = len(label_encoder.classes_)
y_train_categorical = to_categorical(encoded_y_train, num_classes=num_classes)
y_test_categorical = to_categorical(encoded_y_test, num_classes=num_classes)

# Train the Model



In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

# One hidden layer of 100 ReLU units feeding a 3-unit softmax output.
# The input width is read from the scaled training data rather than hard-coded,
# so the model stays in sync if the selected feature list changes.
model = Sequential()

model.add(Dense(units=100, activation='relu', input_dim=X_train_scaled.shape[1]))
model.add(Dense(units=3, activation='softmax'))

In [None]:
# Configure training: Adam optimizer, categorical cross-entropy loss,
# and accuracy as the reported metric.
compile_options = {
    "optimizer": "adam",
    "loss": "categorical_crossentropy",
    "metrics": ["accuracy"],
}
model.compile(**compile_options)

In [None]:
# Train on the scaled features and one-hot labels for 100 epochs,
# with shuffling enabled and verbose=2 logging.
fit_options = dict(epochs=100, shuffle=True, verbose=2)
model.fit(X_train_scaled, y_train_categorical, **fit_options)

In [None]:
# Evaluate on each split and print whatever model.evaluate returns.
train_score = model.evaluate(X_train_scaled, y_train_categorical)
print(f"Training Data Score: {train_score}")

test_score = model.evaluate(X_test_scaled, y_test_categorical)
print(f"Testing Data Score: {test_score}")

# Hyperparameter Tuning

Tune the model's parameters. `GridSearchCV` is the usual tool for this; the cell below instead sweeps the hidden-layer width with a manual loop.

In [None]:
# Candidate hidden-layer widths to compare. Named so that it does not shadow
# the built-in `list` for the rest of the kernel session.
hidden_unit_options = [25, 50, 75, 100]

# NOTE(review): candidates are compared on the test split, so the reported
# score is not an unbiased estimate for the chosen width — consider a
# separate validation split.
for units in hidden_unit_options:
    # Build a fresh model per candidate so no weights carry over between runs.
    model2 = Sequential()

    # Input width is taken from the data instead of a hard-coded 20.
    model2.add(Dense(units=units, activation='relu', input_dim=X_train_scaled.shape[1]))
    model2.add(Dense(units=3, activation='softmax'))

    model2.compile(optimizer='adam',
                   loss='categorical_crossentropy',
                   metrics=['accuracy'])

    model2.fit(X_train_scaled,
               y_train_categorical,
               epochs=100,
               shuffle=True,
               verbose=0)

    print(f"Units: {units}, testing score:{model2.evaluate(X_test_scaled, y_test_categorical)}")

# Save the Model

In [None]:
# Save your model by updating "your_name" with your name.
# Be sure to turn this in to BCS.
# If joblib fails to import, try running the command to install in terminal/git-bash.
import joblib

filename = 'your_name.sav'
# Dump the trained network held in `model`; the original placeholder name
# `your_model` is never defined in this notebook and raised a NameError.
# NOTE(review): serializing a Keras model with joblib depends on the installed
# TensorFlow/Keras version supporting pickling — confirm, or use model.save().
joblib.dump(model, filename)