In [1]:
import pandas as  pd
from keras.models import Sequential
from keras.layers import Dense
from keras.wrappers.scikit_learn import KerasClassifier
from keras.utils import np_utils
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import KFold
from sklearn.preprocessing import LabelEncoder
from sklearn.pipeline import Pipeline

Using TensorFlow backend.


In [2]:
df = pd.read_csv("working_csv.csv")
cols = df.columns
col_list = ["P. Habitable Class","P. Teq Max (K)","S. Mag from Planet","P. SFlux Mean (EU)","S. Teff (K)","P. Mag","P. HZD","P. ESI","P. SPH","P. HZC","P. Gravity (EU)","S. Size from Planet (deg)","S. [Fe/H]","S. DEC (deg)","P. SFlux Min (EU)","S. Hab Zone Max (AU)","P. HZI","P. Eccentricity","P. Appar Size (deg)"]
for words in cols:
    if words not in col_list:
        df.drop(columns = [words] , inplace = True)

In [3]:
df = df.sample(frac=1)

In [4]:
dataset = df.values
X = dataset[:,1:19].astype(float)
Y = dataset[:,0]
print(X)
print(Y)

[[ 4.61000000e+00  3.31142500e+01  5.34272200e+01  7.89000000e+02
  -2.68100000e+01  2.13000000e+01  2.50000000e-01  4.55300000e+03
  -9.00000000e-02  2.34622000e+01 -3.11000000e+01  6.37870000e+00
   1.69790000e+01 -2.13000000e+00  7.27000000e+00  1.10000000e-01
   0.00000000e+00  1.20000000e-01]
 [ 2.15000000e+00  1.86649800e+00  2.40345200e+00  3.39400000e+02
  -2.29600000e+01  1.73400000e+01  1.30000000e-01  5.71000000e+03
   6.00000000e-02 -2.60267000e+01 -2.77000000e+01  8.49600000e-01
   1.56500000e+00 -1.20000000e+00  5.68000000e+00  1.30000000e-01
   0.00000000e+00  4.30000000e-01]
 [ 1.52000000e+00  1.21709400e+02  1.21709400e+02  8.46200000e+02
  -2.32600000e+01  2.81000000e+00  0.00000000e+00  6.09200000e+03
  -6.02714085e-02  4.25280000e+01 -3.20000000e+01  5.29070000e+00
   2.50700000e+00 -2.31000000e+00 -1.60000000e-01  2.90000000e-01
   0.00000000e+00  2.90000000e-01]
 [ 2.17000000e+00  5.51556900e-01  6.63001900e-01  2.41300000e+02
  -1.80500000e+01  3.48000000e+00  9.

In [5]:
# encode class values as integers
encoder = LabelEncoder()
encoder.fit(Y)
encoded_Y = encoder.transform(Y)
# convert integers to dummy variables (i.e. one hot encoded)
dummy_y = np_utils.to_categorical(encoded_Y)
print(dummy_y)

[[0. 1. 0.]
 [0. 1. 0.]
 [0. 1. 0.]
 [0. 0. 1.]
 [1. 0. 0.]
 [0. 0. 1.]
 [0. 1. 0.]
 [0. 1. 0.]
 [1. 0. 0.]
 [0. 1. 0.]
 [1. 0. 0.]
 [0. 1. 0.]
 [0. 0. 1.]
 [0. 0. 1.]
 [0. 0. 1.]
 [0. 1. 0.]
 [1. 0. 0.]
 [0. 0. 1.]
 [0. 0. 1.]
 [1. 0. 0.]
 [0. 1. 0.]
 [0. 1. 0.]
 [0. 1. 0.]
 [1. 0. 0.]
 [0. 1. 0.]
 [0. 0. 1.]
 [1. 0. 0.]
 [1. 0. 0.]
 [0. 1. 0.]
 [0. 0. 1.]
 [0. 0. 1.]
 [0. 0. 1.]
 [1. 0. 0.]
 [1. 0. 0.]
 [0. 1. 0.]
 [1. 0. 0.]
 [0. 0. 1.]
 [0. 1. 0.]
 [0. 0. 1.]
 [1. 0. 0.]
 [0. 0. 1.]
 [0. 0. 1.]
 [0. 0. 1.]
 [1. 0. 0.]
 [0. 1. 0.]
 [0. 0. 1.]
 [1. 0. 0.]
 [1. 0. 0.]
 [0. 1. 0.]
 [0. 0. 1.]
 [1. 0. 0.]
 [1. 0. 0.]
 [0. 1. 0.]
 [1. 0. 0.]]


In [6]:
# define baseline model
def baseline_model():
# create model
    model = Sequential()
    model.add(Dense(6, input_dim=18, activation='relu'))
    model.add(Dense(3, activation='softmax'))
    # Compile model
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model

estimator = KerasClassifier(build_fn=baseline_model, epochs=2000, batch_size=3, verbose=0)

In [7]:
kfold = KFold(n_splits=9, shuffle=True)
results = cross_val_score(estimator, X, dummy_y, cv=kfold)
print("Baseline: %.2f%% (%.2f%%)" % (results.mean()*100, results.std()*100))

Baseline: 87.04% (10.48%)
