In [12]:
# Dependencies
%matplotlib inline
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn import preprocessing
from sklearn.preprocessing import LabelEncoder, LabelBinarizer, MinMaxScaler
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import classification_report
from tensorflow.keras.utils import to_categorical

In [2]:
# Read exoplanet data
df = pd.read_csv("exoplanet_data.csv")
df = df.dropna(axis='columns', how='all')
df = df.dropna()
print(df.describe())
df.head()

       koi_fpflag_nt  koi_fpflag_ss  koi_fpflag_co  koi_fpflag_ec  \
count    6991.000000    6991.000000    6991.000000    6991.000000   
mean        0.157059       0.244743       0.202975       0.125018   
std         0.363882       0.429966       0.402243       0.330763   
min         0.000000       0.000000       0.000000       0.000000   
25%         0.000000       0.000000       0.000000       0.000000   
50%         0.000000       0.000000       0.000000       0.000000   
75%         0.000000       0.000000       0.000000       0.000000   
max         1.000000       1.000000       1.000000       1.000000   

        koi_period  koi_period_err1  koi_period_err2  koi_time0bk  \
count  6991.000000     6.991000e+03     6.991000e+03  6991.000000   
mean     56.191248     1.851122e-03    -1.851122e-03   164.488820   
std     117.570962     7.184503e-03     7.184503e-03    67.020475   
min       0.259820     1.100000e-08    -1.568000e-01   120.515914   
25%       2.620126     5.005000e-

Unnamed: 0,koi_disposition,koi_fpflag_nt,koi_fpflag_ss,koi_fpflag_co,koi_fpflag_ec,koi_period,koi_period_err1,koi_period_err2,koi_time0bk,koi_time0bk_err1,...,koi_steff_err2,koi_slogg,koi_slogg_err1,koi_slogg_err2,koi_srad,koi_srad_err1,koi_srad_err2,ra,dec,koi_kepmag
0,CONFIRMED,0,0,0,0,54.418383,0.0002479,-0.0002479,162.51384,0.00352,...,-81,4.467,0.064,-0.096,0.927,0.105,-0.061,291.93423,48.141651,15.347
1,FALSE POSITIVE,0,1,0,0,19.89914,1.49e-05,-1.49e-05,175.850252,0.000581,...,-176,4.544,0.044,-0.176,0.868,0.233,-0.078,297.00482,48.134129,15.436
2,FALSE POSITIVE,0,1,0,0,1.736952,2.63e-07,-2.63e-07,170.307565,0.000115,...,-174,4.564,0.053,-0.168,0.791,0.201,-0.067,285.53461,48.28521,15.597
3,CONFIRMED,0,0,0,0,2.525592,3.76e-06,-3.76e-06,171.59555,0.00113,...,-211,4.438,0.07,-0.21,1.046,0.334,-0.133,288.75488,48.2262,15.509
4,CONFIRMED,0,0,0,0,4.134435,1.05e-05,-1.05e-05,172.97937,0.0019,...,-232,4.486,0.054,-0.229,0.972,0.315,-0.105,296.28613,48.22467,15.714


In [3]:
print(df.columns)
df_renamed=df.rename(columns={"koi_disposition":"Disposition",
                              "koi_fpflag_nt":"Not Transit Like",
                              "koi_fpflag_ss":"Stellar Eclipse",
                              "koi_fpflag_co":"Centroid Offset",
                              "koi_fpflag_ec":"Ephemeris Match Indicates Contamination",
                              "koi_period":"Orbital Period (Days)",
                              "koi_period_err1":"Orbital Period Positive Error",
                              "koi_period_err2":"Orbital Period Negative Error",
                              "koi_time0bk":"Transit Epoch (BJD - 2,454,833 Days)",
                              "koi_time0bk_err1":"Transit Epoch Positive Error",
                              "koi_time0bk_err2":"Transit Epoch Negative Error",
                              "koi_impact":"Impact",
                              "koi_impact_err1":"Impact Positive Error",
                              "koi_impact_err2":"Impact Negative Error",
                              "koi_duration":"Transit Duration (Hrs)",
                              "koi_duration_err1":"Transit Duration Postive Error",
                              "koi_duration_err2":"Transit Duration Negative Error",
                              "koi_depth":"Transit Depth (Parts/M)",
                              "koi_depth_err1":"Transit Depth Positive Error",
                              "koi_depth_err2":"Transit Depth Negative Error",
                              "koi_prad":"Planetary Radius (Earth Radii)",
                              "koi_prad_err1":"Planetary Radius Positive Error",
                              "koi_prad_err2":"Planetary Radius Negative Error",
                              "koi_teq":"Equilibrium Temperature (K)",
                              "koi_insol":"Insolation Flux (Earth Flux)",
                              "koi_insol_err1":"Insolation Flux Positive Error",
                              "koi_insol_err2":"Insolation Flux Negative Error",
                              "koi_model_snr":"Transit Signal-to-Noise",
                              "koi_tce_plnt_num":"TCE Planet Number",
                              "koi_steff":"Stellar Effective Temperature (K)",
                              "koi_steff_err1":"Stellar Effective Temperature Positive Error",
                              "koi_steff_err2":"Stellar Effective Temperature Negative Error",
                              "koi_slogg":"Stellar Surface Gravity",
                              "koi_slogg_err1":"Stellar Surface Gravity Positive Error",
                              "koi_slogg_err2":"Stellar Surface Gravity Negative Error",
                              "koi_srad":"Stellar Radius (Solar Radii)",
                              "koi_srad_err1":"Stellar Radius Positive Error",
                              "koi_srad_err2":"Stellar Radius Negative Error",
                              "ra":"KIC Right Ascension (Degrees)",
                              "dec":"KIC Declination (Degrees)",
                              "koi_kepmag":"Kepler-band (Magnitude)"
                  })
print(df_renamed.columns)
df_renamed["Confirmed"] = df_renamed["Disposition"].apply(lambda x: 1 if x == "CONFIRMED" else 0)
df_renamed.head()

Index(['koi_disposition', 'koi_fpflag_nt', 'koi_fpflag_ss', 'koi_fpflag_co',
       'koi_fpflag_ec', 'koi_period', 'koi_period_err1', 'koi_period_err2',
       'koi_time0bk', 'koi_time0bk_err1', 'koi_time0bk_err2', 'koi_impact',
       'koi_impact_err1', 'koi_impact_err2', 'koi_duration',
       'koi_duration_err1', 'koi_duration_err2', 'koi_depth', 'koi_depth_err1',
       'koi_depth_err2', 'koi_prad', 'koi_prad_err1', 'koi_prad_err2',
       'koi_teq', 'koi_insol', 'koi_insol_err1', 'koi_insol_err2',
       'koi_model_snr', 'koi_tce_plnt_num', 'koi_steff', 'koi_steff_err1',
       'koi_steff_err2', 'koi_slogg', 'koi_slogg_err1', 'koi_slogg_err2',
       'koi_srad', 'koi_srad_err1', 'koi_srad_err2', 'ra', 'dec',
       'koi_kepmag'],
      dtype='object')
Index(['Disposition', 'Not Transit Like', 'Stellar Eclipse', 'Centroid Offset',
       'Ephemeris Match Indicates Contamination', 'Orbital Period (Days)',
       'Orbital Period Positive Error', 'Orbital Period Negative Error',
     

Unnamed: 0,Disposition,Not Transit Like,Stellar Eclipse,Centroid Offset,Ephemeris Match Indicates Contamination,Orbital Period (Days),Orbital Period Positive Error,Orbital Period Negative Error,"Transit Epoch (BJD - 2,454,833 Days)",Transit Epoch Positive Error,...,Stellar Surface Gravity,Stellar Surface Gravity Positive Error,Stellar Surface Gravity Negative Error,Stellar Radius (Solar Radii),Stellar Radius Positive Error,Stellar Radius Negative Error,KIC Right Ascension (Degrees),KIC Declination (Degrees),Kepler-band (Magnitude),Confirmed
0,CONFIRMED,0,0,0,0,54.418383,0.0002479,-0.0002479,162.51384,0.00352,...,4.467,0.064,-0.096,0.927,0.105,-0.061,291.93423,48.141651,15.347,1
1,FALSE POSITIVE,0,1,0,0,19.89914,1.49e-05,-1.49e-05,175.850252,0.000581,...,4.544,0.044,-0.176,0.868,0.233,-0.078,297.00482,48.134129,15.436,0
2,FALSE POSITIVE,0,1,0,0,1.736952,2.63e-07,-2.63e-07,170.307565,0.000115,...,4.564,0.053,-0.168,0.791,0.201,-0.067,285.53461,48.28521,15.597,0
3,CONFIRMED,0,0,0,0,2.525592,3.76e-06,-3.76e-06,171.59555,0.00113,...,4.438,0.07,-0.21,1.046,0.334,-0.133,288.75488,48.2262,15.509,1
4,CONFIRMED,0,0,0,0,4.134435,1.05e-05,-1.05e-05,172.97937,0.0019,...,4.486,0.054,-0.229,0.972,0.315,-0.105,296.28613,48.22467,15.714,1


In [37]:
# Preprocess y
y = df_renamed["Disposition"]
le = LabelEncoder()
le.fit(y)
print(list(le.classes_))
encoded_y = le.transform(y)
one_hot_y = to_categorical(encoded_y)
one_hot_y

['CANDIDATE', 'CONFIRMED', 'FALSE POSITIVE']


array([[0., 1., 0.],
       [0., 0., 1.],
       [0., 0., 1.],
       ...,
       [1., 0., 0.],
       [0., 0., 1.],
       [0., 0., 1.]], dtype=float32)

In [38]:
# Preprocess X
X = df_renamed.drop(columns=["Disposition","Confirmed"], axis=1)
X_scaler = MinMaxScaler().fit(X)
X_transformed = X_scaler.transform(X)
print(X_transformed.shape, one_hot_y.shape)
X_train, X_test, y_train, y_test = train_test_split(X_transformed, one_hot_y, random_state=42)
#X_scaler = MinMaxScaler().fit(X_train)
#X_train_scaled = X_scaler.transform(X_train)
#X_test_scaled = X_scaler.transform(X_test)

(6991, 40) (6991, 3)


In [39]:
# Gridsearch
model = SVC(kernel='linear')
model
param_grid = {'C': [1, 5, 10, 50],'gamma': [0.0001, 0.0005, 0.001, 0.005]}
grid = GridSearchCV(model, param_grid, verbose=3)
#grid.fit(X_train_scaled, y_train_categorical)
grid.fit(X_train, y_train)

Fitting 5 folds for each of 16 candidates, totalling 80 fits
[CV] C=1, gamma=0.0001 ...............................................
[CV] ..................... C=1, gamma=0.0001, score=nan, total=   0.0s
[CV] C=1, gamma=0.0001 ...............................................
[CV] ..................... C=1, gamma=0.0001, score=nan, total=   0.0s
[CV] C=1, gamma=0.0001 ...............................................
[CV] ..................... C=1, gamma=0.0001, score=nan, total=   0.0s
[CV] C=1, gamma=0.0001 ...............................................
[CV] ..................... C=1, gamma=0.0001, score=nan, total=   0.0s
[CV] C=1, gamma=0.0001 ...............................................
[CV] ..................... C=1, gamma=0.0001, score=nan, total=   0.0s
[CV] C=1, gamma=0.0005 ...............................................
[CV] ..................... C=1, gamma=0.0005, score=nan, total=   0.0s
[CV] C=1, gamma=0.0005 ...............................................
[CV] ...........

[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
ValueError: bad input shape (4194, 3)

[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s remaining:    0.0s
ValueError: bad input shape (4194, 3)

[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:    0.0s remaining:    0.0s
ValueError: bad input shape (4194, 3)

ValueError: bad input shape (4195, 3)

ValueError: bad input shape (4195, 3)

ValueError: bad input shape (4194, 3)

ValueError: bad input shape (4194, 3)

ValueError: bad input shape (4194, 3)

ValueError: bad input shape (4195, 3)

ValueError: bad input shape (4195, 3)

ValueError: bad input shape (4194, 3)

ValueError: bad input shape (4194, 3)

ValueError: bad input shape (4194, 3)

ValueError: bad input shape (4195, 3)

ValueError: bad input shape (4195, 3)

ValueError: bad input shape (4194, 3)

ValueError: bad input shape (4194, 3)

ValueError: bad input shape (4194, 3)

ValueError: bad input shape (4195, 3)

ValueError: bad in

[CV] ..................... C=10, gamma=0.001, score=nan, total=   0.0s
[CV] C=10, gamma=0.001 ...............................................
[CV] ..................... C=10, gamma=0.001, score=nan, total=   0.0s
[CV] C=10, gamma=0.005 ...............................................
[CV] ..................... C=10, gamma=0.005, score=nan, total=   0.0s
[CV] C=10, gamma=0.005 ...............................................
[CV] ..................... C=10, gamma=0.005, score=nan, total=   0.0s
[CV] C=10, gamma=0.005 ...............................................
[CV] ..................... C=10, gamma=0.005, score=nan, total=   0.0s
[CV] C=10, gamma=0.005 ...............................................
[CV] ..................... C=10, gamma=0.005, score=nan, total=   0.0s
[CV] C=10, gamma=0.005 ...............................................
[CV] ..................... C=10, gamma=0.005, score=nan, total=   0.0s
[CV] C=50, gamma=0.0001 ..............................................
[CV] .

ValueError: bad input shape (4195, 3)

ValueError: bad input shape (4194, 3)

ValueError: bad input shape (4194, 3)

ValueError: bad input shape (4194, 3)

ValueError: bad input shape (4195, 3)

ValueError: bad input shape (4195, 3)

ValueError: bad input shape (4194, 3)

ValueError: bad input shape (4194, 3)

ValueError: bad input shape (4194, 3)

ValueError: bad input shape (4195, 3)

ValueError: bad input shape (4195, 3)

ValueError: bad input shape (4194, 3)

ValueError: bad input shape (4194, 3)

ValueError: bad input shape (4194, 3)

ValueError: bad input shape (4195, 3)

ValueError: bad input shape (4195, 3)

ValueError: bad input shape (4194, 3)

ValueError: bad input shape (4194, 3)

ValueError: bad input shape (4194, 3)

ValueError: bad input shape (4195, 3)

ValueError: bad input shape (4195, 3)

ValueError: bad input shape (4194, 3)

ValueError: bad input shape (4194, 3)

ValueError: bad input shape (4194, 3)

ValueError: bad input shape (4195, 3)

ValueError: bad input sha

ValueError: bad input shape (5243, 3)

In [None]:
print("Best Parameters")
print(grid.best_params_)
print("Best Score")
print(grid.best_score_)
print("Classification Report")
predictions = grid.predict(X_test_scaled)
print(classification_report(y_test, grid_predictions))