# Importing libraries 

In [1]:
# Import packages
%matplotlib inline
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Sklearn Packages
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.ensemble import RandomForestClassifier

# Sklearn Evaluation Metrics
from sklearn import metrics
from sklearn.metrics import mean_squared_error, precision_score, confusion_matrix, accuracy_score

# Lift the column display cap so wide DataFrames are shown in full
pd.set_option('display.max_columns', None)

import warnings

# Silence deprecation-style warnings; filters are registered in this order
for noisy_category in (DeprecationWarning, FutureWarning):
    warnings.filterwarnings("ignore", category=noisy_category)

# Examine Data 

In [5]:
# Read the Kepler Objects of Interest table from a CSV in the working directory
csv_path = "kepler_objects_interest.csv"
data = pd.read_csv(csv_path)

# Report the (rows, columns) shape, then display the first rows as the cell output
print(data.shape)
data.head()

(9564, 83)


Unnamed: 0,rowid,kepid,kepoi_name,kepler_name,koi_disposition,koi_vet_stat,koi_vet_date,koi_pdisposition,koi_score,koi_fpflag_nt,koi_fpflag_ss,koi_fpflag_co,koi_fpflag_ec,koi_disp_prov,koi_comment,koi_period,koi_time0bk,koi_time0,koi_eccen,koi_longp,koi_impact,koi_duration,koi_ingress,koi_depth,koi_ror,koi_srho,koi_fittype,koi_prad,koi_sma,koi_incl,koi_teq,koi_insol,koi_dor,koi_limbdark_mod,koi_ldm_coeff4,koi_ldm_coeff3,koi_ldm_coeff2,koi_ldm_coeff1,koi_parm_prov,koi_max_sngle_ev,koi_max_mult_ev,koi_model_snr,koi_count,koi_num_transits,koi_tce_plnt_num,koi_tce_delivname,koi_quarters,koi_bin_oedp_sig,koi_trans_mod,koi_model_dof,koi_model_chisq,koi_datalink_dvr,koi_datalink_dvs,koi_steff,koi_slogg,koi_smet,koi_srad,koi_smass,koi_sage,koi_sparprov,ra,dec,koi_kepmag,koi_gmag,koi_rmag,koi_imag,koi_zmag,koi_jmag,koi_hmag,koi_kmag,koi_fwm_stat_sig,koi_fwm_sra,koi_fwm_sdec,koi_fwm_srao,koi_fwm_sdeco,koi_fwm_prao,koi_fwm_pdeco,koi_dicco_mra,koi_dicco_mdec,koi_dicco_msky,koi_dikco_mra,koi_dikco_mdec,koi_dikco_msky
0,1,10797460,K00752.01,Kepler-227 b,CONFIRMED,Done,2018-08-16,CANDIDATE,1.0,0,0,0,0,q1_q17_dr25_sup_koi,NO_COMMENT,9.488036,170.53875,2455003.539,0.0,,0.146,2.9575,,615.8,0.022344,3.20796,LS+MCMC,2.26,0.0853,89.66,793.0,93.59,24.81,Claret (2011 A&A 529 75) ATLAS LS,0.0,0.0,0.2291,0.4603,q1_q17_dr25_koi,5.135849,28.47082,35.8,2,142.0,1.0,q1_q17_dr25_tce,11111111111111111000000000000000,0.6864,Mandel and Agol (2002 ApJ 580 171),,,010/010797/010797460/dv/kplr010797460-20160209...,010/010797/010797460/dv/kplr010797460-001-2016...,5455.0,4.467,0.14,0.927,0.919,,q1_q17_dr25_stellar,291.93423,48.141651,15.347,15.89,15.27,15.114,15.006,14.082,13.751,13.648,0.002,19.462294,48.14191,0.43,0.94,-0.0002,-0.00055,-0.01,0.2,0.2,0.08,0.31,0.32
1,2,10797460,K00752.02,Kepler-227 c,CONFIRMED,Done,2018-08-16,CANDIDATE,0.969,0,0,0,0,q1_q17_dr25_sup_koi,NO_COMMENT,54.418383,162.51384,2454995.514,0.0,,0.586,4.507,,874.8,0.027954,3.02368,LS+MCMC,2.83,0.2734,89.57,443.0,9.11,77.9,Claret (2011 A&A 529 75) ATLAS LS,0.0,0.0,0.2291,0.4603,q1_q17_dr25_koi,7.027669,20.109507,25.8,2,25.0,2.0,q1_q17_dr25_tce,11111111111111111000000000000000,0.0023,Mandel and Agol (2002 ApJ 580 171),,,010/010797/010797460/dv/kplr010797460-20160209...,010/010797/010797460/dv/kplr010797460-002-2016...,5455.0,4.467,0.14,0.927,0.919,,q1_q17_dr25_stellar,291.93423,48.141651,15.347,15.89,15.27,15.114,15.006,14.082,13.751,13.648,0.003,19.462265,48.14199,-0.63,1.23,0.00066,-0.00105,0.39,0.0,0.39,0.49,0.12,0.5
2,3,10811496,K00753.01,,CANDIDATE,Done,2018-08-16,CANDIDATE,0.0,0,0,0,0,q1_q17_dr25_sup_koi,DEEP_V_SHAPED,19.89914,175.850252,2455008.85,0.0,,0.969,1.7822,,10829.0,0.154046,7.29555,LS+MCMC,14.6,0.1419,88.96,638.0,39.3,53.5,Claret (2011 A&A 529 75) ATLAS LS,0.0,0.0,0.2711,0.3858,q1_q17_dr25_koi,37.159767,187.4491,76.3,1,56.0,1.0,q1_q17_dr25_tce,11111101110111011000000000000000,0.6624,Mandel and Agol (2002 ApJ 580 171),,,010/010811/010811496/dv/kplr010811496-20160209...,010/010811/010811496/dv/kplr010811496-001-2016...,5853.0,4.544,-0.18,0.868,0.961,,q1_q17_dr25_stellar,297.00482,48.134129,15.436,15.943,15.39,15.22,15.166,14.254,13.9,13.826,0.278,19.800321,48.13412,-0.021,-0.038,0.0007,0.0006,-0.025,-0.034,0.042,0.002,-0.027,0.027
3,4,10848459,K00754.01,,FALSE POSITIVE,Done,2018-08-16,FALSE POSITIVE,0.0,0,1,0,0,q1_q17_dr25_sup_koi,MOD_ODDEVEN_DV---MOD_ODDEVEN_ALT---DEEP_V_SHAPED,1.736952,170.307565,2455003.308,0.0,,1.276,2.40641,,8079.2,0.387394,0.2208,LS+MCMC,33.46,0.0267,67.09,1395.0,891.96,3.278,Claret (2011 A&A 529 75) ATLAS LS,0.0,0.0,0.2865,0.3556,q1_q17_dr25_koi,39.06655,541.8951,505.6,1,621.0,1.0,q1_q17_dr25_tce,11111110111011101000000000000000,0.0,Mandel and Agol (2002 ApJ 580 171),,,010/010848/010848459/dv/kplr010848459-20160209...,010/010848/010848459/dv/kplr010848459-001-2016...,5805.0,4.564,-0.52,0.791,0.836,,q1_q17_dr25_stellar,285.53461,48.28521,15.597,16.1,15.554,15.382,15.266,14.326,13.911,13.809,0.0,19.035638,48.28521,-0.111,0.002,0.00302,-0.00142,-0.249,0.147,0.289,-0.257,0.099,0.276
4,5,10854555,K00755.01,Kepler-664 b,CONFIRMED,Done,2018-08-16,CANDIDATE,1.0,0,0,0,0,q1_q17_dr25_sup_koi,NO_COMMENT,2.525592,171.59555,2455004.596,0.0,,0.701,1.6545,,603.3,0.024064,1.98635,LS+MCMC,2.75,0.0374,85.41,1406.0,926.16,8.75,Claret (2011 A&A 529 75) ATLAS LS,0.0,0.0,0.2844,0.3661,q1_q17_dr25_koi,4.749945,33.1919,40.9,1,515.0,1.0,q1_q17_dr25_tce,01111111111111111000000000000000,0.309,Mandel and Agol (2002 ApJ 580 171),,,010/010854/010854555/dv/kplr010854555-20160209...,010/010854/010854555/dv/kplr010854555-001-2016...,6031.0,4.438,0.07,1.046,1.095,,q1_q17_dr25_stellar,288.75488,48.2262,15.509,16.015,15.468,15.292,15.241,14.366,14.064,13.952,0.733,19.250326,48.22626,-0.01,0.23,8e-05,-7e-05,0.03,-0.09,0.1,0.07,0.02,0.07


In [7]:
# Summarize the frame: per-column non-null counts and dtypes
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9564 entries, 0 to 9563
Data columns (total 83 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   rowid              9564 non-null   int64  
 1   kepid              9564 non-null   int64  
 2   kepoi_name         9564 non-null   object 
 3   kepler_name        2673 non-null   object 
 4   koi_disposition    9564 non-null   object 
 5   koi_vet_stat       9564 non-null   object 
 6   koi_vet_date       9564 non-null   object 
 7   koi_pdisposition   9564 non-null   object 
 8   koi_score          8054 non-null   float64
 9   koi_fpflag_nt      9564 non-null   int64  
 10  koi_fpflag_ss      9564 non-null   int64  
 11  koi_fpflag_co      9564 non-null   int64  
 12  koi_fpflag_ec      9564 non-null   int64  
 13  koi_disp_prov      9564 non-null   object 
 14  koi_comment        8355 non-null   object 
 15  koi_period         9564 non-null   float64
 16  koi_time0bk        9564 

In [8]:
# Map the archive's short column codes to descriptive labels.
# Columns absent from this mapping (e.g. "rowid") keep their original names.
# NOTE(review): the label for "koi_dicco_msky" ends in "[]arcsec" while its
# sibling columns use "[arcsec]", and "Comparision" is misspelled. Both are kept
# verbatim here because other cells may select columns by these exact strings.
column_labels = {
    "kepid": "KepID",
    "kepoi_name": "KOIName",
    "kepler_name": "KeplerName",
    "koi_disposition": "ExoplanetArchiveDisposition",
    "koi_vet_stat": "VettingStatus",
    "koi_vet_date": "DateofLastParameterUpdate",
    "koi_pdisposition": "DispositionUsingKeplerData",
    "koi_score": "DispositionScore",
    "koi_fpflag_nt": "NotTransit-LikeFalsePositiveFlag",
    "koi_fpflag_ss": "StellarEclipseFalsePositiveFlag",
    "koi_fpflag_co": "CentroidOffsetFalsePositiveFlag",
    "koi_fpflag_ec": "EphemerisMatchIndicatesContaminationFalsePositiveFlag",
    "koi_disp_prov": "DispositionProvenance",
    "koi_comment": "Comment",
    "koi_period": "OrbitalPeriod[days]",
    "koi_time0bk": "TransitEpoch[BKJD]",
    "koi_time0": "TransitEpoch[BJD]",
    "koi_eccen": "Eccentricity",
    "koi_longp": "Long.ofPeriastron[deg]",
    "koi_impact": "ImpactParameter",
    "koi_duration": "TransitDuration[hrs]",
    "koi_ingress": "IngressDuration[hrs]",
    "koi_depth": "TransitDepth[ppm]",
    "koi_ror": "Planet-StarRadiusRatio",
    "koi_srho": "FittedStellarDensity[g/cm**3]",
    "koi_fittype": "PlanetaryFitType",
    "koi_prad": "PlanetaryRadius[Earthradii]",
    "koi_sma": "OrbitSemi-MajorAxis[au]",
    "koi_incl": "Inclination[deg]",
    "koi_teq": "EquilibriumTemperature[K]",
    "koi_insol": "InsolationFlux[Earthflux]",
    "koi_dor": "Planet-StarDistanceoverStarRadius",
    "koi_limbdark_mod": "LimbDarkeningModel",
    "koi_ldm_coeff4": "LimbDarkeningCoeff.4",
    "koi_ldm_coeff3": "LimbDarkeningCoeff.3",
    "koi_ldm_coeff2": "LimbDarkeningCoeff.2",
    "koi_ldm_coeff1": "LimbDarkeningCoeff.1",
    "koi_parm_prov": "ParametersProvenance",
    "koi_max_sngle_ev": "MaximumSingleEventStatistic",
    "koi_max_mult_ev": "MaximumMultipleEventStatistic",
    "koi_model_snr": "TransitSignal-to-Noise",
    "koi_count": "NumberofPlanets",
    "koi_num_transits": "NumberofTransits",
    "koi_tce_plnt_num": "TCEPlanetNumber",
    "koi_tce_delivname": "TCEDelivery",
    "koi_quarters": "Quarters",
    "koi_bin_oedp_sig": "Odd-EvenDepthComparisionStatistic",
    "koi_trans_mod": "TransitModel",
    "koi_model_dof": "DegreesofFreedom",
    "koi_model_chisq": "Chi-Square",
    "koi_datalink_dvr": "LinktoDVReport",
    "koi_datalink_dvs": "LinktoDVSummary",
    "koi_steff": "StellarEffectiveTemperature[K]",
    "koi_slogg": "StellarSurfaceGravity[log10(cm/s**2)]",
    "koi_smet": "StellarMetallicity[dex]",
    "koi_srad": "StellarRadius[Solarradii]",
    "koi_smass": "StellarMass[Solarmass]",
    "koi_sage": "StellarAge[Gyr]",
    "koi_sparprov": "StellarParameterProvenance",
    "ra": "RA[decimaldegrees]",
    "dec": "Dec[decimaldegrees]",
    "koi_kepmag": "Kepler-band[mag]",
    "koi_gmag": "g'-band[mag]",
    "koi_rmag": "r'-band[mag]",
    "koi_imag": "i'-band[mag]",
    "koi_zmag": "z'-band[mag]",
    "koi_jmag": "J-band[mag]",
    "koi_hmag": "H-band[mag]",
    "koi_kmag": "K-band[mag]",
    "koi_fwm_stat_sig": "FWOffsetSignificance[percent]",
    "koi_fwm_sra": "FWSource&alpha;(OOT)[hrs]",
    "koi_fwm_sdec": "FWSource&delta;(OOT)[deg]",
    "koi_fwm_srao": "FWSource&Delta;&alpha;(OOT)[sec]",
    "koi_fwm_sdeco": "FWSource&Delta;&delta;(OOT)[arcsec]",
    "koi_fwm_prao": "FW&Delta;&alpha;(OOT)[sec]",
    "koi_fwm_pdeco": "FW&Delta;&delta;(OOT)[arcsec]",
    "koi_dicco_mra": "PRF&Delta;&alpha;<sub>SQ</sub>(OOT)[arcsec]",
    "koi_dicco_mdec": "PRF&Delta;&delta;<sub>SQ</sub>(OOT)[arcsec]",
    "koi_dicco_msky": "PRF&Delta;&theta;<sub>SQ</sub>(OOT)[]arcsec",
    "koi_dikco_mra": "PRF&Delta;&alpha;<sub>SQ</sub>(KIC)[arcsec]",
    "koi_dikco_mdec": "PRF&Delta;&delta;<sub>SQ</sub>(KIC)[arcsec]",
    "koi_dikco_msky": "PRF&Delta;&theta;<sub>SQ</sub>(KIC)[arcsec]",
}

# rename() returns a new frame; rebind `data` to the relabelled copy
data = data.rename(columns=column_labels)

In [10]:
# Preview the first rows again to confirm the renamed column labels
data.head()

Unnamed: 0,rowid,KepID,KOIName,KeplerName,ExoplanetArchiveDisposition,VettingStatus,DateofLastParameterUpdate,DispositionUsingKeplerData,DispositionScore,NotTransit-LikeFalsePositiveFlag,StellarEclipseFalsePositiveFlag,CentroidOffsetFalsePositiveFlag,EphemerisMatchIndicatesContaminationFalsePositiveFlag,DispositionProvenance,Comment,OrbitalPeriod[days],TransitEpoch[BKJD],TransitEpoch[BJD],Eccentricity,Long.ofPeriastron[deg],ImpactParameter,TransitDuration[hrs],IngressDuration[hrs],TransitDepth[ppm],Planet-StarRadiusRatio,FittedStellarDensity[g/cm**3],PlanetaryFitType,PlanetaryRadius[Earthradii],OrbitSemi-MajorAxis[au],Inclination[deg],EquilibriumTemperature[K],InsolationFlux[Earthflux],Planet-StarDistanceoverStarRadius,LimbDarkeningModel,LimbDarkeningCoeff.4,LimbDarkeningCoeff.3,LimbDarkeningCoeff.2,LimbDarkeningCoeff.1,ParametersProvenance,MaximumSingleEventStatistic,MaximumMultipleEventStatistic,TransitSignal-to-Noise,NumberofPlanets,NumberofTransits,TCEPlanetNumber,TCEDelivery,Quarters,Odd-EvenDepthComparisionStatistic,TransitModel,DegreesofFreedom,Chi-Square,LinktoDVReport,LinktoDVSummary,StellarEffectiveTemperature[K],StellarSurfaceGravity[log10(cm/s**2)],StellarMetallicity[dex],StellarRadius[Solarradii],StellarMass[Solarmass],StellarAge[Gyr],StellarParameterProvenance,RA[decimaldegrees],Dec[decimaldegrees],Kepler-band[mag],g'-band[mag],r'-band[mag],i'-band[mag],z'-band[mag],J-band[mag],H-band[mag],K-band[mag],FWOffsetSignificance[percent],FWSource&alpha;(OOT)[hrs],FWSource&delta;(OOT)[deg],FWSource&Delta;&alpha;(OOT)[sec],FWSource&Delta;&delta;(OOT)[arcsec],FW&Delta;&alpha;(OOT)[sec],FW&Delta;&delta;(OOT)[arcsec],PRF&Delta;&alpha;<sub>SQ</sub>(OOT)[arcsec],PRF&Delta;&delta;<sub>SQ</sub>(OOT)[arcsec],PRF&Delta;&theta;<sub>SQ</sub>(OOT)[]arcsec,PRF&Delta;&alpha;<sub>SQ</sub>(KIC)[arcsec],PRF&Delta;&delta;<sub>SQ</sub>(KIC)[arcsec],PRF&Delta;&theta;<sub>SQ</sub>(KIC)[arcsec]
0,1,10797460,K00752.01,Kepler-227 b,CONFIRMED,Done,2018-08-16,CANDIDATE,1.0,0,0,0,0,q1_q17_dr25_sup_koi,NO_COMMENT,9.488036,170.53875,2455003.539,0.0,,0.146,2.9575,,615.8,0.022344,3.20796,LS+MCMC,2.26,0.0853,89.66,793.0,93.59,24.81,Claret (2011 A&A 529 75) ATLAS LS,0.0,0.0,0.2291,0.4603,q1_q17_dr25_koi,5.135849,28.47082,35.8,2,142.0,1.0,q1_q17_dr25_tce,11111111111111111000000000000000,0.6864,Mandel and Agol (2002 ApJ 580 171),,,010/010797/010797460/dv/kplr010797460-20160209...,010/010797/010797460/dv/kplr010797460-001-2016...,5455.0,4.467,0.14,0.927,0.919,,q1_q17_dr25_stellar,291.93423,48.141651,15.347,15.89,15.27,15.114,15.006,14.082,13.751,13.648,0.002,19.462294,48.14191,0.43,0.94,-0.0002,-0.00055,-0.01,0.2,0.2,0.08,0.31,0.32
1,2,10797460,K00752.02,Kepler-227 c,CONFIRMED,Done,2018-08-16,CANDIDATE,0.969,0,0,0,0,q1_q17_dr25_sup_koi,NO_COMMENT,54.418383,162.51384,2454995.514,0.0,,0.586,4.507,,874.8,0.027954,3.02368,LS+MCMC,2.83,0.2734,89.57,443.0,9.11,77.9,Claret (2011 A&A 529 75) ATLAS LS,0.0,0.0,0.2291,0.4603,q1_q17_dr25_koi,7.027669,20.109507,25.8,2,25.0,2.0,q1_q17_dr25_tce,11111111111111111000000000000000,0.0023,Mandel and Agol (2002 ApJ 580 171),,,010/010797/010797460/dv/kplr010797460-20160209...,010/010797/010797460/dv/kplr010797460-002-2016...,5455.0,4.467,0.14,0.927,0.919,,q1_q17_dr25_stellar,291.93423,48.141651,15.347,15.89,15.27,15.114,15.006,14.082,13.751,13.648,0.003,19.462265,48.14199,-0.63,1.23,0.00066,-0.00105,0.39,0.0,0.39,0.49,0.12,0.5
2,3,10811496,K00753.01,,CANDIDATE,Done,2018-08-16,CANDIDATE,0.0,0,0,0,0,q1_q17_dr25_sup_koi,DEEP_V_SHAPED,19.89914,175.850252,2455008.85,0.0,,0.969,1.7822,,10829.0,0.154046,7.29555,LS+MCMC,14.6,0.1419,88.96,638.0,39.3,53.5,Claret (2011 A&A 529 75) ATLAS LS,0.0,0.0,0.2711,0.3858,q1_q17_dr25_koi,37.159767,187.4491,76.3,1,56.0,1.0,q1_q17_dr25_tce,11111101110111011000000000000000,0.6624,Mandel and Agol (2002 ApJ 580 171),,,010/010811/010811496/dv/kplr010811496-20160209...,010/010811/010811496/dv/kplr010811496-001-2016...,5853.0,4.544,-0.18,0.868,0.961,,q1_q17_dr25_stellar,297.00482,48.134129,15.436,15.943,15.39,15.22,15.166,14.254,13.9,13.826,0.278,19.800321,48.13412,-0.021,-0.038,0.0007,0.0006,-0.025,-0.034,0.042,0.002,-0.027,0.027
3,4,10848459,K00754.01,,FALSE POSITIVE,Done,2018-08-16,FALSE POSITIVE,0.0,0,1,0,0,q1_q17_dr25_sup_koi,MOD_ODDEVEN_DV---MOD_ODDEVEN_ALT---DEEP_V_SHAPED,1.736952,170.307565,2455003.308,0.0,,1.276,2.40641,,8079.2,0.387394,0.2208,LS+MCMC,33.46,0.0267,67.09,1395.0,891.96,3.278,Claret (2011 A&A 529 75) ATLAS LS,0.0,0.0,0.2865,0.3556,q1_q17_dr25_koi,39.06655,541.8951,505.6,1,621.0,1.0,q1_q17_dr25_tce,11111110111011101000000000000000,0.0,Mandel and Agol (2002 ApJ 580 171),,,010/010848/010848459/dv/kplr010848459-20160209...,010/010848/010848459/dv/kplr010848459-001-2016...,5805.0,4.564,-0.52,0.791,0.836,,q1_q17_dr25_stellar,285.53461,48.28521,15.597,16.1,15.554,15.382,15.266,14.326,13.911,13.809,0.0,19.035638,48.28521,-0.111,0.002,0.00302,-0.00142,-0.249,0.147,0.289,-0.257,0.099,0.276
4,5,10854555,K00755.01,Kepler-664 b,CONFIRMED,Done,2018-08-16,CANDIDATE,1.0,0,0,0,0,q1_q17_dr25_sup_koi,NO_COMMENT,2.525592,171.59555,2455004.596,0.0,,0.701,1.6545,,603.3,0.024064,1.98635,LS+MCMC,2.75,0.0374,85.41,1406.0,926.16,8.75,Claret (2011 A&A 529 75) ATLAS LS,0.0,0.0,0.2844,0.3661,q1_q17_dr25_koi,4.749945,33.1919,40.9,1,515.0,1.0,q1_q17_dr25_tce,01111111111111111000000000000000,0.309,Mandel and Agol (2002 ApJ 580 171),,,010/010854/010854555/dv/kplr010854555-20160209...,010/010854/010854555/dv/kplr010854555-001-2016...,6031.0,4.438,0.07,1.046,1.095,,q1_q17_dr25_stellar,288.75488,48.2262,15.509,16.015,15.468,15.292,15.241,14.366,14.064,13.952,0.733,19.250326,48.22626,-0.01,0.23,8e-05,-7e-05,0.03,-0.09,0.1,0.07,0.02,0.07
