In [1]:
# Update sklearn to prevent version mismatches
!pip install sklearn --upgrade

Requirement already up-to-date: sklearn in c:\users\potas\anaconda3\lib\site-packages (0.0)


In [2]:
# install joblib. This will be used to save your model. 
# Restart your kernel after installing 
!pip install joblib



In [3]:
import pandas as pd

from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from sklearn.svm import SVC 

# Necessary for scaling
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import LabelEncoder

from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import RandomForestClassifier

from sklearn.metrics import classification_report


# Read the CSV and Perform Basic Data Cleaning

In [4]:
df = pd.read_csv("exoplanet_data.csv")
# Drop the null columns where all values are null
df = df.dropna(axis='columns', how='all')
# Drop the null rows
df = df.dropna()
df.head(100)

Unnamed: 0,koi_disposition,koi_fpflag_nt,koi_fpflag_ss,koi_fpflag_co,koi_fpflag_ec,koi_period,koi_period_err1,koi_period_err2,koi_time0bk,koi_time0bk_err1,...,koi_steff_err2,koi_slogg,koi_slogg_err1,koi_slogg_err2,koi_srad,koi_srad_err1,koi_srad_err2,ra,dec,koi_kepmag
0,CONFIRMED,0,0,0,0,54.418383,2.479000e-04,-2.479000e-04,162.513840,0.003520,...,-81,4.467,0.064,-0.096,0.927,0.105,-0.061,291.93423,48.141651,15.347
1,FALSE POSITIVE,0,1,0,0,19.899140,1.490000e-05,-1.490000e-05,175.850252,0.000581,...,-176,4.544,0.044,-0.176,0.868,0.233,-0.078,297.00482,48.134129,15.436
2,FALSE POSITIVE,0,1,0,0,1.736952,2.630000e-07,-2.630000e-07,170.307565,0.000115,...,-174,4.564,0.053,-0.168,0.791,0.201,-0.067,285.53461,48.285210,15.597
3,CONFIRMED,0,0,0,0,2.525592,3.760000e-06,-3.760000e-06,171.595550,0.001130,...,-211,4.438,0.070,-0.210,1.046,0.334,-0.133,288.75488,48.226200,15.509
4,CONFIRMED,0,0,0,0,4.134435,1.050000e-05,-1.050000e-05,172.979370,0.001900,...,-232,4.486,0.054,-0.229,0.972,0.315,-0.105,296.28613,48.224670,15.714
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,CONFIRMED,0,0,0,0,8.103633,2.030000e-05,-2.030000e-05,176.948600,0.002050,...,-146,4.574,0.054,-0.041,0.732,0.062,-0.068,283.22141,40.421829,15.289
96,CONFIRMED,0,0,0,0,4.715108,1.940000e-05,-1.940000e-05,134.961780,0.003240,...,-146,4.574,0.054,-0.041,0.732,0.062,-0.068,283.22141,40.421829,15.289
97,CONFIRMED,0,0,0,0,6.365840,8.060000e-06,-8.060000e-06,171.135750,0.001000,...,-155,4.554,0.042,-0.168,0.849,0.212,-0.071,291.14951,40.420521,15.090
98,CONFIRMED,0,0,0,0,3.040330,3.370000e-07,-3.370000e-07,169.949011,0.000089,...,-151,4.519,0.084,-0.063,0.791,0.071,-0.087,299.31610,40.822380,15.028


# Select your features (columns)

In [5]:
# Create a random forest classifier
data_df = df.drop('koi_disposition', axis=1)
target_df = df['koi_disposition']

rf = RandomForestClassifier(n_estimators=200)
rf = rf.fit(data_df, target_df)
rf.score(data_df, target_df)

1.0

In [6]:
importances = rf.feature_importances_
importances

array([0.10110254, 0.06246272, 0.11153691, 0.03743911, 0.02117024,
       0.01674539, 0.01605533, 0.01303737, 0.02353362, 0.02445467,
       0.0182652 , 0.01054507, 0.01083092, 0.0233438 , 0.03312069,
       0.03269316, 0.02108591, 0.01387878, 0.01306952, 0.05187095,
       0.02951427, 0.03520105, 0.01455219, 0.01443997, 0.01959444,
       0.01263898, 0.05796708, 0.00308077, 0.00946242, 0.03257497,
       0.02713803, 0.00866627, 0.00870389, 0.00949155, 0.0087613 ,
       0.01103208, 0.00816924, 0.01207371, 0.01053275, 0.01016314])

In [7]:
sorted(zip(rf.feature_importances_, list(data_df.columns)), reverse=True)

[(0.11153691370132846, 'koi_fpflag_co'),
 (0.10110253576083224, 'koi_fpflag_nt'),
 (0.06246271711065698, 'koi_fpflag_ss'),
 (0.05796708380614033, 'koi_model_snr'),
 (0.05187094609826031, 'koi_prad'),
 (0.03743910592756362, 'koi_fpflag_ec'),
 (0.03520105428443899, 'koi_prad_err2'),
 (0.03312068570638799, 'koi_duration_err1'),
 (0.032693160954005186, 'koi_duration_err2'),
 (0.0325749674858823, 'koi_steff_err1'),
 (0.029514273722054032, 'koi_prad_err1'),
 (0.027138034137982388, 'koi_steff_err2'),
 (0.024454674436801035, 'koi_time0bk_err2'),
 (0.023533622496744084, 'koi_time0bk_err1'),
 (0.02334380141151466, 'koi_duration'),
 (0.02117023503787026, 'koi_period'),
 (0.02108591079194181, 'koi_depth'),
 (0.019594435787664875, 'koi_insol_err1'),
 (0.018265196170722815, 'koi_impact'),
 (0.01674538722610158, 'koi_period_err1'),
 (0.01605533004751846, 'koi_period_err2'),
 (0.014552188984846235, 'koi_teq'),
 (0.014439968243934814, 'koi_insol'),
 (0.013878783571347215, 'koi_depth_err1'),
 (0.0130695

From the documentation, we cannot use any of the false positive flags, as koi_disposition is dependent on them. I am making a K nearest neighbors model, and I will start with koi_model_snr, which as a former astronomer I recognize as a signal to noise ratio, which is always a good first choice to characterise the dependability of an observation.

Also, errors in measurements are frequently correlate with the measurements themselves, so I have only select a measurement or its error, depending on what is highest on the importance list.

I notice that including data any further down on the importance list than 'koi_impact' starts to produce worsening models.

In [8]:
# Set features. This will also be used as your x values.

# All false positive flags removed, because they are accounted for in koi_disposition.

selected_features = df[['koi_model_snr', 'koi_prad', 'koi_duration_err1', 'koi_steff_err1']]#, \
#                        'koi_time0bk_err1', 'koi_period', 'koi_depth', \
#                        'koi_insol_err1', 'koi_impact']]
selected_features.head()

Unnamed: 0,koi_model_snr,koi_prad,koi_duration_err1,koi_steff_err1
0,25.8,2.83,0.116,81
1,76.3,14.6,0.0341,158
2,505.6,33.46,0.00537,157
3,40.9,2.75,0.042,169
4,40.2,2.77,0.0673,189


# Create a Train Test Split

Use `koi_disposition` for the y values

In [9]:
y = df['koi_disposition']
y.head()

0         CONFIRMED
1    FALSE POSITIVE
2    FALSE POSITIVE
3         CONFIRMED
4         CONFIRMED
Name: koi_disposition, dtype: object

In [10]:
X_train, X_test, y_train, y_test = train_test_split(selected_features, y, random_state=42)
X_train.head()

Unnamed: 0,koi_model_snr,koi_prad,koi_duration_err1,koi_steff_err1
6122,10.8,1.24,0.306,154
6370,13.8,0.86,0.282,158
2879,254.3,3.21,0.0,151
107,38.4,2.25,0.0595,76
29,696.5,12.21,0.0075,77


# Pre-processing

Scale the data using the MinMaxScaler and perform some feature selection

In [11]:
# Scale your data

# Inspired by MinMax under /supplemental in Ins_Data_Preprocessing
X_minmax = MinMaxScaler().fit(X_train)

X_train_minmax = X_minmax.transform(X_train)
X_test_minmax = X_minmax.transform(X_test)



# Train the Model



In [12]:
model = SVC(kernel='linear')
model.fit(X_train_minmax, y_train)
predictions = model.predict(X_test_minmax)

In [13]:
y_test

4982    FALSE POSITIVE
4866         CANDIDATE
2934    FALSE POSITIVE
5007    FALSE POSITIVE
3869    FALSE POSITIVE
             ...      
4006         CANDIDATE
6985         CANDIDATE
1468         CONFIRMED
5138         CONFIRMED
2770         CONFIRMED
Name: koi_disposition, Length: 1748, dtype: object

In [14]:
print(classification_report(y_test, predictions,
                            target_names=['CONFIRMED', 'CANDIDATE', 'FALSE POSITIVE']))


                precision    recall  f1-score   support

     CONFIRMED       0.00      0.00      0.00       411
     CANDIDATE       0.55      0.63      0.59       484
FALSE POSITIVE       0.62      0.87      0.73       853

      accuracy                           0.60      1748
     macro avg       0.39      0.50      0.44      1748
  weighted avg       0.46      0.60      0.52      1748



  _warn_prf(average, modifier, msg_start, len(result))


# Hyperparameter Tuning

Use `GridSearchCV` to tune the model's parameters

In [16]:
# Create the GridSearchCV model

model = SVC()

param_grid = {'kernel': ['linear', 'rbf', 'sigmoid'], 'C': [0.5, 1], 'gamma': [10000, 1000, 100, 10], \
             'shrinking': [True, False], 'probability': [True, False]}
grid = GridSearchCV(model, param_grid, verbose=3)

In [17]:
# Train the model with GridSearch
grid.fit(X_train_minmax, y_train)

Fitting 5 folds for each of 96 candidates, totalling 480 fits
[CV] C=0.5, gamma=10000, kernel=linear, probability=True, shrinking=True 


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


[CV]  C=0.5, gamma=10000, kernel=linear, probability=True, shrinking=True, score=0.609, total=   1.0s
[CV] C=0.5, gamma=10000, kernel=linear, probability=True, shrinking=True 


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.9s remaining:    0.0s


[CV]  C=0.5, gamma=10000, kernel=linear, probability=True, shrinking=True, score=0.613, total=   1.1s
[CV] C=0.5, gamma=10000, kernel=linear, probability=True, shrinking=True 


[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:    2.0s remaining:    0.0s


[CV]  C=0.5, gamma=10000, kernel=linear, probability=True, shrinking=True, score=0.616, total=   1.0s
[CV] C=0.5, gamma=10000, kernel=linear, probability=True, shrinking=True 
[CV]  C=0.5, gamma=10000, kernel=linear, probability=True, shrinking=True, score=0.602, total=   0.9s
[CV] C=0.5, gamma=10000, kernel=linear, probability=True, shrinking=True 
[CV]  C=0.5, gamma=10000, kernel=linear, probability=True, shrinking=True, score=0.610, total=   1.0s
[CV] C=0.5, gamma=10000, kernel=linear, probability=True, shrinking=False 
[CV]  C=0.5, gamma=10000, kernel=linear, probability=True, shrinking=False, score=0.609, total=   1.0s
[CV] C=0.5, gamma=10000, kernel=linear, probability=True, shrinking=False 
[CV]  C=0.5, gamma=10000, kernel=linear, probability=True, shrinking=False, score=0.613, total=   1.0s
[CV] C=0.5, gamma=10000, kernel=linear, probability=True, shrinking=False 
[CV]  C=0.5, gamma=10000, kernel=linear, probability=True, shrinking=False, score=0.616, total=   1.0s
[CV] C=0.5, 

[CV]  C=0.5, gamma=10000, kernel=sigmoid, probability=True, shrinking=False, score=0.506, total=   1.5s
[CV] C=0.5, gamma=10000, kernel=sigmoid, probability=False, shrinking=True 
[CV]  C=0.5, gamma=10000, kernel=sigmoid, probability=False, shrinking=True, score=0.504, total=   0.4s
[CV] C=0.5, gamma=10000, kernel=sigmoid, probability=False, shrinking=True 
[CV]  C=0.5, gamma=10000, kernel=sigmoid, probability=False, shrinking=True, score=0.504, total=   0.3s
[CV] C=0.5, gamma=10000, kernel=sigmoid, probability=False, shrinking=True 
[CV]  C=0.5, gamma=10000, kernel=sigmoid, probability=False, shrinking=True, score=0.504, total=   0.4s
[CV] C=0.5, gamma=10000, kernel=sigmoid, probability=False, shrinking=True 
[CV]  C=0.5, gamma=10000, kernel=sigmoid, probability=False, shrinking=True, score=0.504, total=   0.5s
[CV] C=0.5, gamma=10000, kernel=sigmoid, probability=False, shrinking=True 
[CV]  C=0.5, gamma=10000, kernel=sigmoid, probability=False, shrinking=True, score=0.506, total=   0

[CV]  C=0.5, gamma=1000, kernel=rbf, probability=False, shrinking=False, score=0.649, total=   0.7s
[CV] C=0.5, gamma=1000, kernel=rbf, probability=False, shrinking=False 
[CV]  C=0.5, gamma=1000, kernel=rbf, probability=False, shrinking=False, score=0.643, total=   0.9s
[CV] C=0.5, gamma=1000, kernel=rbf, probability=False, shrinking=False 
[CV]  C=0.5, gamma=1000, kernel=rbf, probability=False, shrinking=False, score=0.635, total=   0.8s
[CV] C=0.5, gamma=1000, kernel=rbf, probability=False, shrinking=False 
[CV]  C=0.5, gamma=1000, kernel=rbf, probability=False, shrinking=False, score=0.640, total=   0.8s
[CV] C=0.5, gamma=1000, kernel=sigmoid, probability=True, shrinking=True 
[CV]  C=0.5, gamma=1000, kernel=sigmoid, probability=True, shrinking=True, score=0.505, total=   1.8s
[CV] C=0.5, gamma=1000, kernel=sigmoid, probability=True, shrinking=True 
[CV]  C=0.5, gamma=1000, kernel=sigmoid, probability=True, shrinking=True, score=0.506, total=   1.8s
[CV] C=0.5, gamma=1000, kernel=s

[CV]  C=0.5, gamma=100, kernel=rbf, probability=True, shrinking=True, score=0.625, total=   2.0s
[CV] C=0.5, gamma=100, kernel=rbf, probability=True, shrinking=True ..
[CV]  C=0.5, gamma=100, kernel=rbf, probability=True, shrinking=True, score=0.640, total=   2.0s
[CV] C=0.5, gamma=100, kernel=rbf, probability=True, shrinking=False .
[CV]  C=0.5, gamma=100, kernel=rbf, probability=True, shrinking=False, score=0.646, total=   2.0s
[CV] C=0.5, gamma=100, kernel=rbf, probability=True, shrinking=False .
[CV]  C=0.5, gamma=100, kernel=rbf, probability=True, shrinking=False, score=0.643, total=   2.1s
[CV] C=0.5, gamma=100, kernel=rbf, probability=True, shrinking=False .
[CV]  C=0.5, gamma=100, kernel=rbf, probability=True, shrinking=False, score=0.641, total=   2.0s
[CV] C=0.5, gamma=100, kernel=rbf, probability=True, shrinking=False .
[CV]  C=0.5, gamma=100, kernel=rbf, probability=True, shrinking=False, score=0.625, total=   2.1s
[CV] C=0.5, gamma=100, kernel=rbf, probability=True, shrink

[CV]  C=0.5, gamma=10, kernel=linear, probability=False, shrinking=True, score=0.613, total=   0.3s
[CV] C=0.5, gamma=10, kernel=linear, probability=False, shrinking=True 
[CV]  C=0.5, gamma=10, kernel=linear, probability=False, shrinking=True, score=0.616, total=   0.4s
[CV] C=0.5, gamma=10, kernel=linear, probability=False, shrinking=True 
[CV]  C=0.5, gamma=10, kernel=linear, probability=False, shrinking=True, score=0.602, total=   0.3s
[CV] C=0.5, gamma=10, kernel=linear, probability=False, shrinking=True 
[CV]  C=0.5, gamma=10, kernel=linear, probability=False, shrinking=True, score=0.610, total=   0.3s
[CV] C=0.5, gamma=10, kernel=linear, probability=False, shrinking=False 
[CV]  C=0.5, gamma=10, kernel=linear, probability=False, shrinking=False, score=0.609, total=   0.3s
[CV] C=0.5, gamma=10, kernel=linear, probability=False, shrinking=False 
[CV]  C=0.5, gamma=10, kernel=linear, probability=False, shrinking=False, score=0.613, total=   0.2s
[CV] C=0.5, gamma=10, kernel=linear,

[CV]  C=0.5, gamma=10, kernel=sigmoid, probability=False, shrinking=False, score=0.563, total=   0.4s
[CV] C=1, gamma=10000, kernel=linear, probability=True, shrinking=True 
[CV]  C=1, gamma=10000, kernel=linear, probability=True, shrinking=True, score=0.614, total=   1.0s
[CV] C=1, gamma=10000, kernel=linear, probability=True, shrinking=True 
[CV]  C=1, gamma=10000, kernel=linear, probability=True, shrinking=True, score=0.613, total=   1.0s
[CV] C=1, gamma=10000, kernel=linear, probability=True, shrinking=True 
[CV]  C=1, gamma=10000, kernel=linear, probability=True, shrinking=True, score=0.621, total=   1.0s
[CV] C=1, gamma=10000, kernel=linear, probability=True, shrinking=True 
[CV]  C=1, gamma=10000, kernel=linear, probability=True, shrinking=True, score=0.600, total=   1.0s
[CV] C=1, gamma=10000, kernel=linear, probability=True, shrinking=True 
[CV]  C=1, gamma=10000, kernel=linear, probability=True, shrinking=True, score=0.615, total=   1.0s
[CV] C=1, gamma=10000, kernel=linear, 

[CV]  C=1, gamma=10000, kernel=sigmoid, probability=True, shrinking=False, score=0.482, total=   1.3s
[CV] C=1, gamma=10000, kernel=sigmoid, probability=True, shrinking=False 
[CV]  C=1, gamma=10000, kernel=sigmoid, probability=True, shrinking=False, score=0.504, total=   1.3s
[CV] C=1, gamma=10000, kernel=sigmoid, probability=True, shrinking=False 
[CV]  C=1, gamma=10000, kernel=sigmoid, probability=True, shrinking=False, score=0.480, total=   1.3s
[CV] C=1, gamma=10000, kernel=sigmoid, probability=False, shrinking=True 
[CV]  C=1, gamma=10000, kernel=sigmoid, probability=False, shrinking=True, score=0.486, total=   0.4s
[CV] C=1, gamma=10000, kernel=sigmoid, probability=False, shrinking=True 
[CV]  C=1, gamma=10000, kernel=sigmoid, probability=False, shrinking=True, score=0.473, total=   0.4s
[CV] C=1, gamma=10000, kernel=sigmoid, probability=False, shrinking=True 
[CV]  C=1, gamma=10000, kernel=sigmoid, probability=False, shrinking=True, score=0.482, total=   0.4s
[CV] C=1, gamma=10

[CV]  C=1, gamma=1000, kernel=rbf, probability=False, shrinking=False, score=0.651, total=   1.0s
[CV] C=1, gamma=1000, kernel=rbf, probability=False, shrinking=False .
[CV]  C=1, gamma=1000, kernel=rbf, probability=False, shrinking=False, score=0.648, total=   1.0s
[CV] C=1, gamma=1000, kernel=rbf, probability=False, shrinking=False .
[CV]  C=1, gamma=1000, kernel=rbf, probability=False, shrinking=False, score=0.642, total=   1.0s
[CV] C=1, gamma=1000, kernel=rbf, probability=False, shrinking=False .
[CV]  C=1, gamma=1000, kernel=rbf, probability=False, shrinking=False, score=0.637, total=   1.0s
[CV] C=1, gamma=1000, kernel=rbf, probability=False, shrinking=False .
[CV]  C=1, gamma=1000, kernel=rbf, probability=False, shrinking=False, score=0.641, total=   1.0s
[CV] C=1, gamma=1000, kernel=sigmoid, probability=True, shrinking=True 
[CV]  C=1, gamma=1000, kernel=sigmoid, probability=True, shrinking=True, score=0.505, total=   1.7s
[CV] C=1, gamma=1000, kernel=sigmoid, probability=True

[CV]  C=1, gamma=100, kernel=rbf, probability=True, shrinking=True, score=0.634, total=   2.0s
[CV] C=1, gamma=100, kernel=rbf, probability=True, shrinking=True ....
[CV]  C=1, gamma=100, kernel=rbf, probability=True, shrinking=True, score=0.637, total=   2.1s
[CV] C=1, gamma=100, kernel=rbf, probability=True, shrinking=False ...
[CV]  C=1, gamma=100, kernel=rbf, probability=True, shrinking=False, score=0.650, total=   2.2s
[CV] C=1, gamma=100, kernel=rbf, probability=True, shrinking=False ...
[CV]  C=1, gamma=100, kernel=rbf, probability=True, shrinking=False, score=0.649, total=   2.1s
[CV] C=1, gamma=100, kernel=rbf, probability=True, shrinking=False ...
[CV]  C=1, gamma=100, kernel=rbf, probability=True, shrinking=False, score=0.644, total=   2.2s
[CV] C=1, gamma=100, kernel=rbf, probability=True, shrinking=False ...
[CV]  C=1, gamma=100, kernel=rbf, probability=True, shrinking=False, score=0.634, total=   2.1s
[CV] C=1, gamma=100, kernel=rbf, probability=True, shrinking=False ...


[CV]  C=1, gamma=10, kernel=linear, probability=False, shrinking=True, score=0.621, total=   0.3s
[CV] C=1, gamma=10, kernel=linear, probability=False, shrinking=True .
[CV]  C=1, gamma=10, kernel=linear, probability=False, shrinking=True, score=0.600, total=   0.3s
[CV] C=1, gamma=10, kernel=linear, probability=False, shrinking=True .
[CV]  C=1, gamma=10, kernel=linear, probability=False, shrinking=True, score=0.615, total=   0.3s
[CV] C=1, gamma=10, kernel=linear, probability=False, shrinking=False 
[CV]  C=1, gamma=10, kernel=linear, probability=False, shrinking=False, score=0.614, total=   0.3s
[CV] C=1, gamma=10, kernel=linear, probability=False, shrinking=False 
[CV]  C=1, gamma=10, kernel=linear, probability=False, shrinking=False, score=0.613, total=   0.3s
[CV] C=1, gamma=10, kernel=linear, probability=False, shrinking=False 
[CV]  C=1, gamma=10, kernel=linear, probability=False, shrinking=False, score=0.621, total=   0.3s
[CV] C=1, gamma=10, kernel=linear, probability=False, 

[Parallel(n_jobs=1)]: Done 480 out of 480 | elapsed:  9.4min finished


GridSearchCV(cv=None, error_score=nan,
             estimator=SVC(C=1.0, break_ties=False, cache_size=200,
                           class_weight=None, coef0=0.0,
                           decision_function_shape='ovr', degree=3,
                           gamma='scale', kernel='rbf', max_iter=-1,
                           probability=False, random_state=None, shrinking=True,
                           tol=0.001, verbose=False),
             iid='deprecated', n_jobs=None,
             param_grid={'C': [0.5, 1], 'gamma': [10000, 1000, 100, 10],
                         'kernel': ['linear', 'rbf', 'sigmoid'],
                         'probability': [True, False],
                         'shrinking': [True, False]},
             pre_dispatch='2*n_jobs', refit=True, return_train_score=False,
             scoring=None, verbose=3)

In [18]:
print(grid.best_params_)
print(grid.best_score_)

{'C': 0.5, 'gamma': 10000, 'kernel': 'rbf', 'probability': True, 'shrinking': True}
0.6515339945713475


In [19]:
# See how the model does predicting koi_disposition.

predictions = grid.predict(X_test_minmax)
print('Test Acc: %.3f' % grid.score(X_test_minmax, y_test))

Test Acc: 0.642


About 64% accuracy seems to be about as good as I can do with a support vector machine model. 

In [21]:
# My previous result favors large gamma and small C. I will keep kernel=rbf, probability true and shrinking true
# and experiment with some smaller C and larger gamma

model = SVC(kernel='rbf', probability=True, shrinking=True)

param_grid = {'C': [0.5, 0.1, 0.01], 'gamma': [10000, 100000]}
grid = GridSearchCV(model, param_grid, verbose=3)

In [22]:
# Train the model with GridSearch
grid.fit(X_train_minmax, y_train)

Fitting 5 folds for each of 6 candidates, totalling 30 fits
[CV] C=0.5, gamma=10000 ..............................................


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


[CV] .................. C=0.5, gamma=10000, score=0.663, total=   3.1s
[CV] C=0.5, gamma=10000 ..............................................


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    3.0s remaining:    0.0s


[CV] .................. C=0.5, gamma=10000, score=0.650, total=   2.9s
[CV] C=0.5, gamma=10000 ..............................................


[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:    5.9s remaining:    0.0s


[CV] .................. C=0.5, gamma=10000, score=0.649, total=   3.0s
[CV] C=0.5, gamma=10000 ..............................................
[CV] .................. C=0.5, gamma=10000, score=0.648, total=   2.9s
[CV] C=0.5, gamma=10000 ..............................................
[CV] .................. C=0.5, gamma=10000, score=0.648, total=   2.9s
[CV] C=0.5, gamma=100000 .............................................
[CV] ................. C=0.5, gamma=100000, score=0.652, total=   6.1s
[CV] C=0.5, gamma=100000 .............................................
[CV] ................. C=0.5, gamma=100000, score=0.628, total=   6.3s
[CV] C=0.5, gamma=100000 .............................................
[CV] ................. C=0.5, gamma=100000, score=0.629, total=   5.8s
[CV] C=0.5, gamma=100000 .............................................
[CV] ................. C=0.5, gamma=100000, score=0.635, total=   6.5s
[CV] C=0.5, gamma=100000 .............................................
[CV] .

[Parallel(n_jobs=1)]: Done  30 out of  30 | elapsed:  2.4min finished


GridSearchCV(cv=None, error_score=nan,
             estimator=SVC(C=1.0, break_ties=False, cache_size=200,
                           class_weight=None, coef0=0.0,
                           decision_function_shape='ovr', degree=3,
                           gamma='scale', kernel='rbf', max_iter=-1,
                           probability=True, random_state=None, shrinking=True,
                           tol=0.001, verbose=False),
             iid='deprecated', n_jobs=None,
             param_grid={'C': [0.5, 0.1, 0.01], 'gamma': [10000, 100000]},
             pre_dispatch='2*n_jobs', refit=True, return_train_score=False,
             scoring=None, verbose=3)

In [23]:
print(grid.best_params_)
print(grid.best_score_)
predictions = grid.predict(X_test_minmax)
print('Test Acc: %.3f' % grid.score(X_test_minmax, y_test))

{'C': 0.5, 'gamma': 10000}
0.6515339945713475
Test Acc: 0.642


No improvement, therefore I conclude that 64% is about as good as can be achieved with svc.