# SVM Support Vector Machine

In [2]:
# Imports
import warnings
warnings.simplefilter('ignore')

# %matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import MinMaxScaler
from sklearn.svm import SVC 

In [3]:
# Read the csv file into a pandas DataFrame
exoplanet_complete_kNN = pd.read_csv('exoplanet_complete_kNN.csv')
exoplanet_complete_kNN.head()

Unnamed: 0,koi_disposition,koi_score,koi_period,koi_time0bk,koi_impact,koi_duration,koi_depth,koi_prad,koi_teq,koi_insol,koi_steff,koi_slogg,koi_srad,ra,dec,koi_kepmag
0,1,1.0,9.488036,170.53875,0.146,2.9575,615.8,2.26,793,93.59,5455,4.467,0.927,291.93423,48.141651,15.347
1,1,0.969,54.418383,162.51384,0.586,4.507,874.8,2.83,443,9.11,5455,4.467,0.927,291.93423,48.141651,15.347
2,3,0.0,19.89914,175.850252,0.969,1.7822,10829.0,14.6,638,39.3,5853,4.544,0.868,297.00482,48.134129,15.436
3,3,0.0,1.736952,170.307565,1.276,2.40641,8079.2,33.46,1395,891.96,5805,4.564,0.791,285.53461,48.28521,15.597
4,1,1.0,2.525592,171.59555,0.701,1.6545,603.3,2.75,1406,926.16,6031,4.438,1.046,288.75488,48.2262,15.509


### key for koi_disposition:
1 = CONFIRMED, 
2 = CANDIDATE, 
3 = FALSE POSITIVE

In [4]:

target = exoplanet_complete_kNN['koi_disposition']
target_names = ["1", "2", "3"]


In [5]:
data = exoplanet_complete_kNN.drop('koi_disposition', axis=1)
feature_names = data.columns
data.head()

Unnamed: 0,koi_score,koi_period,koi_time0bk,koi_impact,koi_duration,koi_depth,koi_prad,koi_teq,koi_insol,koi_steff,koi_slogg,koi_srad,ra,dec,koi_kepmag
0,1.0,9.488036,170.53875,0.146,2.9575,615.8,2.26,793,93.59,5455,4.467,0.927,291.93423,48.141651,15.347
1,0.969,54.418383,162.51384,0.586,4.507,874.8,2.83,443,9.11,5455,4.467,0.927,291.93423,48.141651,15.347
2,0.0,19.89914,175.850252,0.969,1.7822,10829.0,14.6,638,39.3,5853,4.544,0.868,297.00482,48.134129,15.436
3,0.0,1.736952,170.307565,1.276,2.40641,8079.2,33.46,1395,891.96,5805,4.564,0.791,285.53461,48.28521,15.597
4,1.0,2.525592,171.59555,0.701,1.6545,603.3,2.75,1406,926.16,6031,4.438,1.046,288.75488,48.2262,15.509


In [6]:
# Separate train and test data
X_train, X_test, y_train, y_test = train_test_split(data, target, random_state=42)

In [7]:
# Support vector machine with default classifier rbf. 
# rbf = radial basis function
model = SVC()
model.fit(X_train, y_train)

SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='auto_deprecated',
  kernel='rbf', max_iter=-1, probability=False, random_state=None,
  shrinking=True, tol=0.001, verbose=False)

In [8]:
# Model Accuracy
print('Test Acc: %.3f' % model.score(X_test, y_test))

Test Acc: 0.497


In [9]:
# Calculate classification report
predictions = model.predict(X_test)
print(classification_report(y_test, predictions,
                            target_names=target_names))

              precision    recall  f1-score   support

           1       0.00      0.00      0.00       571
           2       0.00      0.00      0.00       435
           3       0.50      1.00      0.66       993

   micro avg       0.50      0.50      0.50      1999
   macro avg       0.17      0.33      0.22      1999
weighted avg       0.25      0.50      0.33      1999



Tried svc kernals = linear and = poly. I let them run longer than 10 minutes but force quit the kernal because it hadn't produced output.

In [12]:
# Try SVC model with rbf again but this time include gamma and C values
model = SVC(gamma = .01, C=1)
model.fit(X_train, y_train)

SVC(C=1, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma=0.01, kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)

In [11]:
# Model Accuracy
print('Test Acc: %.3f' % model.score(X_test, y_test))

Test Acc: 0.496


In [13]:
# Try SVC model with rbf again but with a new gamma value
model = SVC(gamma = 10, C=1)
model.fit(X_train, y_train)

SVC(C=1, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma=10, kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)

In [14]:
# Model Accuracy
print('Test Acc: %.3f' % model.score(X_test, y_test))

Test Acc: 0.497


In [15]:
# Try SVC model with rbf again but with a new C value
model = SVC(gamma = 10, C=10)
model.fit(X_train, y_train)

SVC(C=10, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma=10, kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)

In [16]:
# Model Accuracy
print('Test Acc: %.3f' % model.score(X_test, y_test))

Test Acc: 0.497


In [18]:
# Try SVC model with classifier sigmoid 
model = SVC(kernel='sigmoid')
model.fit(X_train, y_train)

SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='auto_deprecated',
  kernel='sigmoid', max_iter=-1, probability=False, random_state=None,
  shrinking=True, tol=0.001, verbose=False)

In [19]:
# Model Accuracy
print('Test Acc: %.3f' % model.score(X_test, y_test))

Test Acc: 0.497


# Summary:

### Support Vector Machine: 49.7%