# Support Vector Machines
SVM User Guide: https://scikit-learn.org/stable/modules/svm.html# <br>
Details on SVM classification: https://scikit-learn.org/stable/modules/generated/sklearn.svm.SVC.html#sklearn.svm.SVC


## Understanding Kernels
From https://iq.opengenus.org/svm-by-improving-classifier/

In [1]:
from sklearn.datasets import make_moons
from sklearn.model_selection import train_test_split
from ipywidgets import interact, IntSlider, FloatSlider, fixed
from sklearn import svm
import numpy as np
import matplotlib.pyplot as plt

In [27]:
def plot_decision_boundary(X_train, X_test, y_train, y_test):
    """Build a plotting callback for exploring SVC hyperparameters interactively.

    Parameters
    ----------
    X_train, y_train : training features and labels. The classifier is fitted
        on these and the first two feature columns are drawn as a scatter plot,
        so the data must be two-dimensional.
    X_test, y_test : held-out features and labels, used only to compute the
        accuracy shown in the plot title.

    Returns
    -------
    plotter : callable
        ``plotter(kernel, log_gamma, log_C, deg, coef0)`` fits a fresh
        ``svm.SVC`` and draws its predicted class-1 probability as a filled
        contour plot. ``log_gamma`` and ``log_C`` are base-10 exponents.
    """
    def plotter(kernel='linear', log_gamma=1, log_C=1, deg=1, coef0=1):
        # C and gamma are supplied as base-10 exponents; `deg` must be forwarded
        # as `degree`, otherwise the polynomial-degree control has no effect.
        clf = svm.SVC(C=10**log_C, kernel=kernel, degree=deg,
                      gamma=10**log_gamma, coef0=coef0, probability=True)
        clf.fit(X_train, y_train)

        # Evaluate the class-1 probability on a regular grid over the plot area.
        X1, X2 = np.meshgrid(np.linspace(-2, 3), np.linspace(-2, 2))
        grid_points = np.column_stack((X1.ravel(), X2.ravel()))
        y_proba = clf.predict_proba(grid_points)[:, 1]
        # Reshape to the grid's own shape rather than a hard-coded (50, 50).
        plt.contourf(X1, X2, y_proba.reshape(X1.shape), 16, cmap=plt.cm.bwr, alpha=0.75)
        plt.colorbar()

        accuracy = clf.score(X_test, y_test)
        plt.scatter(X_train[:, 0], X_train[:, 1], c=y_train, edgecolors='white', cmap=plt.cm.bwr)
        plt.xlabel('$x_1$')
        plt.ylabel('$x_2$')
        plt.title('test set accuracy: {}'.format(accuracy));

    return plotter

In [28]:
def plot_svc_interact(X, y):
    """Build a plotting callback that shows a linear SVC's boundary and margin.

    Parameters
    ----------
    X : two-column feature array; both columns are plotted.
    y : labels used to fit the classifier and to colour the points.

    Returns
    -------
    plotter : callable
        ``plotter(log_C)`` fits ``svm.SVC(kernel='linear')`` with
        ``C = 10**log_C`` and draws the decision boundary, the shaded margin
        band, the support vectors (large yellow markers) and the data points.
    """
    def plotter(log_C=1):
        clf = svm.SVC(C=10**log_C, kernel='linear')
        clf.fit(X, y)

        # Decision boundary: beta[0]*x1 + beta[1]*x2 + beta_0 = 0
        beta = clf.coef_[0]
        beta_0 = clf.intercept_[0]

        x_max = np.ceil(np.abs(X).max())

        if beta[1] == 0:
            # Vertical boundary: solving for x2 would divide by zero, so solve
            # for x1 instead. The margin edges are where the decision function
            # equals +/-1, i.e. 1/|beta[0]| either side of the boundary.
            if beta[0] != 0:
                boundary = -beta_0 / beta[0]
                half_margin = 1 / abs(beta[0])
                plt.axvline(boundary, color='k')
                plt.axvspan(boundary - half_margin, boundary + half_margin,
                            color='k', alpha=0.25, linewidth=0)
        else:
            slope = -beta[0]/beta[1]
            intercept = -beta_0/beta[1]

            x = np.linspace(-x_max, x_max, 100)
            # Margin edges: decision function equal to +1 and -1.
            margin_bound_1 = 1/beta[1] + slope*x + intercept
            margin_bound_2 = -1/beta[1] + slope*x + intercept

            plt.plot(x, slope*x + intercept, 'k')
            plt.fill_between(x, margin_bound_1, margin_bound_2, color='k', alpha=0.25, linewidth=0)

        plt.scatter(*clf.support_vectors_.T, s=100, c='y')
        plt.scatter(X[:, 0], X[:, 1], c=y, cmap=plt.cm.bwr)
        plt.axis([-x_max, x_max, -x_max, x_max])

    return plotter

In [32]:


# Toy two-class "moons" data set, split 50/50 into train and test (seeded for reproducibility).
X, y = make_moons(400, noise=0.25, random_state=0)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5, random_state=0)

# Slider labels are LaTeX: raw strings keep the backslashes without relying on
# Python's invalid-escape fallback (which emits a warning on recent versions).
log_C_slider = FloatSlider(min=-4, max=4, step=0.25, value=0, description=r'$\log(C)$')
# The closing '$' belongs after the parenthesis so the whole label is typeset as math.
log_gamma_slider = FloatSlider(min=-3, max=2, step=0.01, value=0, description=r'$\log(\gamma)$')
deg_slider = IntSlider(min=1, max=4, step=1, value=2, description='$d$')
coef0_slider = FloatSlider(min=-100, max=100, step=0.1, value=0, description='$r$')

# Wire the sliders and the kernel dropdown to the plotting callback.
interact(plot_decision_boundary(X_train, X_test, y_train, y_test),
         log_C=log_C_slider,
         log_gamma=log_gamma_slider,
         kernel=['rbf', 'linear', 'sigmoid', 'poly'],
         deg=deg_slider,
         coef0=coef0_slider);

interactive(children=(Dropdown(description='kernel', index=1, options=('rbf', 'linear', 'sigmoid', 'poly'), va…

## Applying SVM to a binary classification problem
Adapted from https://www.datacamp.com/community/tutorials/svm-classification-scikit-learn-python

### Import Breast Cancer Dataset

In [10]:
# Import scikit-learn's bundled datasets module
from sklearn import datasets
import pandas as pd  # NOTE(review): not referenced in any cell visible here — confirm before removing

# Load the breast cancer dataset; later cells read its feature_names,
# target_names, data and target attributes
cancer = datasets.load_breast_cancer()

### Examine the dataset

In [17]:
# print the names of the 30 features
print("Features: ", cancer.feature_names)

# print the label names of cancer ('malignant', 'benign')
print("Labels: ", cancer.target_names)

# shape of the label-name array, i.e. the number of classes
np.shape(cancer.target_names)

Features:  ['mean radius' 'mean texture' 'mean perimeter' 'mean area'
 'mean smoothness' 'mean compactness' 'mean concavity'
 'mean concave points' 'mean symmetry' 'mean fractal dimension'
 'radius error' 'texture error' 'perimeter error' 'area error'
 'smoothness error' 'compactness error' 'concavity error'
 'concave points error' 'symmetry error' 'fractal dimension error'
 'worst radius' 'worst texture' 'worst perimeter' 'worst area'
 'worst smoothness' 'worst compactness' 'worst concavity'
 'worst concave points' 'worst symmetry' 'worst fractal dimension']
Labels:  ['malignant' 'benign']


(2,)

### Split data into training and test datasets

In [4]:
# Splitting helper from scikit-learn's model_selection module
from sklearn.model_selection import train_test_split

# Hold out 30% of the samples for testing and train on the remaining 70%;
# the fixed random_state makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    cancer.data,
    cancer.target,
    test_size=0.3,
    random_state=109,
)

### Choose your hyperparameters

**C**: Float, default=1.0 <br>
Regularization parameter. The strength of the regularization is inversely proportional to C. Must be strictly positive. 
<br>
<br>
**kernel**: {‘linear’, ‘poly’, ‘rbf’, ‘sigmoid’, ‘precomputed’} or callable, default=’rbf’ <br>
Specifies the kernel type to be used in the algorithm. If none is given, ‘rbf’ will be used. If a callable is given it is used to pre-compute the kernel matrix from data matrices; that matrix should be an array of shape (n_samples, n_samples).
<br>
<br>
**degree**: Int, default=3 <br>
Degree of the polynomial kernel function (‘poly’). Ignored by all other kernels.
<br>
<br>
**gamma**: {‘scale’, ‘auto’} or float, default=’scale’ <br>
Kernel coefficient for ‘rbf’, ‘poly’ and ‘sigmoid’. <br>
  -if gamma='scale' (default) is passed then it uses 1 / (n_features * X.var()) as value of gamma, <br>
  -if ‘auto’, uses 1 / n_features. <br>
<br>
<br>
**coef0**: Float, default=0.0 <br>
Independent term in kernel function. It is only significant in ‘poly’ and ‘sigmoid’.

In [5]:
# Hyperparameters passed to svm.SVC when the model is created
C = 1              # regularization parameter
kernel = 'rbf'     # kernel type
gamma = 'scale'    # kernel coefficient setting
coef0 = 1          # independent kernel term; only significant for 'poly' and 'sigmoid'

### Fit Model

In [6]:
# scikit-learn's support vector machine module
from sklearn import svm

# Build the classifier from the hyperparameters chosen in the previous cell.
# NOTE(review): the features are passed in unscaled; scikit-learn's SVM guide
# recommends scaling inputs — consider whether that is wanted here.
clf = svm.SVC(
    C=C,
    kernel=kernel,
    gamma=gamma,
    coef0=coef0,
    probability=True,
)

# Fit on the training split
clf.fit(X_train, y_train)

# Predicted labels for the held-out test split
y_pred = clf.predict(X_test)

### Evaluate Model

In [7]:
# scikit-learn's metrics module supplies accuracy_score
from sklearn import metrics

# Accuracy: the fraction of test samples whose predicted label matches the true label
print("Accuracy:", metrics.accuracy_score(y_true=y_test, y_pred=y_pred))

Accuracy: 0.9239766081871345
