# Basic Binary Classification

See original post at http://3leafnodes.com/support-vector-machines-classification

## Imports

In [1]:
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn import datasets
from sklearn import metrics
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
% matplotlib notebook

## Import and Explore the Data

In [2]:
breast_cancer = datasets.load_breast_cancer()
X = breast_cancer.data
y = breast_cancer.target

In [3]:
breast_cancer.DESCR



In [4]:
breast_cancer.feature_names

array(['mean radius', 'mean texture', 'mean perimeter', 'mean area',
       'mean smoothness', 'mean compactness', 'mean concavity',
       'mean concave points', 'mean symmetry', 'mean fractal dimension',
       'radius error', 'texture error', 'perimeter error', 'area error',
       'smoothness error', 'compactness error', 'concavity error',
       'concave points error', 'symmetry error', 'fractal dimension error',
       'worst radius', 'worst texture', 'worst perimeter', 'worst area',
       'worst smoothness', 'worst compactness', 'worst concavity',
       'worst concave points', 'worst symmetry', 'worst fractal dimension'], 
      dtype='<U23')

In [5]:
# use this to show a non-truncated table
pd.set_option('display.max_columns', None)  
df = pd.DataFrame(data=breast_cancer.data, columns=breast_cancer.feature_names)
df.head()

Unnamed: 0,mean radius,mean texture,mean perimeter,mean area,mean smoothness,mean compactness,mean concavity,mean concave points,mean symmetry,mean fractal dimension,radius error,texture error,perimeter error,area error,smoothness error,compactness error,concavity error,concave points error,symmetry error,fractal dimension error,worst radius,worst texture,worst perimeter,worst area,worst smoothness,worst compactness,worst concavity,worst concave points,worst symmetry,worst fractal dimension
0,17.99,10.38,122.8,1001.0,0.1184,0.2776,0.3001,0.1471,0.2419,0.07871,1.095,0.9053,8.589,153.4,0.006399,0.04904,0.05373,0.01587,0.03003,0.006193,25.38,17.33,184.6,2019.0,0.1622,0.6656,0.7119,0.2654,0.4601,0.1189
1,20.57,17.77,132.9,1326.0,0.08474,0.07864,0.0869,0.07017,0.1812,0.05667,0.5435,0.7339,3.398,74.08,0.005225,0.01308,0.0186,0.0134,0.01389,0.003532,24.99,23.41,158.8,1956.0,0.1238,0.1866,0.2416,0.186,0.275,0.08902
2,19.69,21.25,130.0,1203.0,0.1096,0.1599,0.1974,0.1279,0.2069,0.05999,0.7456,0.7869,4.585,94.03,0.00615,0.04006,0.03832,0.02058,0.0225,0.004571,23.57,25.53,152.5,1709.0,0.1444,0.4245,0.4504,0.243,0.3613,0.08758
3,11.42,20.38,77.58,386.1,0.1425,0.2839,0.2414,0.1052,0.2597,0.09744,0.4956,1.156,3.445,27.23,0.00911,0.07458,0.05661,0.01867,0.05963,0.009208,14.91,26.5,98.87,567.7,0.2098,0.8663,0.6869,0.2575,0.6638,0.173
4,20.29,14.34,135.1,1297.0,0.1003,0.1328,0.198,0.1043,0.1809,0.05883,0.7572,0.7813,5.438,94.44,0.01149,0.02461,0.05688,0.01885,0.01756,0.005115,22.54,16.67,152.2,1575.0,0.1374,0.205,0.4,0.1625,0.2364,0.07678


## Train the Model 

In [6]:
# extracting the first 2 features
X = X[:,:2]

X_train, X_test, y_train, y_test = train_test_split(X,y, random_state=0)

svclf = SVC(kernel="linear", C=1, random_state=0)
svclf.fit(X, y)

SVC(C=1, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='auto', kernel='linear',
  max_iter=-1, probability=False, random_state=0, shrinking=True,
  tol=0.001, verbose=False)

### Support Vectors 

In [7]:
X.shape

(569, 2)

In [8]:
svclf.support_vectors_[:5]

array([[ 17.99,  10.38],
       [ 11.42,  20.38],
       [ 12.45,  15.7 ],
       [ 13.71,  20.83],
       [ 13.  ,  21.82]])

In [9]:
svclf.support_vectors_.shape

(152, 2)

## Scoring and Metrics

In [10]:
svclf.score(X_train, y_train)

0.88497652582159625

In [11]:
y_pred = svclf.predict(X_test)
mse = metrics.mean_squared_error(y_test,y_pred)
mse

0.097902097902097904

## Visualisation

In [12]:
# adapted from: http://bit.ly/2iv7FFL

def make_meshgrid(x, y, h=.02):
    x_min, x_max = x.min() - 1, x.max() + 1
    y_min, y_max = y.min() - 1, y.max() + 1
    xx, yy = np.meshgrid(np.arange(x_min, x_max, h),
                         np.arange(y_min, y_max, h))
    return xx, yy

def plot_contours(ax, clf, xx, yy, **params):
    Z = clf.predict(np.c_[xx.ravel(), yy.ravel()])
    Z = Z.reshape(xx.shape)
    out = ax.contourf(xx, yy, Z, **params)
    return out

def plot_model(model, x1, x2, title):

    X0, X1 = X[:, 0], X[:, 1]
    xx, yy = make_meshgrid(X0, X1)
    
    plt.figure()    
    plot_contours(plt, model, xx, yy, cmap=plt.cm.Vega10, alpha=0.75)
    plt.scatter(X0, X1, c=y, cmap=plt.cm.Vega10, s=15, alpha=0.95, edgecolors='#333333', linewidths=0.3) 
    plt.xlabel(x1)
    plt.ylabel(x2)
    plt.title(title)
    plt.show()    

In [13]:
x1 = 'Mean Radius'
x2 = 'Mean Texture'
title = 'SVC with Linear Kernel for the Breast Cancer Wisconsin Dataset'

plot_model(svclf, x1, x2, title)

<IPython.core.display.Javascript object>

# Binary Classification with Randomised Grid Search and a Pipeline

## Imports 

In [14]:
from sklearn.pipeline import make_pipeline
from sklearn.model_selection import RandomizedSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.feature_selection import SelectKBest

## Pipeline and Grid Search 

In [15]:
%%time

svclf = SVC(kernel="linear", random_state=0)
X = breast_cancer.data

param_dist = {'selectkbest__k':list(range(1,30)),
              'svc__C':list(range(1,10))}

pipe = make_pipeline(SelectKBest(), StandardScaler(), svclf)
rand = RandomizedSearchCV(pipe, param_dist, cv=10,   
                          scoring='neg_mean_squared_error',
                          n_iter=10, random_state=0)
rand.fit(X, y)

CPU times: user 1.55 s, sys: 37.5 ms, total: 1.59 s
Wall time: 1.81 s


## Performance

In [16]:
print(-rand.best_score_)

0.0281195079086


In [17]:
print(rand.best_params_)

{'svc__C': 1, 'selectkbest__k': 20}


In [18]:
print(rand.best_estimator_)

Pipeline(memory=None,
     steps=[('selectkbest', SelectKBest(k=20, score_func=<function f_classif at 0x10aa8a6a8>)), ('standardscaler', StandardScaler(copy=True, with_mean=True, with_std=True)), ('svc', SVC(C=1, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='auto', kernel='linear',
  max_iter=-1, probability=False, random_state=0, shrinking=True,
  tol=0.001, verbose=False))])


# Multi-Class Classification

## Load and Explore the Data

In [19]:
wine = datasets.load_wine()
X = wine.data
y = wine.target

In [20]:
wine.DESCR



In [21]:
X.shape

(178, 13)

In [22]:
wine.target_names

array(['class_0', 'class_1', 'class_2'], 
      dtype='<U7')

In [23]:
wine.feature_names

['alcohol',
 'malic_acid',
 'ash',
 'alcalinity_of_ash',
 'magnesium',
 'total_phenols',
 'flavanoids',
 'nonflavanoid_phenols',
 'proanthocyanins',
 'color_intensity',
 'hue',
 'od280/od315_of_diluted_wines',
 'proline']

In [24]:
df = pd.DataFrame(data=wine.data, columns=wine.feature_names)
df.head()

Unnamed: 0,alcohol,malic_acid,ash,alcalinity_of_ash,magnesium,total_phenols,flavanoids,nonflavanoid_phenols,proanthocyanins,color_intensity,hue,od280/od315_of_diluted_wines,proline
0,14.23,1.71,2.43,15.6,127.0,2.8,3.06,0.28,2.29,5.64,1.04,3.92,1065.0
1,13.2,1.78,2.14,11.2,100.0,2.65,2.76,0.26,1.28,4.38,1.05,3.4,1050.0
2,13.16,2.36,2.67,18.6,101.0,2.8,3.24,0.3,2.81,5.68,1.03,3.17,1185.0
3,14.37,1.95,2.5,16.8,113.0,3.85,3.49,0.24,2.18,7.8,0.86,3.45,1480.0
4,13.24,2.59,2.87,21.0,118.0,2.8,2.69,0.39,1.82,4.32,1.04,2.93,735.0


In [25]:
from sklearn.svm import LinearSVC
wine_clf = LinearSVC(random_state=0)
wine_clf.fit(X[:, :2], y)

LinearSVC(C=1.0, class_weight=None, dual=True, fit_intercept=True,
     intercept_scaling=1, loss='squared_hinge', max_iter=1000,
     multi_class='ovr', penalty='l2', random_state=0, tol=0.0001,
     verbose=0)

In [26]:
x1 = "malic_acid"
x2 = "alcohol"
title = "LinearSVC for the Wine Dataset"

plot_model(wine_clf, x1, x2, title)

<IPython.core.display.Javascript object>

## Train the model

In [27]:
lin_clf = LinearSVC(random_state=0)

param_dist = {'selectkbest__k':list(range(1,13)),
              'linearsvc__C':list(range(1,10))}

pipe = make_pipeline(SelectKBest(), StandardScaler(), lin_clf)
rand = RandomizedSearchCV(pipe, param_dist, cv=10,   
                          scoring='neg_mean_squared_error',
                          n_iter=10, random_state=0)
rand.fit(X, y)

RandomizedSearchCV(cv=10, error_score='raise',
          estimator=Pipeline(memory=None,
     steps=[('selectkbest', SelectKBest(k=10, score_func=<function f_classif at 0x10aa8a6a8>)), ('standardscaler', StandardScaler(copy=True, with_mean=True, with_std=True)), ('linearsvc', LinearSVC(C=1.0, class_weight=None, dual=True, fit_intercept=True,
     intercept_scaling=1, loss='squared_hinge', max_iter=1000,
     multi_class='ovr', penalty='l2', random_state=0, tol=0.0001,
     verbose=0))]),
          fit_params=None, iid=True, n_iter=10, n_jobs=1,
          param_distributions={'selectkbest__k': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12], 'linearsvc__C': [1, 2, 3, 4, 5, 6, 7, 8, 9]},
          pre_dispatch='2*n_jobs', random_state=0, refit=True,
          return_train_score=True, scoring='neg_mean_squared_error',
          verbose=0)

## Performance

In [28]:
print(-rand.best_score_)

0.0280898876404


In [29]:
print(rand.best_params_)

{'selectkbest__k': 11, 'linearsvc__C': 1}


In [30]:
print(rand.best_estimator_)

Pipeline(memory=None,
     steps=[('selectkbest', SelectKBest(k=11, score_func=<function f_classif at 0x10aa8a6a8>)), ('standardscaler', StandardScaler(copy=True, with_mean=True, with_std=True)), ('linearsvc', LinearSVC(C=1, class_weight=None, dual=True, fit_intercept=True,
     intercept_scaling=1, loss='squared_hinge', max_iter=1000,
     multi_class='ovr', penalty='l2', random_state=0, tol=0.0001,
     verbose=0))])
