In [8]:
import numpy as np
from numpy import stack, vstack, hstack, eye, mean, inf, array, concatenate, diag
from scipy.optimize import minimize, Bounds, LinearConstraint
from sklearn.model_selection import train_test_split
from plotly.express import scatter, scatter_3d
from plotly.graph_objects import Mesh3d
from sklearn.datasets import load_iris

In [9]:
iterable =lambda x: hasattr(x, '--iter--')
def rand(*args):
  if len(args) == 1 and iterable(args[0]):
    return np.random.rand(*args[0])
  return np.random.rand(*args)              # returns rand(3,4)

def randn(*args):
  if len(args) == 1 and iterable(args[0]):
    return np.random.randn(*args[0])
  return np.random.randn(*args)              # returns randn(3,4)

def ones(*args):
  if len(args) == 1:
    return np.ones(args[0])
  return np.ones(args)                       # returns ones(3,4)

def zeros(*args):
  if len(args) == 1:
    return np.zeros(args[0])
  return np.zeros(args)                      # returns zeros(3,4)

In [10]:
# Visualize the data

def show_iris(x, y, labels = None, symbol_label = None, **kwargs):
  if labels is None:
    labels = {'x':x_names[0], 'y':x_names[1], 'z':x_names[2], 'color':'species', 'symbol':symbol_label}
    fig = scatter_3d(x  = x[:,0],  #x-axis
                     y  = x[:,1],  #y-axis
                     z  = x[:,2],  #z-axis
                     color = [y_names[n] for n in y], 
                     labels = labels,
                     **kwargs)
    fig['layout']['scene']['aspectmode'] = 'data'
    return fig

In [11]:
# Function to fix the axis limit
# We need this to plot a large triangle for the  hyperplane
# without automatically changing the axis limit

def lim(x):
  xmin, xmax = x.min(), x.max()
  xr = xmax - xmin
  return [xmin - 0.05*xr, xmax + 0.05*xr]

def fix_axis(fig):
  xlim, ylim, zlim = lim(x[:,0]), lim(x[:,1]), lim(x[:,2])
  yx = (ylim[1] - ylim[0])/(xlim[1] - xlim[0])  
  zx = (zlim[1] - zlim[0])/(xlim[1] - xlim[0])  
  fig.update_layout(scene = dict(
      xaxis = dict(range = [xlim[0], xlim[1]]),
      yaxis = dict(range = [ylim[0], ylim[1]]),
      zaxis = dict(range = [zlim[0], zlim[1]]),
      aspectmode = 'manual',
      aspectratio = dict(x = 1, y = yx, z = zx)))

In [12]:
# fucntion to plot a plane
# this is not always the optimal plane that we are interested in!

def add_plane(fig, w, b, opacity = 0.5, scaling = 100.0):
  # find three vectors that are orthogonal to w=(a,b,c)
  # (-b, a, 0), (-c, 0, a), and (0, -c, b) are orthogonal to w
  
  x_plane  = array([[-w[1], w[0], 0],  # (-b, a, 0) 
                    [-w[2], 0, w[0]],  # (-c, 0, a) 
                    [0, -w[2], w[1]]]) # (0, -c, b)

  # Shift the w direction wuch that x@w + b = 0
  x_plane += -w*b / (w@w) # Shift in w direction
  tau = scaling # scale in all direction, increase if plane is not visible

  x_plane = array([[1-tau, 0, tau],
                   [tau, 1-tau, 0],
                   [0, tau, 1-tau]]) @ x_plane

  fig.add_trace(Mesh3d(
      color = 'green', opacity = opacity,
      x = x_plane[:,0], y = x_plane[:,1], z = x_plane[:,2], # define three points
      i = [0], j = [1], k = [2]))                           # define a triangle

  return x_plane                    

In [13]:
def add_margin(fig, w, b):
  # fix the axis
  fix_axis(fig) # to keep the  current view
  # the seperating hyperplane
  add_plane(fig, w, b)
  # the margin
  # positive examples on the margin have x @ w +b == +1 
  # negative examples on the margin have x @ w +b == -1

  add_plane(fig, w, b + 1, opacity = 0.15) 
  add_plane(fig, w, b - 1, opacity = 0.15) 

In [14]:
def show_iris_solution(x, y, w, b, title  = ''):
  fig = show_iris(x, (y+1)//2)
  add_margin(fig, w, b)
  fig.update_layout(title = {'text' : title})
  fig.show()

Implement a solver for quadratic programming

In [15]:
#  implement quadprog using scipy.optimize.mminimize
def quadprog(Q, c, A = None, b = None, Aeq = None, beq = None, lb = None, ub = None, x0 = None):
  # Min 0.5 * x.T @ Q @ x + c.T @ x
  # s.t. A @ x < = b
  #      Aeq @ x == beq
  #      lb <= x <= ub
  # init x with x0

  if A is None: A = zeros(0, c.shape[0])
  if b is None: b = zeros(0)
  if Aeq is None: Aeq = zeros(0, c.shape[0])
  if beq is None: beq = zeros(0)
  if lb is None: lb = -inf * ones(c.shape[0])
  if ub is None: ub = inf * ones(c.shape[0])
  if x0 is None: x0 = zeros(c.shape[0])

  fun = lambda x: 0.5 * x.T @ Q @ x + c.T @ x
  thebounds = Bounds(lb, ub)
  theconstraints = LinearConstraint(vstack([A, Aeq]),
                                    concatenate([-inf*ones(A.shape[0]), beq]),
                                    concatenate([b, beq]))
  return minimize(fun, x0, 
                  bounds = thebounds, 
                  constraints = theconstraints)

In [16]:
Q = array([[1, -1],[-1, 2]])
f = array([-2, -6])
result = quadprog(Q, f, 
                  A = array([[1, 1], [-1, 2], [2, 1]]),
                  b = array([2, 2, 3]),
                  lb = zeros(2))
print(result.x)
print(result.fun)

[0.66666667 1.33333333]
-8.222222222222197


Implementation of linear SVM
Seperable Case

In [17]:
# use quadprog for implementation
def linear_svm_seperable_case(x, y, threshold = 1e-10):
  n = y.shape[0]
  Q = diag(y) @ x @ x.T @ diag(y)
  c = -ones(n)
  result = quadprog(Q, c, 
                    Aeq = y.reshape(1, n), 
                    beq = zeros(1),
                    lb = zeros(n))
  alpha = result.x
  support_vectors = (alpha > threshold)
  n_support_vectors = support_vectors.sum() # sum up TRUE to get the number of support vectors 
  alpha = alpha * (support_vectors)
  w = (alpha * y) @ x
  b = ((y - x @ w) * support_vectors).sum() / n_support_vectors
  return w, b, alpha

def predict(x, w, b):
  y_predicted = (x @ w + b > 0.0).astype(int)
  return y_predicted

def accuracy(x, y, w, b):
   return (y * (x@w + b) > 0).mean()

Toy data balanced set

In [18]:
# a simple toy data where two classes are drawn from two gaussian distributions
n = 100
np.random.seed(17)
x, y = randn(n, 3), ones(n).astype(int)
x[:n//2, :] += 5.0 # seperate the classes
y[:n//2] *= -1     # label the first half as -1

w, b, alpha = linear_svm_seperable_case(x, y)
print(f"Accuracy = {accuracy(x, y, w, b)}")
print(f"Alpha =\n {alpha}")
print(f"w =\n {w}")
print(f"b =\n {b}")

fig = scatter_3d(x = x[:,0], y = x[:,1], z = x[:,2], # x, y, z axis 
                 color = [['class -1', 'class +1'][n] for n in (y+1)//2])
fig['layout']['scene']['aspectmode'] = "data"
add_margin(fig, w, b)
fig.update_layout(title = {'text': "Balanced toy data"})
fig.show()

Accuracy = 1.0
Alpha =
 [0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.06587755 0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.00246319 0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.06834074 0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.       

Immplementation using SVC function of sklearn

In [19]:
# SVM Classifier model
from sklearn.svm import SVC

svm_clf = SVC(kernel="linear", C=100)
svm_clf.fit(x, y)

In [20]:
print(svm_clf.coef_)
print(svm_clf.intercept_)
print(svm_clf.support_vectors_)

[[-0.14213431 -0.19430433 -0.28045346]]
[1.58926925]
[[2.03987738 4.20441888 5.28458225]
 [5.53445146 3.59096914 3.94023007]
 [0.08748888 1.33795127 1.1303927 ]]


Imbalanced data

In [21]:
# simple imbalanced data with 95 positive and 5 negative examples

n_pos = 95
n_neg = 5
n = n_pos + n_neg

np.random.seed(17)
x, y = randn(n, 3), ones(n).astype(int)
x[:n_neg, :] += 5.0 # seperate the classes
y[:n_neg] *= -1     # label the first half as -1

w, b, alpha = linear_svm_seperable_case(x, y)
print(f"Accuracy = {accuracy(x, y, w, b)}")
print(f"Alpha =\n {alpha}")
print(f"w =\n {w}")
print(f"b =\n {b}")

fig = scatter_3d(x = x[:,0], y = x[:,1], z = x[:,2], # x, y, z axis 
                 color = [['class -1', 'class +1'][n] for n in (y+1)//2])
fig['layout']['scene']['aspectmode'] = "data"
add_margin(fig, w, b)
fig.update_layout(title = {'text': "Imbalanced toy data"})
fig.show()

Accuracy = 1.0
Alpha =
 [0.01810947 0.         0.         0.         0.03645164 0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.04029447 0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.00538764 0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.008879
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0. 

Immplementation using SVC function of sklearn

In [22]:
# SVM Classifier model
from sklearn.svm import SVC

svm_clf = SVC(kernel="linear", C=100)
svm_clf.fit(x, y)

In [23]:
print(svm_clf.coef_)
print(svm_clf.intercept_)
print(svm_clf.support_vectors_)

[[-0.16932274 -0.21568108 -0.18425917]]
[1.60813697]
[[5.27626589 3.14537192 5.62390111]
 [3.18118766 4.86195066 5.53983961]
 [0.52253728 0.08355444 2.72079559]
 [2.05230408 1.14537241 0.07647992]]


In [24]:
iris = load_iris()
i, j, k  = 2, 1, 0  # pick three dimensions, u can try with other dim, this gives nice visualization
x = iris.data[:,[i,j,k]] # take all rows rearrnge the columns since 4d can not be viewed
x_names = iris.feature_names
x_names = [x_names[i], x_names[j], x_names[k]] #'petal length (cm)', 'sepal width (cm)', 'sepal length (cm)'
y = iris.target
y_names = iris.target_names
show_iris(x,y)

In [25]:
# merge Versicolor and Virginica and assign them label +1, for Setosa assign the label as -1
y[y == 0] = -1 # Setosa
y[y == 2] = +1 # virginica

y_names = [iris.target_names[0], iris.target_names[1] + '/' + iris.target_names[2]]
show_iris(x,(y+1)//2) # Remaping (-1, +1) to (0, 1) for the legend

Looks like the data  is well seperable by a linear hyperplane.\
So now we will implement the Maximal Margin Classifier

In [26]:
x_train, x_test, y_train, y_test = train_test_split(x, y, train_size = 0.5, random_state = 12)
x_all = vstack([x_train, x_test])
y_all = hstack([y_train, y_test])
train_test = x_train.shape[0]*['train'] + x_test.shape[0]*['test']

In [27]:
# n = 100
# np.random.seed(17)
# x, y = randn(n, 3), ones(n).astype(int)
# x[:n//2, :] += 5.0 # seperate the classes
# y[:n//2] *= -1     # label the first half as -1

w, b, alpha = linear_svm_seperable_case(x_train, y_train)
print(f"Accuracy = {accuracy(x_train, y_train, w, b)}")
print(f"Alpha =\n {alpha}")
print(f"w =\n {w}")
print(f"b =\n {b}")

show_iris_solution(x_train, y_train, w, b, "Training data with SVM hyperplane and margin")

# fig = scatter_3d(x = x_train[:,0], y = x_train[:,1], z = x_train[:,2], # x, y, z axis 
#                  color = [['class -1', 'class +1'][n] for n in (y_train+1)//2])
# fig['layout']['scene']['aspectmode'] = "data"
# add_margin(fig, w, b)
# fig.update_layout(title = {'text': "Balanced toy data"})
# fig.show()

Accuracy = 1.0
Alpha =
 [0.         0.         0.         0.         0.         0.
 0.         0.         0.47284188 0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.03109279 0.50393467 0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.        ]
w =
 [ 0.91019169 -0.39071062  0.16361752]
b =
 -2.102383320414457


In [28]:
print(f"Accuracy = {accuracy(x_test, y_test, w, b)}")
show_iris_solution(x_test, y_test, w, b, "Test data with SVM hyperplane and margin")

Accuracy = 1.0


Some points lie within the margin, however they are accurately classified.\
Note that, we dont consider the points lying on or within the margin in this case as the support vecctors, since it is only for the training dataset.

In [29]:
# SVM Classifier model
from sklearn.svm import SVC

svm_clf = SVC(kernel="linear", C=100)
svm_clf.fit(x_train, y_train)

In [30]:
print(svm_clf.coef_)
print(svm_clf.intercept_)
print(svm_clf.support_vectors_)

[[ 0.91010159 -0.39057983  0.16368077]]
[-2.10291136]
[[1.7 3.4 5.4]
 [1.6 3.  5. ]
 [3.5 2.6 5.7]]


In [31]:
from sklearn.metrics import accuracy_score
y_pred = svm_clf.predict(x_test)
accuracy_score(y_pred, y_test)

1.0

Implementation of general soft margin SVM for non-linear seperable case

In [32]:
# use quadprog for implementation
def linear_svm(x, y, C, threshold = 1e-10):
  n = y.shape[0]
  Q = diag(y) @ x @ x.T @ diag(y)
  c = -ones(n)
  result = quadprog(Q, c, 
                    Aeq = y.reshape(1, n), 
                    beq = zeros(1),
                    lb = zeros(n),
                    ub = C * ones(n))
  alpha = result.x
  support_vectors = (alpha > threshold)
  n_support_vectors = support_vectors.sum() # sum up TRUE to get the number of support vectors 
  alpha = alpha * (support_vectors)
  w = (alpha * y) @ x
  b = ((y - x @ w) * support_vectors).sum() / n_support_vectors
  return w, b, alpha

# def predict(x, w, b):
#   y_predicted = (x @ w + b > 0.0).astype(int)
#   return y_predicted

# def accuracy(x, y, w, b):
#    return (y * (x@w + b) > 0).mean()

In [33]:
iris = load_iris()
i, j, k  = 2, 1, 0  # pick three dimensions, u can try with other dim, this gives nice visualization
x = iris.data[:,[i,j,k]] # take all rows rearrnge the columns since 4d can not be viewed
x_names = iris.feature_names
x_names = [x_names[i], x_names[j], x_names[k]] #'petal length (cm)', 'sepal width (cm)', 'sepal length (cm)'
y = iris.target
y_names = iris.target_names
show_iris(x,y)

For non-seperable case lets merge Setosa and Versicolor

In [34]:
# merge Versicolor and Virginica and assign them label +1, for Setosa assign the label as -1
y[y == 0] = 1 # Setosa
y[y == 2] = -1 # virginica

y_names = [iris.target_names[0] + '/' +  iris.target_names[1], iris.target_names[2]]
show_iris(x,(y+1)//2) # Remaping (-1, +1) to (0, 1) for the legend

In [35]:
x_train, x_test, y_train, y_test = train_test_split(x, y, train_size = 0.5, random_state = 12)
x_all = vstack([x_train, x_test])
y_all = hstack([y_train, y_test])
train_test = x_train.shape[0]*['train'] + x_test.shape[0]*['test']

In [36]:
w, b, alpha = linear_svm(x_train, y_train, C = 0.05)
print(f"Accuracy = {accuracy(x_train, y_train, w, b)}")
print(f"Alpha =\n {alpha}")
print(f"w =\n {w}")
print(f"b =\n {b}")

show_iris_solution(x_train, y_train, w, b, "Training data with SVM hyperplane and margin")

Accuracy = 0.9466666666666667
Alpha =
 [0.05 0.   0.05 0.   0.   0.05 0.   0.   0.   0.05 0.05 0.05 0.05 0.05
 0.05 0.05 0.   0.   0.05 0.05 0.05 0.   0.05 0.   0.   0.   0.05 0.
 0.05 0.05 0.   0.   0.   0.05 0.05 0.05 0.   0.05 0.05 0.   0.   0.05
 0.05 0.05 0.   0.   0.05 0.   0.05 0.   0.   0.05 0.   0.05 0.05 0.05
 0.05 0.   0.   0.   0.   0.05 0.   0.05 0.05 0.   0.05 0.05 0.   0.
 0.05 0.   0.   0.05 0.05]
w =
 [-0.885 -0.075 -0.235]
b =
 6.006437500000003


In [37]:
w, b, alpha = linear_svm(x_train, y_train, C = 0.1)
print(f"Accuracy = {accuracy(x_train, y_train, w, b)}")
print(f"Alpha =\n {alpha}")
print(f"w =\n {w}")
print(f"b =\n {b}")

show_iris_solution(x_train, y_train, w, b, "Training data with SVM hyperplane and margin")

Accuracy = 0.9733333333333334
Alpha =
 [0.  0.  0.1 0.  0.  0.1 0.  0.  0.  0.1 0.1 0.1 0.1 0.1 0.1 0.1 0.  0.
 0.  0.1 0.1 0.  0.1 0.  0.  0.  0.1 0.  0.1 0.1 0.  0.  0.  0.1 0.1 0.
 0.  0.1 0.1 0.  0.  0.  0.1 0.1 0.  0.  0.1 0.  0.1 0.  0.  0.1 0.  0.1
 0.1 0.1 0.  0.  0.  0.  0.  0.  0.  0.1 0.1 0.  0.1 0.1 0.  0.  0.1 0.
 0.  0.1 0.1]
w =
 [-1.25 -0.06 -0.17]
b =
 7.31505882353524


In [38]:
w, b, alpha = linear_svm(x_train, y_train, C = 1)
print(f"Accuracy = {accuracy(x_train, y_train, w, b)}")
print(f"Alpha =\n {alpha}")
print(f"w =\n {w}")
print(f"b =\n {b}")

show_iris_solution(x_train, y_train, w, b, "Training data with SVM hyperplane and margin")

Accuracy = 1.0
Alpha =
 [0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.81465395 1.
 0.88481396 0.         1.         0.         0.         0.
 0.         0.         1.         0.         1.         0.
 0.         0.         0.         0.         1.         1.
 0.         0.         0.         0.         1.         0.
 0.         1.         0.         0.         0.         0.
 0.         0.69946791 0.         0.         1.         0.
 0.         0.         0.         1.         0.         1.
 0.         1.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 1.         1.         0.         0.         1.         0.
 0.         0.         0.        ]
w =
 [-3.31968075  0.24690958  0.16460902]
b =
 14.308814164541246


Let's check how the SVM classifier varies with the value of C   

In [39]:
# C = 10
w, b, alpha = linear_svm(x_train, y_train, C = 10)
print(f"Accuracy = {accuracy(x_train, y_train, w, b)}")
print(f"Alpha =\n {alpha}")
print(f"w =\n {w}")
print(f"b =\n {b}")

show_iris_solution(x_train, y_train, w, b, "Training data with SVM hyperplane and margin")

Accuracy = 1.0
Alpha =
 [ 0.          0.          0.          0.          0.          0.
  0.          0.          0.          0.          0.          0.
  0.          0.          0.          0.          0.          0.
  0.          0.         10.          0.          4.71374929  0.
  0.          0.          0.          0.         10.          0.
  0.          0.          0.          0.          0.56860434  0.
  0.          0.17468006  0.          0.          0.          0.
  0.          0.          0.          0.          0.          0.
  0.          0.          0.         10.          0.         10.
  0.          0.          0.          0.          0.          0.
  0.          0.          0.          0.          0.          0.
  6.02953511  0.          0.          0.         10.          0.
  0.          0.          0.        ]
w =
 [-5.46828477  0.93731689  0.625043  ]
b =
 19.81920069028643


In [40]:
# C = 100
w, b, alpha = linear_svm(x_train, y_train, C = 100)
print(f"Accuracy = {accuracy(x_train, y_train, w, b)}")
print(f"Alpha =\n {alpha}")
print(f"w =\n {w}")
print(f"b =\n {b}")

show_iris_solution(x_train, y_train, w, b, "Training data with SVM hyperplane and margin")

Accuracy = 1.0
Alpha =
 [  0.           0.           0.           0.           0.
   0.           0.           0.           0.           0.
   0.           0.           0.           0.           0.
   0.           0.           0.           0.           0.
  59.71215685   0.           0.           0.           0.
   0.           0.           0.         100.           0.
   0.           0.           0.           0.           0.
   0.           0.           0.           0.           0.
   0.           0.           0.           0.           0.
   0.           0.           0.           0.           0.
   0.          20.76002165   0.           0.           0.
   0.           0.           0.           0.           0.
   0.           0.           0.           0.           0.
   0.           0.           0.           0.           0.
  19.5278215    0.           0.           0.           0.        ]
w =
 [-12.07600216   3.88486274   1.72704368]
b =
 36.253618382870286


As you increase the value of the C, the margin of of the classifier shrinks and less number of points falls within the margin or beyond the margin.

In [41]:
# C = 1
w, b, alpha = linear_svm(x_train, y_train, C = 1)

print(f"Accuracy = {accuracy(x_test, y_test, w, b)}")
print(f"Alpha =\n {alpha}")
print(f"w = {w}")
print(f"b = {b}")

show_iris_solution(x_test, y_test, w, b, "Testing data with SVM hyperplane and margin")

Accuracy = 0.9066666666666666
Alpha =
 [0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.81465395 1.
 0.88481396 0.         1.         0.         0.         0.
 0.         0.         1.         0.         1.         0.
 0.         0.         0.         0.         1.         1.
 0.         0.         0.         0.         1.         0.
 0.         1.         0.         0.         0.         0.
 0.         0.69946791 0.         0.         1.         0.
 0.         0.         0.         1.         0.         1.
 0.         1.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 1.         1.         0.         0.         1.         0.
 0.         0.         0.        ]
w = [-3.31968075  0.24690958  0.16460902]
b = 14.308814164541246


# Scikit Learn code

In [1]:
#Import scikit-learn dataset library
from sklearn import datasets

#Load dataset
cancer = datasets.load_breast_cancer()

In [2]:
# print the names of the 13 features
print("Features: ", cancer.feature_names)

# print the label type of cancer('malignant' 'benign')
print("Labels: ", cancer.target_names)

Features:  ['mean radius' 'mean texture' 'mean perimeter' 'mean area'
 'mean smoothness' 'mean compactness' 'mean concavity'
 'mean concave points' 'mean symmetry' 'mean fractal dimension'
 'radius error' 'texture error' 'perimeter error' 'area error'
 'smoothness error' 'compactness error' 'concavity error'
 'concave points error' 'symmetry error' 'fractal dimension error'
 'worst radius' 'worst texture' 'worst perimeter' 'worst area'
 'worst smoothness' 'worst compactness' 'worst concavity'
 'worst concave points' 'worst symmetry' 'worst fractal dimension']
Labels:  ['malignant' 'benign']


In [4]:
cancer.data.shape

(569, 30)

In [5]:
# Import train_test_split function
from sklearn.model_selection import train_test_split

# Split dataset into training set and test set
X_train, X_test, y_train, y_test = train_test_split(cancer.data, cancer.target, test_size=0.3,random_state=109) # 70% training and 30% test

In [6]:
#Import svm model
from sklearn import svm

#Create a svm Classifier
clf = svm.SVC(kernel='linear') # Linear Kernel

#Train the model using the training sets
clf.fit(X_train, y_train)

#Predict the response for test dataset
y_pred = clf.predict(X_test)

In [7]:
#Import scikit-learn metrics module for accuracy calculation
from sklearn import metrics

# Model Accuracy: how often is the classifier correct?
print("Accuracy:",metrics.accuracy_score(y_test, y_pred))

Accuracy: 0.9649122807017544
