# In [None]:
#Step 1: Import modules
#The following modules need to be imported to classify the Breast Cancer dataset using SVM:
import numpy as np
from sklearn.svm import SVC
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split

#Step 2: Load data
#The load_data function returns the data and the labels:
def load_data():
  """Load the scikit-learn Breast Cancer dataset.

  Returns:
    A tuple ``(X, y)`` holding the ``data`` and ``target`` attributes of
    the object returned by ``load_breast_cancer()``.
  """
  dataset = load_breast_cancer()
  return dataset.data, dataset.target

#Step 3: Evaluate performance
#The cal_acc function calculates the accuracy by comparing the predicted values of the labels and the values of the labels of the test data:
def cal_acc(y_test, y_predict):
  """Return the classification accuracy as a percentage (0-100).

  Each sample is placed in one confusion-matrix bucket by comparing the
  predicted label with the true label (labels are compared against 0 and 1).
  Any pair that is not a true positive, true negative or false positive is
  counted as a false negative.

  Args:
    y_test: True labels; the first dimension gives the number of samples.
    y_predict: Predicted labels, indexable in step with ``y_test``.

  Returns:
    ``(tp + tn) / (tp + tn + fp + fn) * 100`` as a float.

  Raises:
    ZeroDivisionError: If ``y_test`` contains no samples.
  """
  n_samples = np.shape(y_test)[0]
  counts = {"tp": 0, "tn": 0, "fp": 0, "fn": 0}
  for idx in range(n_samples):
    predicted = y_predict[idx]
    actual = y_test[idx]
    if predicted == 1 and actual == 1:
      outcome = "tp"
    elif predicted == 0 and actual == 0:
      outcome = "tn"
    elif predicted == 1 and actual == 0:
      outcome = "fp"
    else:
      outcome = "fn"
    counts[outcome] += 1
  correct = counts["tp"] + counts["tn"]
  total = sum(counts.values())
  return correct / total * 100

#Step 4: The model
#The following code makes use of the above functions to classify the data:
# Load the dataset and hold out 30% of the samples for testing; the fixed
# random_state makes the split reproducible.
X, y = load_data()
X_train, X_test, y_train, y_test = train_test_split(
  X, y, test_size=0.3, random_state=4
)

# Fit a linear-kernel SVM on the training split (gamma is left unset; it only
# applies to the rbf/poly/sigmoid kernels).
clf = SVC(kernel='linear')
clf.fit(X_train, y_train)

# Predict the held-out samples and score them as a percentage accuracy.
y_predict = clf.predict(X_test)
accuracy = cal_acc(y_test, y_predict)


# In [None]:
#Experiment 2: Classification of Breast Cancer Dataset using SVM, Linear Kernel, K-Fold
#The load_data and cal_acc functions of Experiment 1 are used in the following code. The reader is expected to write the functions again. The code that uses K-Fold split and calculates the average accuracy is as follows:
# KFold was used here without ever being imported, which raised NameError.
from sklearn.model_selection import KFold

# This cell relies on names defined earlier in the file: X, y, SVC, np and
# cal_acc.
# Split the samples into 5 folds; each fold serves once as the test set while
# the remaining folds form the training set.
kf=KFold(n_splits=5)
acc=[]  # per-fold accuracy, in percent
for train_i,test_i in kf.split(X):
  # kf.split yields index arrays, so select rows/labels by fancy indexing.
  X_train,X_test=X[train_i],X[test_i]
  y_train,y_test=y[train_i],y[test_i]
  # A fresh classifier per fold so no state leaks between folds.
  clf=SVC(kernel='linear')
  clf.fit(X_train, y_train)
  y_predict=clf.predict(X_test)
  accuracy=cal_acc(y_test, y_predict)
  acc.append(accuracy)
# Report the mean accuracy across all folds.
print(np.mean(acc))


# In [None]:
#Experiment 3:
#The following code classifies two sets of images using SVM. The required modules can be imported using the following code:
from matplotlib import pyplot as plt
import matplotlib.image as mpimg
import numpy as np
from sklearn import svm
from sklearn.model_selection import train_test_split

#The images can be converted to grayscale using the following function:
def rgb2gray(rgb):
  """Convert an RGB(A) image array to grayscale.

  Only the first three entries along the last axis are used, so any extra
  channel (e.g. alpha) is ignored.

  Args:
    rgb: Array whose last axis holds at least the R, G and B channels.

  Returns:
    The weighted sum ``0.2989*R + 0.5870*G + 0.1140*B`` taken over the last
    axis; the result has one dimension fewer than ``rgb``.
  """
  channel_weights = [0.2989, 0.5870, 0.1140]
  colour_channels = rgb[..., :3]
  return np.dot(colour_channels, channel_weights)

#The perf_measure function finds the performance of the model. It takes test_y and y_predicted as parameters:
def perf_measure(test_y,y_predicted):
  """Compute accuracy, sensitivity and specificity for binary predictions.

  The label 1 is treated as the positive class; any other label is treated
  as negative. A prediction equal to the true label is a true positive (if it
  is 1) or a true negative; a mismatch is a false positive (if the prediction
  is 1) or a false negative.

  Args:
    test_y: True labels (sequence or array; one entry per sample).
    y_predicted: Predicted labels, indexable in step with ``test_y``.

  Returns:
    A tuple ``(acc, sens, spec)`` of fractions in ``[0, 1]``:
    ``acc = (tp+tn)/(tp+tn+fp+fn)``, ``sens = tp/(tp+fn)`` and
    ``spec = tn/(tn+fp)``. A ratio whose denominator is zero (for example
    sensitivity when the test set contains no positive sample) is returned
    as ``nan`` instead of raising ``ZeroDivisionError``.
  """
  tp = tn = fp = fn = 0
  for i, actual in enumerate(test_y):
    predicted = y_predicted[i]
    if predicted == actual:
      if predicted == 1:
        tp += 1
      else:
        tn += 1
    elif predicted == 1:
      fp += 1
    else:
      fn += 1

  # A small random test split can lack one of the classes entirely, which
  # makes the corresponding ratio undefined; report it as nan.
  undefined = float("nan")
  total = tp + tn + fp + fn
  positives = tp + fn
  negatives = tn + fp
  acc = (tp + tn) / total if total else undefined
  sens = tp / positives if positives else undefined
  spec = tn / negatives if negatives else undefined
  return (acc, sens, spec)

#Suppose you have 20 images of class I and 20 images of class II. Assuming that the image data has been saved in final_data, the following code classifies the images using a linear kernel:
# Build the label column: 20 zeros followed by 20 ones, stacked into a
# (40, 1) array.
y1=np.zeros((20,1))
y2=np.ones((20,1))
y=np.vstack((y1,y2))
# final_data is not defined in this file and must already exist; presumably
# it holds one row per image in the same order as the labels - TODO confirm.
# NOTE(review): no random_state is given, so the split (and therefore the
# reported metrics) changes from run to run.
train_X,test_X,train_y,test_y=train_test_split(final_data, y, test_size=0.3)
clf = svm.SVC(kernel="linear")
# SVC.fit expects 1-D labels; passing the (n, 1) column vector triggers
# scikit-learn's DataConversionWarning, so flatten it for fitting only.
clf.fit(train_X,train_y.ravel())
y_predicted=clf.predict(test_X)
# test_y keeps its (n, 1) shape; perf_measure compares it element by element.
acc,sens,spec=perf_measure(test_y,y_predicted)
