## SVM with python

In [1]:
# Importing the libraries
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

In [2]:
cols = ["sepal_length","sepal_width","petal_length","petal_width","label"]

In [3]:
dataset = pd.read_csv("https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data",header=None)
dataset.columns = cols

In [4]:
dataset.shape

(150, 5)

In [5]:
dataset.head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,label
0,5.1,3.5,1.4,0.2,Iris-setosa
1,4.9,3.0,1.4,0.2,Iris-setosa
2,4.7,3.2,1.3,0.2,Iris-setosa
3,4.6,3.1,1.5,0.2,Iris-setosa
4,5.0,3.6,1.4,0.2,Iris-setosa


In [6]:
# Encoding the Dependent Variable
from sklearn.preprocessing import LabelEncoder
labelencoder_y = LabelEncoder()
dataset["label"] = labelencoder_y.fit_transform(dataset["label"])

In [7]:
dataset.head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,label
0,5.1,3.5,1.4,0.2,0
1,4.9,3.0,1.4,0.2,0
2,4.7,3.2,1.3,0.2,0
3,4.6,3.1,1.5,0.2,0
4,5.0,3.6,1.4,0.2,0


In [8]:
#shuffle the dataset
from sklearn.utils import shuffle
dataset = shuffle(dataset)
dataset = dataset.reset_index(drop=True)

In [9]:
X = dataset.iloc[:, 0:4].values
y = dataset.iloc[:, 4:].values

In [10]:
X.shape

(150, 4)

In [40]:
y.shape

(150, 1)

In [19]:
#Split Data
from sklearn.model_selection import train_test_split

x_train, x_test, y_train, y_test = train_test_split(X,y, test_size=0.2)

In [14]:
print(x_train.shape)
print(y_train.shape)

print(x_test.shape)
print(y_test.shape)

(120, 4)
(120, 1)
(30, 4)
(30, 1)


In [42]:
# Feature Scaling
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X_train = sc.fit_transform(x_train)
X_test = sc.transform(x_test)

In [50]:
#training with SVC models
from sklearn.svm import SVC ,LinearSVC
C = 0.4  # SVM regularization parameter


while (C <= 1) :
    
    print ("For C" , C)
    
    linear_model = SVC(kernel='linear', C=C)
    linear_model.fit(x_train, y_train.ravel())


    #Calculate Test Prediction
    y_pred = linear_model.predict(x_test)
    print("linear model score :" , linear_model.score(x_test,y_test.ravel()))

    # LinearSVC (linear kernel)
    linearSVC_model = LinearSVC(C=C)
    linearSVC_model.fit(x_train, y_train.ravel())


    #Calculate Test Prediction
    y_pred = linearSVC_model.predict(x_test)
    print("LinearSVC (linear kernel) model score :" , linearSVC_model.score(x_test,y_test.ravel()))

    # # SVC with RBF kernel
    linearSVCRBF_model = SVC(kernel='rbf', gamma=0.7, C=C)
    linearSVCRBF_model.fit(x_train, y_train.ravel())


    #Calculate Test Prediction
    y_pred = linearSVCRBF_model.predict(x_test)
    print("SVC with RBF kernel model score :" , linearSVCRBF_model.score(x_test,y_test.ravel()))



    # SVC with polynomial (degree 3) kernel
    poly_model = SVC(kernel='poly', degree=2, C=C)
    poly_model.fit(x_train, y_train.ravel())


    #Calculate Test Prediction
    y_pred = poly_model.predict(x_test)
    print("SVC with polynomial (degree 3) kernel model score :" , poly_model.score(x_test,y_test.ravel()))
    
    print("-------------------------------------------------------------------------------------------")
    C = C + 0.2

For C 0.4
linear model score : 0.9666666666666667
LinearSVC (linear kernel) model score : 0.9333333333333333
SVC with RBF kernel model score : 0.9666666666666667
SVC with polynomial (degree 3) kernel model score : 0.9666666666666667
-------------------------------------------------------------------------------------------
For C 0.6000000000000001
linear model score : 0.9666666666666667
LinearSVC (linear kernel) model score : 0.9333333333333333
SVC with RBF kernel model score : 0.9666666666666667
SVC with polynomial (degree 3) kernel model score : 0.9666666666666667
-------------------------------------------------------------------------------------------
For C 0.8
linear model score : 0.9666666666666667
LinearSVC (linear kernel) model score : 0.9333333333333333
SVC with RBF kernel model score : 0.9666666666666667
SVC with polynomial (degree 3) kernel model score : 0.9666666666666667
-------------------------------------------------------------------------------------------
For C 1.0


In [46]:
#to get the predictions in categorical form
labelencoder_y.inverse_transform(y_pred)

  if diff:


array(['Iris-setosa', 'Iris-setosa', 'Iris-virginica', 'Iris-setosa',
       'Iris-virginica', 'Iris-setosa', 'Iris-setosa', 'Iris-virginica',
       'Iris-versicolor', 'Iris-versicolor', 'Iris-virginica',
       'Iris-setosa', 'Iris-virginica', 'Iris-setosa', 'Iris-versicolor',
       'Iris-setosa', 'Iris-versicolor', 'Iris-virginica',
       'Iris-versicolor', 'Iris-setosa', 'Iris-virginica', 'Iris-setosa',
       'Iris-versicolor', 'Iris-setosa', 'Iris-versicolor', 'Iris-setosa',
       'Iris-versicolor', 'Iris-virginica', 'Iris-virginica',
       'Iris-virginica'], dtype=object)

In [55]:
from sklearn.model_selection import KFold # import KFold
kf = KFold(n_splits=10) # Define the split - into 2 folds 
kf.get_n_splits(X) # returns the number of splitting iterations in the cross-validator
print(kf)

KFold(n_splits=10, random_state=None, shuffle=False)


In [56]:
for train_index, test_index in kf.split(X):
     #print("TRAIN:", train_index, "TEST:", test_index)
     x_train, x_test = X[train_index], X[test_index]
     y_train, y_test = y[train_index], y[test_index]

In [59]:
from sklearn.model_selection import cross_val_score
accuracies = cross_val_score(estimator = model, X = x_train, y = y_train.ravel(), cv = 10 , scoring="accuracy")
print("mean", accuracies.mean()) #to get mean of all acurracies
print("standard deviation" ,accuracies.std()) #to get standard deviation of all accuracies

mean 0.9707142857142858
standard deviation 0.04818767637264752


In [65]:
from sklearn.model_selection import LeaveOneOut
loo = LeaveOneOut()
accuracies = cross_val_score(estimator = model, X = x_train, y = y_train.ravel(), cv = loo , scoring="accuracy")
print("mean", accuracies.mean()) #to get mean of all acurracies
print("standard deviation" ,accuracies.std()) #to get standard deviation of all accuracies

mean 0.9777777777777777
standard deviation 0.14740554623801774
