In [1]:
import pandas as pd
import matplotlib.pyplot as plt

In [2]:
data = pd.read_csv("data/iris.csv")

In [3]:
data.head()

Unnamed: 0,Id,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm,Species
0,1,5.1,3.5,1.4,0.2,Iris-setosa
1,2,4.9,3.0,1.4,0.2,Iris-setosa
2,3,4.7,3.2,1.3,0.2,Iris-setosa
3,4,4.6,3.1,1.5,0.2,Iris-setosa
4,5,5.0,3.6,1.4,0.2,Iris-setosa


In [4]:
X=data.drop(['Id','Species'],axis=1)
y=data['Species']

In [5]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=0.2, random_state=30, stratify=y)

In [6]:
from sklearn.svm import SVC  #SVC - Supoprt Vector Classifier
cls=SVC()

In [7]:
from sklearn.model_selection import GridSearchCV
parameters = {'C':[0.001,0.1,10], 'gamma':[0.1,0.01],'kernel':['rbf','linear']}

#C - Regularization - squared l2 error is calculated
#gamma - Fit of the model
#kernal - kernal to be used 

In [8]:
grid=GridSearchCV(cls, parameters,cv=5)
grid.fit(X_train, y_train)

GridSearchCV(cv=5, error_score=nan,
             estimator=SVC(C=1.0, break_ties=False, cache_size=200,
                           class_weight=None, coef0=0.0,
                           decision_function_shape='ovr', degree=3,
                           gamma='scale', kernel='rbf', max_iter=-1,
                           probability=False, random_state=None, shrinking=True,
                           tol=0.001, verbose=False),
             iid='deprecated', n_jobs=None,
             param_grid={'C': [0.001, 0.1, 10], 'gamma': [0.1, 0.01],
                         'kernel': ['rbf', 'linear']},
             pre_dispatch='2*n_jobs', refit=True, return_train_score=False,
             scoring=None, verbose=0)

In [9]:
#to get the best parameter for the model
grid.best_params_

{'C': 10, 'gamma': 0.1, 'kernel': 'rbf'}

In [10]:
svclassifier=SVC(C=10,gamma=0.1,kernel='rbf',probability=True)
svclassifier.fit(X_train,y_train)

SVC(C=10, break_ties=False, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma=0.1, kernel='rbf',
    max_iter=-1, probability=True, random_state=None, shrinking=True, tol=0.001,
    verbose=False)

In [11]:
ypred=svclassifier.predict(X_test)

In [12]:
#accuracy
from sklearn.metrics import accuracy_score
accuracy_score(y_test,ypred)

1.0

In [13]:
#confusison matrix
from sklearn.metrics import confusion_matrix
confusion_matrix(y_test,ypred)

array([[10,  0,  0],
       [ 0, 10,  0],
       [ 0,  0, 10]], dtype=int64)

In [14]:
from sklearn.metrics import roc_auc_score
rfc_probs = svclassifier.predict_proba(X_test)
rfc_probs

array([[0.01647921, 0.02074013, 0.96278065],
       [0.00895319, 0.01663609, 0.97441072],
       [0.96628697, 0.02327394, 0.01043909],
       [0.01567863, 0.97754971, 0.00677166],
       [0.01780458, 0.97773208, 0.00446334],
       [0.97587434, 0.01453634, 0.00958932],
       [0.97276326, 0.01687162, 0.01036512],
       [0.96950138, 0.02023171, 0.01026691],
       [0.01627007, 0.02258537, 0.96114456],
       [0.01119334, 0.9358319 , 0.05297476],
       [0.0087304 , 0.98341357, 0.00785602],
       [0.01009708, 0.00733651, 0.98256641],
       [0.00841105, 0.98507698, 0.00651198],
       [0.01123768, 0.0027286 , 0.98603372],
       [0.969907  , 0.02012838, 0.00996462],
       [0.96581964, 0.02289867, 0.01128169],
       [0.93368764, 0.0532307 , 0.01308166],
       [0.01833728, 0.54518153, 0.43648119],
       [0.97232388, 0.01809299, 0.00958313],
       [0.01264904, 0.72037639, 0.26697457],
       [0.02249781, 0.1608293 , 0.81667289],
       [0.96346824, 0.02584423, 0.01068753],
       [0.

In [15]:
from sklearn.metrics import classification_report
print(classification_report(y_test,ypred))

                 precision    recall  f1-score   support

    Iris-setosa       1.00      1.00      1.00        10
Iris-versicolor       1.00      1.00      1.00        10
 Iris-virginica       1.00      1.00      1.00        10

       accuracy                           1.00        30
      macro avg       1.00      1.00      1.00        30
   weighted avg       1.00      1.00      1.00        30



In [44]:
import warnings
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
import numpy as np
def versiontuple(version):
    return tuple(map(int, (version.split("."))))
def decision_plot(X, y, classifier, test_idx=None, resolution=0.02):
    markers = ('s', 'x', 'o', '^', 'v')
    colors = ('red', 'blue', 'green', 'gray', 'cyan')
    cmap = ListedColormap(colors[:len(np.unique(y))])
    # plot the decision surface
    x1min, x1max = X['SepalLengthCm'].min() - 1, X['SepalLengthCm'].max() + 1
    x2min, x2max = X['SepalWidthCm'].min() - 1, X['SepalWidthCm'].max() + 1
    xx1, xx2 = np.meshgrid(np.arange(x1min, x1max, resolution),
                           np.arange(x2min, x2max, resolution))
    #Z = classifier.predict(np.array([xx1.ravel(), xx2.ravel()]).T)
    Z = classifier.predict(X)
    Z = Z.reshape(xx1.shape)
    plt.contourf(xx1, xx2, Z, alpha=0.4, cmap=cmap)
    plt.xlim(xx1.min(), xx1.max())
    plt.ylim(xx2.min(), xx2.max())
    for idx, cl in enumerate(np.unique(y)):
        plt.scatter(x=X[y == cl, 0], y=X[y == cl, 1],
                    alpha=0.8, c=cmap(idx),
                    marker=markers[idx], label=cl)