In [1]:
import numpy as np
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score

In [2]:
# Load the breast cancer dataset (binary classification)
breast_cancer = datasets.load_breast_cancer()
X = breast_cancer.data
y = breast_cancer.target

In [3]:
# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

In [4]:
# Feature scaling
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [8]:
# Create SVM classifier
svm_classifier = SVC()

# Train the classifier
svm_classifier.fit(X_train, y_train)

SVC()

In [9]:
# Predict using the trained model
y_pred = svm_classifier.predict(X_test)

# Calculate accuracy
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

Accuracy: 0.9766081871345029


In [10]:
y_pred

array([1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1,
       0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1,
       1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1,
       0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0,
       1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 1, 1,
       0, 1, 1, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 0,
       1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 1, 0, 1, 1, 1,
       1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1])

## Cross Validation

In [13]:
# cross validation using cross_val_score
from sklearn.model_selection import cross_val_score
cross_val_score(svm_classifier, X_train, y_train, cv=5, scoring='accuracy')

array([0.975     , 0.9625    , 0.9875    , 0.97468354, 0.93670886])

In [14]:
# cross validation using cross_val_score
from sklearn.model_selection import cross_val_score
cross_val_score(svm_classifier, X_train, y_train, cv=5, scoring='accuracy').mean()

0.9672784810126581

## Kernal Function

In [19]:
## kernel{‘linear’, ‘poly’, ‘rbf’, ‘sigmoid’, ‘precomputed’} or callable, default=’rbf’

# Create SVM classifier
svm_classifier = SVC(kernel= 'linear')

# Train the classifier
svm_classifier.fit(X_train, y_train)

SVC(kernel='linear')

In [20]:
# Predict using the trained model
y_pred = svm_classifier.predict(X_test)

# Calculate accuracy
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

Accuracy: 0.9766081871345029


## GridSearchCV

In [30]:
# gridsearchcv
param_grid = {
    "kernel": ['linear', 'poly', 'rbf', 'sigmoid'],
    'gamma': [1, 0.1, 0.01, 0.001, 0.0001],
    "C": [0.001, 0.01, 0.1, 1, 10, 100]
}

In [31]:
from sklearn.model_selection import GridSearchCV
grid = GridSearchCV(svm_classifier, param_grid, cv=5, scoring='accuracy')
grid.fit(X_train, y_train)

GridSearchCV(cv=5, estimator=SVC(kernel='linear'),
             param_grid={'C': [0.001, 0.01, 0.1, 1, 10, 100],
                         'gamma': [1, 0.1, 0.01, 0.001, 0.0001],
                         'kernel': ['linear', 'poly', 'rbf', 'sigmoid']},
             scoring='accuracy')

In [32]:
grid.best_score_

0.9748417721518987

In [33]:
grid.best_params_

{'C': 0.1, 'gamma': 1, 'kernel': 'linear'}

##### Basically, if you have a multi-class problem with plenty of data predict_proba as suggested earlier works well. 
##### Otherwise, you may have to make do with an ordering that doesn't yield probability scores from decision_function.
Probability = "True"