In [2]:
# Import pandas
import pandas as pd

# Import SVM Classifier
from sklearn.svm import SVC

# Import train test split
from sklearn.model_selection import train_test_split

# Import score metrics
from sklearn.metrics import accuracy_score, recall_score, precision_score

# Import datasets
from sklearn.datasets import load_breast_cancer

In [3]:
# Load cancer data
cancer_data = load_breast_cancer()

# Extract X and y as pandas dataframe
X = pd.DataFrame(cancer_data['data'], columns=cancer_data['feature_names'])
y = pd.Series(cancer_data['target'])

# Print the shape
print(X.shape, y.shape)

(569, 30) (569,)


In [5]:
# Split the data into training and test
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size= 0.2)

# Print the size
print("Shape of training data: {} \nShape of the test data: {}".format(X_train.shape, X_test.shape))

Shape of training data: (455, 30) 
Shape of the test data: (114, 30)


In [13]:
# Build the model
for k in ['linear', 'poly', 'rbf', 'sigmoid']:
    svm_classifier = SVC(kernel=k)

    # fit the training data
    svm_classifier.fit(X_train, y_train)

    # Predict on test data
    y_pred = svm_classifier.predict(X_test)

    # Compute accuracy score
    print("Accuracy for {} SVM: {}".format(k, accuracy_score(y_test, y_pred)))
    print("Precision for {} SVM: {}".format(k, precision_score(y_test, y_pred)))
    print("Recall for {} SVM: {}".format(k, recall_score(y_test, y_pred)))
    print("\n")



Accuracy for linear SVM: 0.9649122807017544
Precision for linear SVM: 0.9506172839506173
Recall for linear SVM: 1.0


Accuracy for poly SVM: 0.9122807017543859
Precision for poly SVM: 0.8941176470588236
Recall for poly SVM: 0.987012987012987


Accuracy for rbf SVM: 0.9122807017543859
Precision for rbf SVM: 0.8941176470588236
Recall for rbf SVM: 0.987012987012987


Accuracy for sigmoid SVM: 0.4824561403508772
Precision for sigmoid SVM: 0.6097560975609756
Recall for sigmoid SVM: 0.6493506493506493




**Interpretation**: In the above, linear kernel seems to perform very well with acuracy of 96% and precision of 95% with recall 100%. This is infact a very good performance of the model.