# SVM and Kernel SVM for Diabete Classification

By Yaohang Li

In [1]:
import pandas as pd
from sklearn.svm import SVC # Import SVM Classifier
from sklearn.svm import LinearSVC # Import Linear-SVM Classifier
from sklearn.model_selection import train_test_split # Import train_test_split function
from sklearn import metrics #Import scikit-learn metrics module for accuracy calculation

## Load and inspect the dataset

In [2]:
col_names = ['pregnant', 'glucose', 'bp', 'skin', 'insulin', 'bmi', 'pedigree', 'age', 'label']
# load dataset
pima = pd.read_csv("diabetes.csv", names=col_names)
pima = pima[1:]

In [3]:
pima.head()

Unnamed: 0,pregnant,glucose,bp,skin,insulin,bmi,pedigree,age,label
1,6,148,72,35,0,33.6,0.627,50,1
2,1,85,66,29,0,26.6,0.351,31,0
3,8,183,64,0,0,23.3,0.672,32,1
4,1,89,66,23,94,28.1,0.167,21,0
5,0,137,40,35,168,43.1,2.288,33,1


## Split the dataset into a training set and a test set

In [4]:
#split dataset in features and target variable
feature_cols = ['pregnant', 'insulin', 'bmi', 'age','glucose','bp','pedigree']
X = pima[feature_cols] # Features
y = pima.label # Target variable

In [5]:
from sklearn import preprocessing

## rescale the data

In [6]:
x = pima.values #returns a numpy array
min_max_scaler = preprocessing.MinMaxScaler()
x_scaled = min_max_scaler.fit_transform(x)
pima = pd.DataFrame(x_scaled)

In [7]:
X = min_max_scaler.fit_transform(X)

## split the dataset

In [8]:
# Split dataset into training set and test set
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=6) # 70% training and 30% test

## Linear SVM

In [9]:
# Create SVM
svc = LinearSVC()

# Train SVM
svc = svc.fit(X_train,y_train)

#Predict the response for test dataset
y_pred = svc.predict(X_test)

In [10]:
# Model Accuracy, how often is the classifier correct?
print("Accuracy:",metrics.accuracy_score(y_test, y_pred))

Accuracy: 0.7748917748917749


## Polynomial Kernel

In [25]:
# Create kernel SVM
svc = SVC(kernel='poly', degree=3)

# Train kernel SVM classifier
svc = svc.fit(X_train,y_train)

#Predict the response for test dataset
y_pred = svc.predict(X_test)

In [26]:
# Model Accuracy, how often is the classifier correct?
print("Accuracy:",metrics.accuracy_score(y_test, y_pred))

Accuracy: 0.8051948051948052


## Gaussian Kernel

In [13]:
# Create kernel SVM
svc = SVC(kernel='rbf')

# Train kernel SVM
svc = svc.fit(X_train,y_train)

#Predict the response for test dataset
y_pred = svc.predict(X_test)

In [14]:
# Model Accuracy, how often is the classifier correct?
print("Accuracy:",metrics.accuracy_score(y_test, y_pred))

Accuracy: 0.7965367965367965


## Sigmoid Kernel

In [15]:
# Create kernel SVM
svc = SVC(kernel='sigmoid')

# Train kernel SVM
svc = svc.fit(X_train,y_train)

#Predict the response for test dataset
y_pred = svc.predict(X_test)

In [16]:
# Model Accuracy, how often is the classifier correct?
print("Accuracy:",metrics.accuracy_score(y_test, y_pred))

Accuracy: 0.45021645021645024
