# Support Vector Classification for Binary Classification 

In [1]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn import svm
from sklearn.metrics import accuracy_score

In [2]:
## Load the diabetes dataset from the CSV file and preview its first five rows
df = pd.read_csv(filepath_or_buffer='Diabetes.csv')
df.head(n=5)

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1


In [3]:
## Dimensions of the dataset as (number of rows, number of columns)
df.shape

(768, 9)

In [4]:
## Class balance of the Outcome column:
##   1 -> Diabetic
##   0 -> Not Diabetic
outcome_counts = df['Outcome'].value_counts()
outcome_counts

0    500
1    268
Name: Outcome, dtype: int64

In [5]:
## Mean of every determinant within each Outcome group (0 and 1),
## to compare the average profile of the two classes.
outcome_means = df.groupby(by='Outcome').mean()
outcome_means

Unnamed: 0_level_0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age
Outcome,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
0,3.298,109.98,68.184,19.664,68.792,30.3042,0.429734,31.19
1,4.865672,141.257463,70.824627,22.164179,100.335821,35.142537,0.5505,37.067164


In [6]:
##Feature Scaling the training data
x = df.drop(columns='Outcome', axis=1)
scaler = StandardScaler()

In [7]:
std_data = scaler.fit_transform(x)

In [8]:
## Defining X and y as Determinants and Results respectively
X = std_data
y = df['Outcome']

In [9]:
##Splitting data into Training and Test Data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, stratify=y, random_state=2)

In [10]:
## Build a linear-kernel support vector classifier and train it on the
## training split; fit() returns the estimator itself, so it can be chained.
classifier = svm.SVC(kernel='linear').fit(X_train, y_train)
classifier

SVC(kernel='linear')

In [11]:
##Checking the training data accuracy
X_train_prediction = classifier.predict(X_train)
# accuracy_score is documented as (y_true, y_pred): ground truth first,
# predictions second. Accuracy itself is symmetric, but keeping the
# documented order stays correct if the metric is later swapped for an
# asymmetric one such as precision or recall.
train_accuracy = accuracy_score(y_train, X_train_prediction)
print(f'The training data accuracy is\n {train_accuracy*100}%')

The training data accuracy is
 78.66449511400651%


In [12]:
##Checking the test data accuracy
X_test_prediction = classifier.predict(X_test)
# accuracy_score is documented as (y_true, y_pred): ground truth first,
# predictions second. Accuracy itself is symmetric, but keeping the
# documented order stays correct if the metric is later swapped for an
# asymmetric one such as precision or recall.
test_accuracy = accuracy_score(y_test, X_test_prediction)
print(f'The test data accuracy is\n {test_accuracy*100}%')

The test data accuracy is
 77.27272727272727%


In [13]:
##Testing the Model on a single hand-entered record

input_data = (8,183,64,0,0,23.3,0.672,32)
array_input = np.asarray(input_data)
# One sample -> a single row with one column per feature
array = array_input.reshape(1, -1)
# Apply the already-fitted scaler before handing the row to the classifier
arr = scaler.transform(array)
prediction = classifier.predict(arr)

# prediction is a one-element array; class 0 is reported as non-diabetic
message = 'The prediction is a Non diabetic' if prediction == 0 else 'The prediction is a Diabetic'
print(message)

The prediction is a Diabetic


In [14]:
##Testing the Model on a second hand-entered record

input_data = (4,110,92,0,0,37.6,0.191,30)
array_input = np.asarray(input_data)
# One sample -> a single row with one column per feature
array = array_input.reshape(1, -1)
# Apply the already-fitted scaler before handing the row to the classifier
arr = scaler.transform(array)
prediction = classifier.predict(arr)

# prediction is a one-element array; class 0 is reported as non-diabetic
message = 'The prediction is a Non diabetic' if prediction == 0 else 'The prediction is a Diabetic'
print(message)

The prediction is a Non diabetic


In [15]:
##Testing the Model on a third hand-entered record

input_data = (1,103,30,38,83,43.3,0.183,33)
# Rebuild array_input from THIS cell's input_data. Without this line the
# cell silently reuses array_input left over from an earlier cell and
# predicts on that older record instead of the one defined above.
array_input = np.asarray(input_data)
# One sample -> a single row with one column per feature
array=array_input.reshape(1, -1)
# Apply the already-fitted scaler before handing the row to the classifier
arr= scaler.transform(array)
prediction = classifier.predict(arr)


# prediction is a one-element array; class 0 is reported as non-diabetic
if prediction == 0:
    print('The prediction is a Non diabetic')
else:
    print('The prediction is a Diabetic')

The prediction is a Non diabetic
