# Support Vector Machine (SVM)

## Importing the libraries

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import joblib
from sklearn.metrics import confusion_matrix,accuracy_score


## Importing the dataset

In [2]:
dataset = pd.read_csv(r'C:\Users\TESS/Desktop/New folder/Diabetes_data.csv')

In [3]:
dataset.columns

Index(['Pregnancies', 'Glucose', 'BloodPressure', 'SkinThickness', 'Insulin',
       'BMI', 'DiabetesPedigreeFunction', 'Age', 'Outcome'],
      dtype='object')

In [4]:
dataset = dataset[['Pregnancies', 'Glucose', 'BloodPressure', 'SkinThickness', 'Insulin',
       'BMI', 'Age','DiabetesPedigreeFunction',  'Outcome']]
dataset

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,Age,DiabetesPedigreeFunction,Outcome
0,6,148,72,35,0,33.6,50,0.627,1
1,1,85,66,29,0,26.6,31,0.351,0
2,8,183,64,0,0,23.3,32,0.672,1
3,1,89,66,23,94,28.1,21,0.167,0
4,0,137,40,35,168,43.1,33,2.288,1
...,...,...,...,...,...,...,...,...,...
763,10,101,76,48,180,32.9,63,0.171,0
764,2,122,70,27,0,36.8,27,0.340,0
765,5,121,72,23,112,26.2,30,0.245,0
766,1,126,60,0,0,30.1,47,0.349,1


In [5]:
X = dataset.drop('Outcome',axis=1)
y = dataset['Outcome']

## Splitting the dataset into the Training set and Test set

In [6]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.25, random_state = 0)

## Feature Scaling

In [7]:
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

In [8]:
dataset.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 768 entries, 0 to 767
Data columns (total 9 columns):
 #   Column                    Non-Null Count  Dtype  
---  ------                    --------------  -----  
 0   Pregnancies               768 non-null    int64  
 1   Glucose                   768 non-null    int64  
 2   BloodPressure             768 non-null    int64  
 3   SkinThickness             768 non-null    int64  
 4   Insulin                   768 non-null    int64  
 5   BMI                       768 non-null    float64
 6   Age                       768 non-null    int64  
 7   DiabetesPedigreeFunction  768 non-null    float64
 8   Outcome                   768 non-null    int64  
dtypes: float64(2), int64(7)
memory usage: 54.1 KB


## Training the SVM model on the Training set

In [9]:
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVC
svc = SVC(class_weight='balanced')
param_grid_svc = {'C':[0.001, 0.01, 0.1, 0.5, 1],'gamma':['scale', 'auto']}
grid_svc_model = GridSearchCV(svc, param_grid=param_grid_svc)
grid_svc_model.fit(X_test,y_test);
grid_svc_model.best_params_
svc_pred = grid_svc_model.predict(X_test)

In [10]:
grid_svc_model.best_params_


{'C': 1, 'gamma': 'scale'}

In [11]:
acc_score=grid_svc_model.score(X_test,y_test)
acc_score

0.8645833333333334

## Making the Confusion Matrix

In [12]:
from sklearn.metrics import confusion_matrix, accuracy_score
y_pred = grid_svc_model.predict(X_test)
cm = confusion_matrix(y_test, svc_pred)
print(cm)
accuracy_score(y_test, y_pred)

[[108  22]
 [  4  58]]


0.8645833333333334

In [13]:
y = grid_svc_model.predict(sc.transform([[6, 148, 72, 35, 0, 33.6, 0.627, 50]]))
y[0]



1

## Saving the Model

In [14]:
joblib.dump(grid_svc_model,'Diabetes-Melitus')

['Diabetes-Melitus']

In [15]:
joblib.dump(sc,'Diabetes_scaler.gz')

['Diabetes_scaler.gz']

In [None]:
model=joblib.load('Diabetes-Melitus')

In [None]:
model_scaler=joblib.load('Diabetes_scaler.gz')

In [None]:
y1 = model.predict(model_scaler.transform([[6, 148, 72, 35, 0, 33.6, 0.627, 50]]))
y1[0]