In [2]:
import pickle

import numpy as np
import pandas as pd
from sklearn import svm
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

In [None]:
# IPython-only help syntax: a trailing `?` displays the function's docstring.
# Interactive aid for development; this line is not valid in a plain Python script.
pd.read_csv?

In [5]:
# Load the dataset; the relative path is resolved against the kernel's working directory.
diabetes_dataset = pd.read_csv('diabetes.csv')

In [6]:
# Preview the first five rows to check the columns and value types.
diabetes_dataset.head()

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1


In [7]:
# We need the Outcome column as the label (y)
# Separating the data & labels

In [7]:
# Feature matrix: every column except the label. `columns=` already names the
# axis being dropped, so the redundant `axis=1` argument is omitted.
X = diabetes_dataset.drop(columns='Outcome')
# Target vector: the 'Outcome' label column.
y = diabetes_dataset['Outcome']

In [None]:
# We need to standardize our dataset by fitting & transforming.
# This rescales each feature to zero mean and unit variance (not to a 0 - 1 range),
# which puts all features on a comparable scale and helps the SVM.

In [8]:
# Standardize every feature column to zero mean and unit variance.
# fit_transform learns the per-column statistics and applies them in one call,
# so `standardized_data` is never bound to the estimator object itself.
# The fitted `scaler` is kept because later cells reuse it on new samples.
# NOTE(review): the scaler is fitted on the full dataset before the train/test
# split, so test rows influence the scaling statistics; consider fitting on
# the training split only.
scaler = StandardScaler()
standardized_data = scaler.fit_transform(X)

In [9]:
# From here on X is the standardized array returned by the scaler, not the
# original DataFrame.
# NOTE: re-running the scaling cell after this one would refit `scaler` on
# already-standardized values, so run the notebook top to bottom.
X = standardized_data

In [10]:
# Hold out 20% of the rows for testing; stratify on y so both splits keep the
# same class proportions, and pin random_state so the split is reproducible.
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=2,
)

In [11]:
# Shapes of the full feature matrix and of the train / test partitions.
print(*(features.shape for features in (X, X_train, X_test)))

(768, 8) (614, 8) (154, 8)


### Training model with SVM (Support vector machine)

In [12]:
# Support vector classifier with a linear kernel (no other SVC options are overridden here).
classifier = svm.SVC(kernel='linear')

In [13]:
# Fit the classifier on the training split only; the test split is held back for evaluation.
classifier.fit(X_train, Y_train)

# Model Evaluation - Check the accuracy of the model prediction

In [None]:
# Accuracy on training data 

In [14]:
# Predict on the rows the model was trained on and score those predictions.
# accuracy_score's signature is (y_true, y_pred): ground-truth labels go first.
X_train_prediction = classifier.predict(X_train)
training_data_accuracy = accuracy_score(Y_train, X_train_prediction)

In [15]:
# Accuracy measured on the training split.
print('Accuracy score:', training_data_accuracy)

Accuracy score: 0.7866449511400652


In [None]:
# Accuracy on test data 

In [16]:
# Predict on the held-out rows and score those predictions.
# accuracy_score's signature is (y_true, y_pred): ground-truth labels go first.
X_test_prediction = classifier.predict(X_test)
test_data_accuracy = accuracy_score(Y_test, X_test_prediction)

In [18]:
# Accuracy measured on the held-out test split.
print('Accuracy score:', test_data_accuracy)

Accuracy score: 0.7727272727272727


# Making a predictive system

In [19]:
# Raw feature values for one patient.
# NOTE(review): presumably listed in the dataset's feature-column order
# (Pregnancies ... Age) - confirm before changing the values.
input_data = (13,145,82,19,110,22.2,0.245,57)

# A single instance must be a 2-D array of shape (1, n_features) before it can
# be scaled or classified.
input_data = np.asarray(input_data).reshape(1, -1)

# Apply the same standardization that was fitted on the dataset.
input_data = scaler.transform(input_data)

prediction = classifier.predict(input_data)
print(prediction)

# Label 0 is reported as non-diabetic; any other label as diabetic.
print('Non-Diabetic' if prediction[0] == 0 else 'Diabetic')

[0]
Non-Diabetic




In [20]:
# Saving the model

filename = "trained_diabetic_model.sav"

# Use a context manager so the file handle is flushed and closed even if
# pickling fails (a bare open() passed to pickle.dump is never closed).
# NOTE(review): only the classifier is written; predictions on new data also
# need the fitted `scaler`, which is not persisted here.
with open(filename, "wb") as model_file:
    pickle.dump(classifier, model_file)

In [20]:
# Test saved model

# Context manager closes the file once the model is loaded (a bare open()
# passed to pickle.load is never closed).
# NOTE: pickle.load can execute arbitrary code - only load files you created yourself.
with open(filename, 'rb') as model_file:
    loaded_model = pickle.load(model_file)

input_data = (13,145,82,19,110,22.2,0.245,57)

# Change the input data to a numpy array
input_data = np.asarray(input_data)

# Reshape to (1, n_features) since we're predicting for a single instance
input_data = input_data.reshape(1, -1)

# Standardize with the scaler still held in the notebook session
# (the scaler is not part of the saved file).
input_data = scaler.transform(input_data)

prediction = loaded_model.predict(input_data)

print(prediction)

if prediction[0] == 0:
    print('Non-Diabetic')
else:
    print('Diabetic')

[0]
Non-Diabetic


