In [6]:
# Installing dependencies 
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix

In [5]:
# Load the dataset into dataframe 
diabetes_df = pd.read_csv("diabetes_three.csv")
diabetes_df.head()

Unnamed: 0,pregnant,glucose,pressure,triceps,insulin,mass,pedigree,age,diabetes
0,6,148,72,35,0,33.6,0.627,50,1
1,8,183,64,0,0,23.3,0.672,32,1
2,0,137,40,35,168,43.1,2.288,33,1
3,5,116,74,0,0,25.6,0.201,30,0
4,3,78,50,32,88,31.0,0.248,26,1


In [7]:
# Lookup for missing values 
diabetes_df.isnull().sum()

pregnant    0
glucose     0
pressure    0
triceps     0
insulin     0
mass        0
pedigree    0
age         0
diabetes    0
dtype: int64

In [8]:
# Seperate the data into features(X) and target(y)
X = diabetes_df.drop(columns='diabetes',axis=1)
y = diabetes_df['diabetes']
print(X.shape, y.shape)

(614, 8) (614,)


In [9]:
# Check the standardability of the features
print(X.std())

pregnant      3.421484
glucose      32.326810
pressure     19.757137
triceps      15.784502
insulin     117.910600
mass          8.058937
pedigree      0.344738
age          11.894135
dtype: float64


In [10]:
# Initialize the Standard Scaler
scaler = StandardScaler()

In [13]:
# Fit and transform the features to a common range
X = scaler.fit_transform(X)

# Confirm the standardized data 
print(X.std())

1.0


In [14]:
# Split the dataset into train and test 
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.20, stratify=y)

In [15]:
# Shape of the train and test data
print(X.shape, X_train.shape, X_test.shape)

(614, 8) (491, 8) (123, 8)


In [16]:
# Initialize the classifier 
classifier = SVC(kernel='linear')

In [17]:
# Train the classifier on training dataset 
classifier.fit(X_train, y_train)

In [27]:
# Evaluate how best the model can predict 
y_train_prediction = classifier.predict(X_train)
y_test_prediction = classifier.predict(X_test)

# Evaluate with accuracy score 
accur_score_train = accuracy_score(y_train, y_train_prediction)
print(f"Accuracy score for training data = {accur_score_train}")

accur_score_test = accuracy_score(y_test, y_test_prediction)
print(f"Accuracy score for testing data = {accur_score_test}")
print(y_train_prediction)


Accuracy score for training data = 0.780040733197556
Accuracy score for testing data = 0.7560975609756098
[1 1 1 0 1 0 0 0 0 0 0 0 1 0 0 0 0 0 0 1 1 0 0 0 0 0 0 0 0 0 1 0 0 1 0 1 1
 0 0 1 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 0 1 0 0 1
 1 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 1 0 0 0 0 0 1 1 0 0 1 0 0 0 1 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 1 1 0 1 0 0 0 0 0 1 0 0 1
 0 0 0 1 1 0 1 1 1 0 0 0 0 0 1 0 1 0 0 0 0 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 1 1 1 0 0 0 1 1 0 1 0 1 0 1 0 1 0 1 0 0 0 0 0 1 0 0 0 0 1 1 0 0 1 0 1 0 0
 1 1 1 0 0 0 1 0 0 0 1 0 0 1 0 0 0 0 1 1 0 0 0 0 0 0 0 0 1 0 0 0 0 1 1 0 0
 0 0 0 1 1 0 0 0 0 0 0 1 0 0 0 0 0 0 1 0 0 1 0 0 1 0 0 0 1 1 0 1 0 1 0 0 1
 1 0 1 0 0 0 0 1 0 0 1 0 1 1 0 1 0 0 0 1 0 1 1 0 0 0 0 0 0 0 0 1 0 0 0 0 0
 1 1 1 1 0 1 0 0 0 0 0 0 0 0 1 1 0 1 0 0 1 0 1 0 0 0 0 0 0 0 1 0 0 0 0 0 1
 1 0 1 0 1 0 1 0 0 1 0 0 1 0 0 1 0 0 0 1 0 0 0 0 1 0 0 1 0 0 1 0 0 1 0 0 0
 0 1 0 1 0 0 1 0 0 0 0 0 0 0 1 1 1 0 0 0 1 1 0 0 0 0 1 0 0 1 0 1 0 0 

In [25]:
# Taking an input data for prediction from the users 
input_data = (4,110,92,0,0,37.6,0.191,30)

# Convert the input data as np array
input_data_array = np.asarray(input_data)

# Reshape the data since we are only predicting for a single instance
reshaped_input_data = input_data_array.reshape(1, -1)

# transform data into std data
std_data = scaler.transform(reshaped_input_data)

# Now make the prediction
prediction_outcome = classifier.predict(std_data)

print(prediction_outcome)

[1]
