# **Data Model to Predict Diabetes**

# **Importing the Libraries**

In [None]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn import svm
from sklearn.metrics import accuracy_score

# **Data Collection**

In [None]:
# Location of the diabetes CSV (presumably a Colab Google Drive mount, going
# by the path -- adjust when running elsewhere).
DATA_PATH = '/content/drive/MyDrive/diabetes_data.csv'

# Read the CSV into a DataFrame; the bare name on the last line makes the
# notebook render the frame as this cell's output.
data_set = pd.read_csv(DATA_PATH)
data_set

Unnamed: 0,Age,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Outcome
0,21,108,62,32,56,25.2,0.128,0
1,21,137,68,14,148,24.8,0.143,0
2,21,89,66,23,94,28.1,0.167,0
3,21,139,62,17,210,22.1,0.207,0
4,21,99,76,15,51,23.2,0.223,0
...,...,...,...,...,...,...,...,...
387,60,129,90,7,326,19.6,0.582,0
388,60,181,68,36,495,30.1,0.615,1
389,61,142,60,33,190,28.8,0.687,0
390,63,101,76,48,180,32.9,0.171,0


# **Data Preparation**

## **Data Separation**

In [None]:
# Features: every column of the loaded frame except the target label.
X = data_set.drop(columns='Outcome')
# Target: the 'Outcome' column.
Y = data_set['Outcome']

## **Standardization and Scaling**

In [None]:
# Learn the per-column scaling statistics from X and apply them in one step
# (fit_transform is equivalent to fit followed by transform on the same data).
# NOTE: X must still be the raw feature frame at this point. A later cell
# rebinds X to the standardized array, so re-running this cell afterwards
# would refit the scaler on already-scaled values.
scaler = StandardScaler()
standardized_data = scaler.fit_transform(X)
print(standardized_data)

[[-0.9682991  -0.47459086 -0.69416397 ... -0.84300375 -1.12360354
  -1.14490437]
 [-0.9682991   0.46631407 -0.21340023 ... -0.06787532 -1.18059423
  -1.10143204]
 [-0.9682991  -1.09104581 -0.37365481 ... -0.52284201 -0.710421
  -1.03187632]
 ...
 [ 2.95798221  0.62853906 -0.85441855 ...  0.28598766 -0.61068728
   0.47516437]
 [ 3.15429628 -0.70170584  0.42761809 ...  0.20173457 -0.02653266
  -1.0202837 ]
 [ 4.92112287  0.36897908  0.26736351 ... -0.80930251 -1.02386982
  -0.18271685]]


In [None]:
# From here on X refers to the standardized feature matrix rather than the raw
# feature columns; Y is (re)bound to the 'Outcome' label column.
X, Y = standardized_data, data_set['Outcome']

## **Data Splitting**

In [None]:
# Split the *raw* features first so the held-out rows never influence the
# scaling statistics. Fitting StandardScaler on the whole dataset before
# splitting leaks the test rows' mean/variance into training and makes the
# test score optimistic.
# The split depends only on the row count, Y and random_state, so the same
# rows land in train/test as when splitting the pre-standardized matrix.
raw_features = data_set.drop('Outcome', axis=1)
X_train_raw, X_test_raw, Y_train, Y_test = train_test_split(
    raw_features, Y, test_size=0.2, stratify=Y, random_state=2
)

# Re-fit `scaler` on the training rows only. The final prediction cell reuses
# this same `scaler` object on new input, so it has to be the one whose
# statistics the classifier was trained with.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)


# **Training Model**

In [None]:
# Build a linear-kernel support vector classifier and train it on the training
# split. fit() returns the estimator itself, so the call can be chained.
classifier = svm.SVC(kernel='linear').fit(X_train, Y_train)
# Keep the fitted estimator as the cell's last expression so it is displayed.
classifier

## **Finding the Accuracy**

In [None]:
# Accuracy on the same rows the model was trained on.
X_train_prediction = classifier.predict(X_train)
# accuracy_score's signature is (y_true, y_pred): ground truth goes first.
training_data_accuracy = accuracy_score(Y_train, X_train_prediction)
print(training_data_accuracy)

0.8178913738019169


In [None]:
# Accuracy on the held-out test split.
X_test_prediction = classifier.predict(X_test)
# accuracy_score's signature is (y_true, y_pred): ground truth goes first.
test_data_accuracy = accuracy_score(Y_test, X_test_prediction)
print(test_data_accuracy)

0.7721518987341772


# **Final Model Testing**

In [None]:
# One hand-entered sample. The values are assumed to follow the order of the
# feature columns of data_set (every column except 'Outcome').
input_data = (35,186,84,42,89,35,0.286)
input_data_as_numpy_array = np.asarray(input_data)
# The scaler and classifier expect 2-D input: one row, one column per feature.
input_data_reshaped = input_data_as_numpy_array.reshape(1,-1)

# Label the row with the training column names before scaling. The scaler was
# fitted on a DataFrame, and scikit-learn (>= 1.0) warns when an estimator
# fitted with feature names is later given an unnamed array.
feature_names = data_set.columns.drop('Outcome')
input_frame = pd.DataFrame(input_data_reshaped, columns=feature_names)
std_data = scaler.transform(input_frame)

prediction = classifier.predict(std_data)
print(prediction)

# predict() returns one label per input row; only one row was supplied.
if prediction[0] == 0:
    print('The person is not diabetic\n')
else:
    print('The person is diabetic\n')