In [1]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn import svm
from sklearn.metrics import accuracy_score
import pickle
from sklearn.preprocessing import LabelEncoder

In [2]:
dataset = pd.read_excel('PPG-BP dataset.xlsx') 

In [3]:
dataset.head(2)

Unnamed: 0,Num.,subject_ID,Sex_M_F,Age_year,Height_cm,Weight_kg,Systolic Blood Pressure_mmHg,Diastolic Blood Pressure_mmHg,Heart_Rate_b_m,BMI_kg_m2,Hypertension,Diabetes,cerebral_infarction,cerebrovascular_disease
0,1,2,Female,45,152,63,161,89,97,27.268006,Stage 2 hypertension,,,
1,2,3,Female,50,157,50,160,93,76,20.284799,Stage 2 hypertension,,,


In [4]:
dataset['Diabetes'].value_counts()

Type 2 Diabetes    37
Diabetes            1
Name: Diabetes, dtype: int64

In [5]:
dataset.loc[dataset['Diabetes'] == "Type 2 Diabetes", 'Diabetes'] = 1
dataset.loc[dataset['Diabetes'] == "Diabetes", 'Diabetes'] = 1

dataset.loc[dataset['Sex_M_F'] == "Male", 'Sex_M_F'] = 1
dataset.loc[dataset['Sex_M_F'] == "Female", 'Sex_M_F'] = 2

In [6]:
dataset['Diabetes'].value_counts()

1    38
Name: Diabetes, dtype: int64

In [7]:
# dataset = dataset.fillna(0)
dataset = dataset.drop(['Num.', 'subject_ID'], axis=1)

In [8]:
dataset.head(2)

Unnamed: 0,Sex_M_F,Age_year,Height_cm,Weight_kg,Systolic Blood Pressure_mmHg,Diastolic Blood Pressure_mmHg,Heart_Rate_b_m,BMI_kg_m2,Hypertension,Diabetes,cerebral_infarction,cerebrovascular_disease
0,2,45,152,63,161,89,97,27.268006,Stage 2 hypertension,,,
1,2,50,157,50,160,93,76,20.284799,Stage 2 hypertension,,,


In [9]:
dataset['cerebrovascular_disease'].value_counts()

insufficiency of cerebral blood supply    15
cerebrovascular disease                   10
Name: cerebrovascular_disease, dtype: int64

In [10]:
le = LabelEncoder()

dataset['Hypertension']            = le.fit_transform(dataset['Hypertension'])
dataset['cerebral_infarction']     = le.fit_transform(dataset['cerebral_infarction'])
dataset['cerebrovascular_disease'] = le.fit_transform(dataset['cerebrovascular_disease'])

# dataset = dataset.fillna(0)
dataset.head(2)

Unnamed: 0,Sex_M_F,Age_year,Height_cm,Weight_kg,Systolic Blood Pressure_mmHg,Diastolic Blood Pressure_mmHg,Heart_Rate_b_m,BMI_kg_m2,Hypertension,Diabetes,cerebral_infarction,cerebrovascular_disease
0,2,45,152,63,161,89,97,27.268006,3,,1,2
1,2,50,157,50,160,93,76,20.284799,3,,1,2


In [11]:
dataset = dataset.fillna(0)

In [12]:
dataset['Diabetes'].value_counts()

0    181
1     38
Name: Diabetes, dtype: int64

In [13]:
dataset.groupby('Diabetes').mean()

Unnamed: 0_level_0,Sex_M_F,Age_year,Height_cm,Weight_kg,Systolic Blood Pressure_mmHg,Diastolic Blood Pressure_mmHg,Heart_Rate_b_m,BMI_kg_m2,Hypertension,cerebral_infarction,cerebrovascular_disease
Diabetes,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
0,1.524862,56.751381,161.292818,59.624309,127.281768,71.519337,73.756906,22.869509,0.950276,0.889503,1.80663
1,1.526316,59.157895,160.921053,62.894737,131.105263,73.421053,73.078947,24.240552,1.078947,1.0,2.0


In [14]:
X = dataset.drop(columns = 'Diabetes', axis=1)
Y = dataset['Diabetes']

In [15]:
scaler = StandardScaler()

In [16]:
scaler.fit(X)

In [17]:
standardized_data = scaler.transform(X)

In [18]:
X = standardized_data
Y = dataset['Diabetes']

In [19]:
X_train, X_test, Y_train, Y_test = train_test_split(X,Y, test_size = 0.2, stratify=Y, random_state=2)

In [20]:
classifier = svm.SVC(kernel='linear')

In [21]:
classifier.fit(X_train, Y_train)

In [22]:
X_train_prediction = classifier.predict(X_train)
training_data_accuracy = accuracy_score(X_train_prediction, Y_train)

In [23]:
X_test_prediction = classifier.predict(X_test)
test_data_accuracy = accuracy_score(X_test_prediction, Y_test)

In [24]:
input_data = (2, 45, 152, 63, 161, 90, 96, 25, 3, 1, 2)

# changing the input_data to numpy array
input_data_as_numpy_array = np.asarray(input_data)

# reshape the array as we are predicting for one instance
input_data_reshaped = input_data_as_numpy_array.reshape(1,-1)

# standardize the input data
std_data = scaler.transform(input_data_reshaped)
print(std_data)

prediction = classifier.predict(std_data)
print(prediction)

if (prediction[0] == 0):
  print('The person is not diabetic')
else:
  print('The person is diabetic')

[[ 0.95097204 -0.76833671 -1.12758724  0.23679803  1.62581614  1.63729046
   2.08699338  0.47370416  2.15498436  0.31702131  0.33647625]]
[0]
The person is not diabetic




In [25]:
import pickle

In [26]:
filename = "trained_model_ppg.pkl"
pickle.dump(classifier, open(filename, 'wb'))

In [27]:
#loading save model
loaded_model = pickle.load(open('trained_model_ppg.pkl', 'rb'))

In [28]:
input_data = (2, 45, 152, 63, 161, 90, 96, 25, 3, 1, 2)

# changing the input_data to numpy array
input_data_as_numpy_array = np.asarray(input_data)

# reshape the array as we are predicting for one instance
input_data_reshaped = input_data_as_numpy_array.reshape(1,-1)

# standardize the input data
std_data = scaler.transform(input_data_reshaped)
print(std_data)

prediction = classifier.predict(std_data)
print(prediction)

if (prediction[0] == 0):
  print('The person is not diabetic')
else:
  print('The person is diabetic')

[[ 0.95097204 -0.76833671 -1.12758724  0.23679803  1.62581614  1.63729046
   2.08699338  0.47370416  2.15498436  0.31702131  0.33647625]]
[0]
The person is not diabetic


