Bajrang_hospital

In [1]:
import os
from typing import Dict, Any
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler,OneHotEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report,confusion_matrix,roc_auc_score
from sklearn.ensemble import RandomForestClassifier
%matplotlib inline
from sklearn.impute import SimpleImputer
import joblib

In [3]:
data=pd.read_csv('Data/bajrang_hospital_health_risk.csv')

In [4]:
data

Unnamed: 0,patient_id,patient_name,gender,age,bmi,blood_pressure,cholesterol,blood_sugar,heart_rate,diabetes_history,smoking,alcohol,exercise_level,risk
0,1000,Mia,M,53,29.8,175,273,154,62,1,0,0,Medium,0
1,1001,Aditya,M,24,28.6,163,287,167,65,1,1,1,Low,1
2,1002,Isabella,M,67,23.9,102,291,172,70,0,0,1,High,0
3,1003,Arjun,M,70,31.9,144,246,136,88,0,0,0,Low,1
4,1004,Ananya,M,80,18.5,149,197,118,110,0,0,0,Low,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
295,1295,Abigail,M,26,26.8,167,180,156,92,1,1,0,Medium,0
296,1296,Saanvi,M,54,34.6,111,193,108,79,1,1,1,Low,0
297,1297,Diya,M,27,29.5,175,187,170,90,1,0,1,Medium,1
298,1298,Olivia,F,67,34.8,161,223,197,68,1,1,1,Medium,1


In [5]:
data.columns

Index(['patient_id', 'patient_name', 'gender', 'age', 'bmi', 'blood_pressure',
       'cholesterol', 'blood_sugar', 'heart_rate', 'diabetes_history',
       'smoking', 'alcohol', 'exercise_level', 'risk'],
      dtype='object')

In [6]:
data.isna().sum()

patient_id          0
patient_name        0
gender              0
age                 0
bmi                 0
blood_pressure      0
cholesterol         0
blood_sugar         0
heart_rate          0
diabetes_history    0
smoking             0
alcohol             0
exercise_level      0
risk                0
dtype: int64

In [7]:
data.dtypes

patient_id            int64
patient_name         object
gender               object
age                   int64
bmi                 float64
blood_pressure        int64
cholesterol           int64
blood_sugar           int64
heart_rate            int64
diabetes_history      int64
smoking               int64
alcohol               int64
exercise_level       object
risk                  int64
dtype: object

In [8]:
data.drop({'patient_id','patient_name'},axis=1,inplace=True)

In [9]:
data['gender'] = data['gender'].map({'M': 1, 'F': 0})
data['exercise_level'] = data['exercise_level'].map({'Low': 0, 'Medium': 1, 'High': 2})

In [10]:
data.isna().sum()

gender              0
age                 0
bmi                 0
blood_pressure      0
cholesterol         0
blood_sugar         0
heart_rate          0
diabetes_history    0
smoking             0
alcohol             0
exercise_level      0
risk                0
dtype: int64

In [11]:
X = data[['gender','age','bmi','blood_pressure','cholesterol','blood_sugar','heart_rate',
      'diabetes_history','smoking','alcohol','exercise_level']]
y = data['risk']

In [12]:
data.head(10)

Unnamed: 0,gender,age,bmi,blood_pressure,cholesterol,blood_sugar,heart_rate,diabetes_history,smoking,alcohol,exercise_level,risk
0,1,53,29.8,175,273,154,62,1,0,0,1,0
1,1,24,28.6,163,287,167,65,1,1,1,0,1
2,1,67,23.9,102,291,172,70,0,0,1,2,0
3,1,70,31.9,144,246,136,88,0,0,0,0,1
4,1,80,18.5,149,197,118,110,0,0,0,0,0
5,0,64,19.7,138,251,200,91,0,0,1,2,0
6,0,32,30.9,165,228,146,92,0,0,0,2,0
7,0,24,27.8,129,207,198,72,0,0,1,1,0
8,1,80,30.4,112,214,187,99,1,1,1,0,0
9,0,62,24.2,103,152,200,78,1,1,0,0,1


In [13]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

In [14]:
model = RandomForestClassifier()
model.fit(X_train, y_train)

In [15]:
y_pred = model.predict(X_test)

In [16]:
data.head()

Unnamed: 0,gender,age,bmi,blood_pressure,cholesterol,blood_sugar,heart_rate,diabetes_history,smoking,alcohol,exercise_level,risk
0,1,53,29.8,175,273,154,62,1,0,0,1,0
1,1,24,28.6,163,287,167,65,1,1,1,0,1
2,1,67,23.9,102,291,172,70,0,0,1,2,0
3,1,70,31.9,144,246,136,88,0,0,0,0,1
4,1,80,18.5,149,197,118,110,0,0,0,0,0


In [17]:
X = data.drop(columns=['risk'])
y = data['risk']

In [18]:
X

Unnamed: 0,gender,age,bmi,blood_pressure,cholesterol,blood_sugar,heart_rate,diabetes_history,smoking,alcohol,exercise_level
0,1,53,29.8,175,273,154,62,1,0,0,1
1,1,24,28.6,163,287,167,65,1,1,1,0
2,1,67,23.9,102,291,172,70,0,0,1,2
3,1,70,31.9,144,246,136,88,0,0,0,0
4,1,80,18.5,149,197,118,110,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...
295,1,26,26.8,167,180,156,92,1,1,0,1
296,1,54,34.6,111,193,108,79,1,1,1,0
297,1,27,29.5,175,187,170,90,1,0,1,1
298,0,67,34.8,161,223,197,68,1,1,1,1


In [19]:
y

0      0
1      1
2      0
3      1
4      0
      ..
295    0
296    0
297    1
298    1
299    1
Name: risk, Length: 300, dtype: int64

In [20]:
auc = roc_auc_score(y_test,y_pred)

In [21]:
auc

0.546875

In [22]:
X_train

Unnamed: 0,gender,age,bmi,blood_pressure,cholesterol,blood_sugar,heart_rate,diabetes_history,smoking,alcohol,exercise_level
44,1,65,31.3,114,205,125,98,0,1,0,0
85,0,47,20.2,152,164,180,92,1,1,0,1
13,0,44,33.3,169,152,114,103,0,1,0,1
169,1,53,22.9,137,245,149,67,1,0,1,0
251,0,25,21.7,177,223,122,85,1,1,0,1
...,...,...,...,...,...,...,...,...,...,...,...
30,1,63,20.5,132,163,182,67,0,1,1,2
209,0,30,24.2,163,295,143,62,0,1,1,1
281,1,57,21.7,108,157,102,65,1,1,1,0
147,1,33,23.3,180,260,99,88,1,0,0,1


In [23]:
y_train

44     0
85     0
13     0
169    1
251    1
      ..
30     0
209    0
281    1
147    0
7      0
Name: risk, Length: 240, dtype: int64

In [27]:
y_test

54     1
195    0
181    1
2      0
69     1
227    0
48     0
105    1
98     0
148    0
193    1
76     0
166    0
47     1
151    0
11     0
38     1
15     1
109    0
247    1
94     0
259    1
23     1
292    1
215    1
104    0
72     1
164    0
257    0
128    1
140    1
159    1
22     0
0      0
146    1
244    1
51     1
179    0
21     1
125    1
86     1
37     0
294    0
103    1
280    1
81     0
175    0
202    1
107    1
230    0
287    0
268    1
177    0
112    0
248    0
3      1
182    0
216    1
139    1
168    1
Name: risk, dtype: int64

In [26]:
print(classification_report(y_test,y_pred))

              precision    recall  f1-score   support

           0       0.50      0.75      0.60        28
           1       0.61      0.34      0.44        32

    accuracy                           0.53        60
   macro avg       0.56      0.55      0.52        60
weighted avg       0.56      0.53      0.51        60

