In [11]:
import pandas as pd
# Read the dataset from a CSV file located in the current working directory.
data = pd.read_csv("MHR_Dataset.csv")

In [12]:
# Preview the first rows to check that the file loaded as expected.
data.head()

Unnamed: 0,Age,SystolicBP,DiastolicBP,BS,BodyTemp,HeartRate,RiskLevel
0,25,130,80,15.0,98.0,86,high risk
1,35,140,90,13.0,98.0,70,high risk
2,29,90,70,8.0,100.0,80,high risk
3,30,140,85,7.0,98.0,70,high risk
4,35,120,60,6.1,98.0,76,low risk


In [17]:
# (rows, columns) of the loaded frame.
data.shape

(1014, 7)

In [14]:
# NOTE(review): duplicate-row removal is disabled; confirm that keeping duplicates is intended.
#data.drop_duplicates(inplace=True)

In [18]:
# Drop every row that contains at least one missing value.
data = data.dropna()

In [20]:
# Class distribution: count the rows for each risk level (most frequent first).
risk_level = data["RiskLevel"]
value_counts = risk_level.value_counts()
print(value_counts)

RiskLevel
low risk     406
mid risk     336
high risk    272
Name: count, dtype: int64


In [24]:
# Data cleaning: detect outliers with the z-score method.
from scipy.stats import zscore
import numpy as np

# A value is flagged when it lies more than this many standard deviations
# from its column mean.
Z_THRESHOLD = 3

# Map each feature column (every column except the last one, the target) to
# the values flagged as outliers; the original row index is preserved.
outliers = {}
for column in data.columns[:-1]:
    z_scores = zscore(data[column])
    is_extreme = np.abs(z_scores) > Z_THRESHOLD
    outliers[column] = data.loc[is_extreme, column]

In [28]:
# Inspect the values flagged as outliers in the HeartRate column.
outliers['HeartRate']

499    7
908    7
Name: HeartRate, dtype: int64

In [31]:
# Remove every row that holds a flagged outlier value in at least one column.
# The per-column matches are OR-ed into a single mask so the frame is
# filtered once instead of once per column.
has_outlier = pd.Series(False, index=data.index)
for column, outlier_values in outliers.items():
    has_outlier |= data[column].isin(outlier_values)
data = data[~has_outlier]

In [32]:
# (rows, columns) after the outlier rows were removed.
data.shape

(977, 7)

In [43]:
# Normalisation: z-score standardise every feature column (all columns except
# the last one, the target 'RiskLevel') as (value - column mean) / column std.
# pandas' .std() is the sample standard deviation; sklearn's StandardScaler
# offers a similar transform based on the population standard deviation.
# NOTE(review): the mean/std are computed over all rows, including the rows
# later held out as the test set — confirm this is acceptable.
feature_columns = data.columns[:-1]
features = data[feature_columns]
norm_df = data.copy()
norm_df[feature_columns] = (features - features.mean()) / features.std()

In [44]:
# Preview the standardised features.
norm_df.head()

Unnamed: 0,Age,SystolicBP,DiastolicBP,BS,BodyTemp,HeartRate,RiskLevel
0,-0.346904,0.979076,0.282276,2.185686,-0.479931,1.525022,high risk
1,0.404427,1.543516,1.011093,1.511367,-0.479931,-0.582585,high risk
2,-0.046372,-1.278686,-0.44654,-0.174429,1.058936,0.734669,high risk
3,0.028761,1.543516,0.646685,-0.511588,-0.479931,-0.582585,high risk
4,0.404427,0.414635,-1.175356,-0.815032,-0.479931,0.207768,low risk


In [38]:
# Rows per risk level in the normalised frame.
norm_df['RiskLevel'].value_counts()

RiskLevel
low risk     399
mid risk     334
high risk    244
Name: count, dtype: int64

In [45]:
from sklearn.preprocessing import LabelEncoder

# Replace the text risk labels with integer codes. The fitted encoder stays
# available as `encoder`, which holds the original labels in `classes_`.
encoder = LabelEncoder()
encoder.fit(norm_df["RiskLevel"])
norm_df["RiskLevel"] = encoder.transform(norm_df["RiskLevel"])

In [46]:
# Rows per encoded risk level.
norm_df["RiskLevel"].value_counts()

RiskLevel
1    399
2    334
0    244
Name: count, dtype: int64

In [47]:
# Preview the frame with the target now stored as integer codes.
norm_df.head()

Unnamed: 0,Age,SystolicBP,DiastolicBP,BS,BodyTemp,HeartRate,RiskLevel
0,-0.346904,0.979076,0.282276,2.185686,-0.479931,1.525022,0
1,0.404427,1.543516,1.011093,1.511367,-0.479931,-0.582585,0
2,-0.046372,-1.278686,-0.44654,-0.174429,1.058936,0.734669,0
3,0.028761,1.543516,0.646685,-0.511588,-0.479931,-0.582585,0
4,0.404427,0.414635,-1.175356,-0.815032,-0.479931,0.207768,1


In [48]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible across runs.
TEST_FRACTION = 0.2
SPLIT_SEED = 42
train_data, test_data = train_test_split(
    norm_df, test_size=TEST_FRACTION, random_state=SPLIT_SEED
)

In [49]:
# (rows, columns) of the training split.
train_data.shape

(781, 7)

In [50]:
# (rows, columns) of the test split.
test_data.shape

(196, 7)

In [51]:
# Separate the feature matrix from the encoded target for each split.
target = "RiskLevel"
X_train, Y_train = train_data.drop(columns=target), train_data[target]
X_test, Y_test = test_data.drop(columns=target), test_data[target]

In [57]:
from sklearn.linear_model import LogisticRegression

# Fit a logistic-regression classifier with scikit-learn's default settings
# on the training split.
model = LogisticRegression()
model.fit(X=X_train, y=Y_train)

In [60]:
# Predict the encoded risk level for each row of the held-out features.
Y_pred = model.predict(X_test)

In [67]:
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Evaluate the held-out predictions.
# `encoder.classes_` holds the original risk-level labels ordered by their
# integer code, so they can name the report rows instead of bare 0/1/2.
# str() keeps the names valid even if the encoder was refit on integer codes.
class_names = [str(label) for label in encoder.classes_]
class_codes = list(range(len(class_names)))

accuracy = accuracy_score(Y_test, Y_pred)
# Passing `labels` pins the row/column order and keeps every class present
# even if one never occurs in Y_test or Y_pred.
report = classification_report(
    Y_test, Y_pred, labels=class_codes, target_names=class_names
)
confusion = confusion_matrix(Y_test, Y_pred, labels=class_codes)

In [68]:
# Fraction of test rows whose predicted class equals the true class.
accuracy

0.5969387755102041

In [69]:
# Per-class precision, recall, F1 and support on the test split.
print(report)

              precision    recall  f1-score   support

           0       0.68      0.64      0.66        47
           1       0.64      0.84      0.72        85
           2       0.39      0.25      0.30        64

    accuracy                           0.60       196
   macro avg       0.57      0.57      0.56       196
weighted avg       0.57      0.60      0.57       196



In [70]:
# Confusion matrix on the test split: rows are true classes, columns are predicted classes.
print(confusion)

[[30  5 12]
 [ 1 71 13]
 [13 35 16]]
