<a href="https://colab.research.google.com/github/WhyAvya/vitalis/blob/main/Hypertension.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **Hypertension**

## **Load data**

In [17]:
import pandas as pd


# Two CSV sources hosted in the project's GitHub repository.
url1 = "https://raw.githubusercontent.com/WhyAvya/vitalis/refs/heads/main/Heart%20Database/Hypertension%20Database/Hypertension-risk-model-main.csv"
url2 = "https://raw.githubusercontent.com/WhyAvya/vitalis/refs/heads/main/Heart%20Database/Hypertension%20Database/Hypertension_data.csv"

df1 = pd.read_csv(url1)
df2 = pd.read_csv(url2)


# Columns kept for modelling; anything else in the CSVs is discarded.
important_cols = ['age', 'BMI', 'currentSmoker', 'heartRate', 'sysBP', 'diaBP']

# The filters below skip columns a source does not have. Surface that here,
# because every row from such a source ends up with NaN in those columns
# once the frames are concatenated.
for source_name, source_frame in (("url1", df1), ("url2", df2)):
    absent_cols = [col for col in important_cols if col not in source_frame.columns]
    if absent_cols:
        print(f"Warning: {source_name} has no columns {absent_cols}; its rows will hold NaN there.")

df1 = df1[[col for col in important_cols if col in df1.columns]]
df2 = df2[[col for col in important_cols if col in df2.columns]]


# ignore_index=True gives the stacked frame a fresh 0..n-1 index; without it
# each source keeps its own 0-based labels and the result has duplicate
# row labels.
combined_data = pd.concat([df1, df2], ignore_index=True)


print(combined_data.head())
print("\nTotal rows:", len(combined_data))

   age    BMI  currentSmoker  heartRate  sysBP  diaBP
0   39  26.97              0       80.0  106.0   70.0
1   46  28.73              0       95.0  121.0   81.0
2   48  25.34              1       75.0  127.5   80.0
3   61  28.58              1       65.0  150.0   95.0
4   46  23.10              1       85.0  130.0   84.0

Total rows: 5247


### Risk attribute


In [18]:

# Hypertension label: 1 when systolic BP >= 140 or diastolic BP >= 90, else 0.
#
# A comparison against NaN evaluates to False, so a row with a missing
# reading would silently be labelled 0 ("no risk"). Keep only the rows where
# both readings are present so every label is backed by real measurements.
combined_data = combined_data.dropna(subset=['sysBP', 'diaBP']).copy()

high_systolic = combined_data['sysBP'] >= 140
high_diastolic = combined_data['diaBP'] >= 90
combined_data['Risk'] = (high_systolic | high_diastolic).astype(int)

print(combined_data['Risk'].value_counts())

Risk
0    3734
1    1513
Name: count, dtype: int64


### Data splitting


In [19]:
from sklearn.model_selection import train_test_split

# Feature matrix: the six predictor columns, in the order the model will see them.
# NOTE(review): 'Risk' is computed from thresholds on sysBP and diaBP, and both
# columns are also features here, so the classifier can recover the label
# directly from its inputs. Read the downstream accuracy with that in mind.
feature_columns = ['age', 'BMI', 'currentSmoker', 'heartRate', 'sysBP', 'diaBP']
X = combined_data[feature_columns]

# Target vector: the binary Risk label.
y = combined_data['Risk']


# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)



## Model Training


In [20]:
from sklearn.linear_model import LogisticRegression
from sklearn.impute import SimpleImputer
import numpy as np


# Learn per-column medians from the training split only, then apply those
# same medians to both splits so the test rows never influence the fill values.
imputer = SimpleImputer(strategy='median').fit(X_train)
X_train_filled = imputer.transform(X_train)
X_test_filled = imputer.transform(X_test)


# Logistic regression with scikit-learn's default settings.
model = LogisticRegression().fit(X_train_filled, y_train)

# Score the fitted model on the held-out rows.
test_accuracy = model.score(X_test_filled, y_test)
print("Accuracy:", test_accuracy)


Accuracy: 0.9666666666666667


## Evaluate Model


In [21]:
from sklearn.metrics import classification_report, confusion_matrix

# Hard class predictions for the held-out rows.
y_pred = model.predict(X_test_filled)

# Build both summaries first, then print each under its heading.
report_text = classification_report(y_test, y_pred)
matrix = confusion_matrix(y_test, y_pred)

print("Classification Report:", report_text, sep="\n")
print("\nConfusion Matrix:", matrix, sep="\n")

Classification Report:
              precision    recall  f1-score   support

           0       0.97      0.98      0.98       743
           1       0.96      0.93      0.94       307

    accuracy                           0.97      1050
   macro avg       0.96      0.96      0.96      1050
weighted avg       0.97      0.97      0.97      1050


Confusion Matrix:
[[730  13]
 [ 22 285]]


### Prediction


In [25]:
def predict_hypertension_risk(age, bmi, smoker, heart_rate, sys_bp, dia_bp):
    """Return one person's predicted hypertension risk as a formatted percentage.

    Relies on the notebook-level ``imputer`` and ``model`` objects: the inputs
    are passed through ``imputer.transform`` and then ``model.predict_proba``.

    Args:
        age: Value for the 'age' feature.
        bmi: Value for the 'BMI' feature.
        smoker: Value for the 'currentSmoker' feature (the example call passes 1).
        heart_rate: Value for the 'heartRate' feature.
        sys_bp: Value for the 'sysBP' feature.
        dia_bp: Value for the 'diaBP' feature.

        Any argument may be ``None`` or NaN; it is then filled in by the imputer.

    Returns:
        A string of the form ``"Hypertension Risk: 57.4%"``, where the number is
        the predicted probability of class 1 scaled to a percentage.
    """
    # Same column names and order as the training feature matrix. Passing a
    # named DataFrame (instead of a bare nested list) matches the feature
    # names the imputer was fitted with, so scikit-learn can validate them
    # rather than warn about missing feature names.
    feature_names = ['age', 'BMI', 'currentSmoker', 'heartRate', 'sysBP', 'diaBP']
    user_data = pd.DataFrame(
        [[age, bmi, smoker, heart_rate, sys_bp, dia_bp]],
        columns=feature_names,
        dtype=float,  # turns None into NaN so the imputer can fill it
    )
    user_data_filled = imputer.transform(user_data)

    # predict_proba yields one row per sample; column 1 is the positive class.
    risk_prob = model.predict_proba(user_data_filled)[0][1]
    return f"Hypertension Risk: {risk_prob*100:.1f}%"


# Example inputs for a single person, keyed by the function's parameter names.
example_patient = {
    'age': 40,
    'bmi': 28,
    'smoker': 1,
    'heart_rate': 84,
    'sys_bp': 135,
    'dia_bp': 88,
}
print(predict_hypertension_risk(**example_patient))

Hypertension Risk: 57.4%




In [27]:
import joblib

# Persist both fitted objects: prediction runs inputs through the imputer
# before the model, so the two files are needed together.
artifacts = {
    'hypertension_model.pkl': model,
    'imputer.pkl': imputer,
}
for filename, fitted_object in artifacts.items():
    joblib.dump(fitted_object, filename)

print("Model and imputer saved!")

Model and imputer saved!
