# Hypertension 


In [1]:

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.impute import SimpleImputer

import os

# Show the kernel's current working directory; relative file paths used in
# this notebook are resolved against it.
os.getcwd()


'c:\\Users\\user\\MACHINE_LEARNING_MODEL\\Hypertension'

In [2]:
# Read the raw dataset from a CSV file located relative to the working directory.
# NOTE(review): the Medication column comes back with many missing values --
# possibly a literal "None" category being parsed as NaN by read_csv's default
# NA handling. Confirm against the raw file before imputing or encoding.
data = pd.read_csv("hypertension_dataset.csv")

# Preview the first five rows to sanity-check the parse.
data.head(5)


Unnamed: 0,Age,Salt_Intake,Stress_Score,BP_History,Sleep_Duration,BMI,Medication,Family_History,Exercise_Level,Smoking_Status,Has_Hypertension
0,69,8.0,9,Normal,6.4,25.8,,Yes,Low,Non-Smoker,Yes
1,32,11.7,10,Normal,5.4,23.4,,No,Low,Non-Smoker,No
2,78,9.5,3,Normal,7.1,18.7,,No,Moderate,Non-Smoker,No
3,38,10.0,10,Hypertension,4.2,22.1,ACE Inhibitor,No,Low,Non-Smoker,Yes
4,41,9.8,1,Prehypertension,5.8,16.2,Other,No,Moderate,Non-Smoker,No


In [3]:
# Dimensions of the dataset as (rows, columns)
print("Dataset shape:", data.shape)

# Column dtypes and non-null counts. DataFrame.info() writes its report to
# stdout itself and returns None, so it is called directly: wrapping it in
# print() emits a stray "None" line after the report.
data.info()

# Summary statistics for the numeric columns
print(data.describe())

# Column labels (displayed as the cell's output)
data.columns


Dataset shape: (1985, 11)
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1985 entries, 0 to 1984
Data columns (total 11 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   Age               1985 non-null   int64  
 1   Salt_Intake       1985 non-null   float64
 2   Stress_Score      1985 non-null   int64  
 3   BP_History        1985 non-null   object 
 4   Sleep_Duration    1985 non-null   float64
 5   BMI               1985 non-null   float64
 6   Medication        1186 non-null   object 
 7   Family_History    1985 non-null   object 
 8   Exercise_Level    1985 non-null   object 
 9   Smoking_Status    1985 non-null   object 
 10  Has_Hypertension  1985 non-null   object 
dtypes: float64(3), int64(2), object(6)
memory usage: 170.7+ KB
None
               Age  Salt_Intake  Stress_Score  Sleep_Duration          BMI
count  1985.000000  1985.000000   1985.000000     1985.000000  1985.000000
mean     50.341058     8.531688      

Index(['Age', 'Salt_Intake', 'Stress_Score', 'BP_History', 'Sleep_Duration',
       'BMI', 'Medication', 'Family_History', 'Exercise_Level',
       'Smoking_Status', 'Has_Hypertension'],
      dtype='object')

Defaulting to user installation because normal site-packages is not writeable
Collecting jupyter
  Downloading jupyter-1.1.1-py2.py3-none-any.whl.metadata (2.0 kB)
Collecting notebook (from jupyter)
  Downloading notebook-7.5.1-py3-none-any.whl.metadata (10 kB)
Collecting jupyter-console (from jupyter)
  Downloading jupyter_console-6.6.3-py3-none-any.whl.metadata (5.8 kB)
Collecting nbconvert (from jupyter)
  Downloading nbconvert-7.16.6-py3-none-any.whl.metadata (8.5 kB)
Collecting ipywidgets (from jupyter)
  Downloading ipywidgets-8.1.8-py3-none-any.whl.metadata (2.4 kB)
Collecting jupyterlab (from jupyter)
  Downloading jupyterlab-4.5.1-py3-none-any.whl.metadata (16 kB)
Collecting widgetsnbextension~=4.0.14 (from ipywidgets->jupyter)
  Downloading widgetsnbextension-4.0.15-py3-none-any.whl.metadata (1.6 kB)
Collecting jupyterlab_widgets~=3.0.15 (from ipywidgets->jupyter)
  Downloading jupyterlab_widgets-3.0.16-py3-none-any.whl.metadata (20 kB)
Collecting async-lru>=1.0.0 (from jupyt

# Age and Salt Intake by Hypertension Status

In [7]:
import pandas as pd
import plotly.express as px

# Mean age within each hypertension group, kept as a two-column frame
# (Has_Hypertension, Age) so it can be passed straight to plotly.
age_avg = data.groupby("Has_Hypertension", as_index=False)["Age"].mean()

# One bar per group, coloured by status and annotated with the group mean.
fig = px.bar(
    age_avg,
    x="Has_Hypertension",
    y="Age",
    text="Age",
    color="Has_Hypertension",
    color_discrete_map={"Yes": "red", "No": "green"},
    labels={"Has_Hypertension": "Has Hypertension", "Age": "Average Age"},
    title="Average Age by Hypertension Status",
)

# Print each bar's value to two decimals, positioned above the bar.
fig.update_traces(textposition="outside", texttemplate="%{text:.2f}")

# Compact 600x400 figure on a white background (sized for pasting into a
# Word document). The y-axis is extended 10 units past the tallest bar so the
# outside text labels are not clipped; margins are widened for the same reason.
fig.update_layout(
    width=600,
    height=400,
    paper_bgcolor="white",
    plot_bgcolor="white",
    title_x=0.5,  # centre the title
    xaxis_title="Hypertension Status",
    yaxis_title="Average Age",
    yaxis_range=[0, age_avg["Age"].max() + 10],
    uniformtext_minsize=8,
    uniformtext_mode="hide",
    margin={"l": 50, "r": 50, "t": 80, "b": 50},
)

# Render the figure with plotly's default renderer.
fig.show()


In [12]:

# Descriptive statistics of Age, one row per hypertension status.
# Named aggregation fixes the output column names; IQR (Q3 - Q1) is appended
# from the unrounded quartiles, then the whole table is rounded to two
# decimals for presentation.
age_summary = (
    data.groupby("Has_Hypertension")["Age"]
    .agg(
        Mean="mean",
        Median="median",
        Std_Dev="std",
        Min="min",
        Q1=lambda ages: ages.quantile(0.25),
        Q3=lambda ages: ages.quantile(0.75),
        Max="max",
    )
    .assign(IQR=lambda tbl: tbl["Q3"] - tbl["Q1"])
    .round(2)
)

age_summary


Unnamed: 0_level_0,Mean,Median,Std_Dev,Min,Q1,Q3,Max,IQR
Has_Hypertension,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
No,46.08,43.0,18.95,18,30.0,61.0,84,31.0
Yes,54.28,56.0,19.06,18,38.0,70.0,84,32.0


In [14]:

# Mean salt intake within each hypertension group, kept as a two-column frame
# (Has_Hypertension, Salt_Intake). Relies on `px` (plotly.express) having been
# imported by an earlier cell.
salt_avg = data.groupby("Has_Hypertension", as_index=False)["Salt_Intake"].mean()

# One bar per group, coloured by status and annotated with the group mean.
fig = px.bar(
    salt_avg,
    x="Has_Hypertension",
    y="Salt_Intake",
    text="Salt_Intake",
    color="Has_Hypertension",
    color_discrete_map={"Yes": "red", "No": "green"},
    labels={
        "Has_Hypertension": "Hypertension Status",
        "Salt_Intake": "Average Salt Intake (g/day)",
    },
    title="Average Salt Intake by Hypertension Status",
)

# Print each bar's value to two decimals, positioned above the bar.
fig.update_traces(textposition="outside", texttemplate="%{text:.2f}")

# Compact 600x400 figure on a white background (sized for Word documents).
# The y-axis is extended 2 units past the tallest bar so the outside text
# labels have headroom.
fig.update_layout(
    width=600,
    height=400,
    paper_bgcolor="white",
    plot_bgcolor="white",
    title_x=0.5,  # centre the title
    xaxis_title="Hypertension Status",
    yaxis_title="Average Salt Intake (g/day)",
    yaxis_range=[0, salt_avg["Salt_Intake"].max() + 2],
    uniformtext_minsize=8,
    uniformtext_mode="hide",
    margin={"l": 50, "r": 50, "t": 80, "b": 50},
)

# Render the figure with plotly's default renderer.
fig.show()


In [15]:
# Descriptive statistics of Salt_Intake, one row per hypertension status.
# Named aggregation fixes the output column names; IQR (Q3 - Q1) is appended
# from the unrounded quartiles, then the whole table is rounded to two
# decimals for presentation.
salt_summary = (
    data.groupby("Has_Hypertension")["Salt_Intake"]
    .agg(
        Mean="mean",
        Median="median",
        Std_Dev="std",
        Min="min",
        Q1=lambda intake: intake.quantile(0.25),
        Q3=lambda intake: intake.quantile(0.75),
        Max="max",
    )
    .assign(IQR=lambda tbl: tbl["Q3"] - tbl["Q1"])
    .round(2)
)

salt_summary


Unnamed: 0_level_0,Mean,Median,Std_Dev,Min,Q1,Q3,Max,IQR
Has_Hypertension,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
No,8.29,8.3,1.88,2.5,7.1,9.5,14.8,2.4
Yes,8.75,8.7,2.07,2.5,7.3,10.3,16.4,3.0
