# Hypertension 


In [2]:

import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly.express as px
import seaborn as sns
from scipy import stats
from sklearn.impute import SimpleImputer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder

# Show the kernel's working directory; relative paths such as the CSV filename
# passed to pd.read_csv in the next cell are resolved against it.
os.getcwd()


'c:\\Users\\user\\MACHINE_LEARNING_MODEL\\Hypertension'

In [3]:
# Load the hypertension dataset from a CSV file located in the working directory.
# NOTE(review): the preview shows blank Medication entries. If the file stores the
# literal text "None" there, pandas' default NA parsing (recent versions) reads it
# as NaN — confirm against the raw CSV before imputing or encoding this column.
data = pd.read_csv(filepath_or_buffer="hypertension_dataset.csv")

# Preview the first five rows
data.head(n=5)


Unnamed: 0,Age,Salt_Intake,Stress_Score,BP_History,Sleep_Duration,BMI,Medication,Family_History,Exercise_Level,Smoking_Status,Has_Hypertension
0,69,8.0,9,Normal,6.4,25.8,,Yes,Low,Non-Smoker,Yes
1,32,11.7,10,Normal,5.4,23.4,,No,Low,Non-Smoker,No
2,78,9.5,3,Normal,7.1,18.7,,No,Moderate,Non-Smoker,No
3,38,10.0,10,Hypertension,4.2,22.1,ACE Inhibitor,No,Low,Non-Smoker,Yes
4,41,9.8,1,Prehypertension,5.8,16.2,Other,No,Moderate,Non-Smoker,No


In [4]:
# Number of rows and columns
print("Dataset shape:", data.shape)

# Column dtypes and non-null counts.
# DataFrame.info() writes its report straight to stdout and returns None, so it is
# called directly; wrapping it in print() would emit a stray "None" line afterwards.
data.info()

# Summary statistics for the numeric columns
print(data.describe())

# Column labels (displayed as the cell's result)
data.columns


Dataset shape: (1985, 11)
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1985 entries, 0 to 1984
Data columns (total 11 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   Age               1985 non-null   int64  
 1   Salt_Intake       1985 non-null   float64
 2   Stress_Score      1985 non-null   int64  
 3   BP_History        1985 non-null   object 
 4   Sleep_Duration    1985 non-null   float64
 5   BMI               1985 non-null   float64
 6   Medication        1186 non-null   object 
 7   Family_History    1985 non-null   object 
 8   Exercise_Level    1985 non-null   object 
 9   Smoking_Status    1985 non-null   object 
 10  Has_Hypertension  1985 non-null   object 
dtypes: float64(3), int64(2), object(6)
memory usage: 170.7+ KB
None
               Age  Salt_Intake  Stress_Score  Sleep_Duration          BMI
count  1985.000000  1985.000000   1985.000000     1985.000000  1985.000000
mean     50.341058     8.531688      

Index(['Age', 'Salt_Intake', 'Stress_Score', 'BP_History', 'Sleep_Duration',
       'BMI', 'Medication', 'Family_History', 'Exercise_Level',
       'Smoking_Status', 'Has_Hypertension'],
      dtype='object')

# AGE 

In [5]:
# pandas (pd) and plotly.express (px) are provided by the notebook's import cell;
# they are intentionally not re-imported here.

# Mean age for each hypertension group, flattened to a two-column frame for plotting
age_avg = data.groupby("Has_Hypertension")["Age"].mean().reset_index()

# One bar per hypertension group, coloured by group and labelled with its mean
fig = px.bar(
    age_avg,
    x="Has_Hypertension",
    y="Age",
    color="Has_Hypertension",
    color_discrete_map={"Yes": "red", "No": "green"},
    text="Age",
    labels={"Has_Hypertension": "Has Hypertension", "Age": "Average Age"},
    title="Average Age by Hypertension Status"
)

# Print each mean (two decimals) above its bar
fig.update_traces(texttemplate='%{text:.2f}', textposition='outside')

# Compact, white-background layout sized for pasting into a Word document
fig.update_layout(
    width=600,               # narrow width
    height=400,              # shorter height
    paper_bgcolor='white',   # background outside plot
    plot_bgcolor='white',    # background inside plot
    yaxis=dict(title="Average Age", range=[0, age_avg['Age'].max() + 10]),  # headroom so the outside labels are not clipped
    xaxis=dict(title="Hypertension Status"),
    title=dict(x=0.5),       # center title
    uniformtext_minsize=8,
    uniformtext_mode='hide',
    margin=dict(l=50, r=50, t=80, b=50)  # margins wide enough to prevent cut-off
)

# Render the figure with plotly's default renderer
fig.show()


In [6]:

# Descriptive statistics of Age within each hypertension group.
# The named aggregations fix the column order; IQR is appended as Q3 - Q1 and
# every value is rounded to two decimals for presentation.
age_summary = (
    data
    .groupby("Has_Hypertension")["Age"]
    .agg(
        Mean="mean",
        Median="median",
        Std_Dev="std",
        Min="min",
        Q1=lambda ages: ages.quantile(0.25),
        Q3=lambda ages: ages.quantile(0.75),
        Max="max",
    )
    .assign(IQR=lambda table: table["Q3"] - table["Q1"])
    .round(2)
)

age_summary


Unnamed: 0_level_0,Mean,Median,Std_Dev,Min,Q1,Q3,Max,IQR
Has_Hypertension,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
No,46.08,43.0,18.95,18,30.0,61.0,84,31.0
Yes,54.28,56.0,19.06,18,38.0,70.0,84,32.0


# SALT INTAKE

In [7]:

# Mean salt intake for each hypertension group, flattened to a two-column frame
salt_avg = (
    data
    .groupby("Has_Hypertension")["Salt_Intake"]
    .mean()
    .reset_index()
)

# One bar per hypertension group, coloured by group and labelled with its mean
fig = px.bar(
    data_frame=salt_avg,
    x="Has_Hypertension",
    y="Salt_Intake",
    text="Salt_Intake",
    color="Has_Hypertension",
    color_discrete_map={"Yes": "red", "No": "green"},
    title="Average Salt Intake by Hypertension Status",
    labels={
        "Has_Hypertension": "Hypertension Status",
        "Salt_Intake": "Average Salt Intake (g/day)",
    },
)

# Print each mean (two decimals) above its bar
fig.update_traces(textposition="outside", texttemplate="%{text:.2f}")

# Compact, white-background layout sized for pasting into a Word document
fig.update_layout(
    title={"x": 0.5},
    xaxis={"title": "Hypertension Status"},
    yaxis={
        "title": "Average Salt Intake (g/day)",
        # leave room above the tallest bar for the outside labels
        "range": [0, salt_avg["Salt_Intake"].max() + 2],
    },
    width=600,
    height=400,
    paper_bgcolor="white",
    plot_bgcolor="white",
    margin={"l": 50, "r": 50, "t": 80, "b": 50},
    uniformtext_minsize=8,
    uniformtext_mode="hide",
)

fig.show()


In [8]:
# Descriptive statistics of Salt_Intake within each hypertension group.
# The named aggregations fix the column order; IQR is appended as Q3 - Q1 and
# every value is rounded to two decimals for presentation.
salt_summary = (
    data
    .groupby("Has_Hypertension")["Salt_Intake"]
    .agg(
        Mean="mean",
        Median="median",
        Std_Dev="std",
        Min="min",
        Q1=lambda intake: intake.quantile(0.25),
        Q3=lambda intake: intake.quantile(0.75),
        Max="max",
    )
    .assign(IQR=lambda table: table["Q3"] - table["Q1"])
    .round(2)
)

salt_summary


Unnamed: 0_level_0,Mean,Median,Std_Dev,Min,Q1,Q3,Max,IQR
Has_Hypertension,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
No,8.29,8.3,1.88,2.5,7.1,9.5,14.8,2.4
Yes,8.75,8.7,2.07,2.5,7.3,10.3,16.4,3.0


# STRESS SCORE

In [9]:
# Mean stress score for each hypertension group, flattened to a two-column frame
stress_avg = (
    data
    .groupby("Has_Hypertension")["Stress_Score"]
    .mean()
    .reset_index()
)

# One bar per hypertension group, coloured by group and labelled with its mean
fig = px.bar(
    data_frame=stress_avg,
    x="Has_Hypertension",
    y="Stress_Score",
    text="Stress_Score",
    color="Has_Hypertension",
    color_discrete_map={"Yes": "red", "No": "green"},
    title="Average Stress Score by Hypertension Status",
    labels={
        "Has_Hypertension": "Hypertension Status",
        "Stress_Score": "Average Stress Score",
    },
)

# Print each mean (two decimals) above its bar
fig.update_traces(textposition="outside", texttemplate="%{text:.2f}")

# Compact, white-background layout sized for pasting into a Word document
fig.update_layout(
    title={"x": 0.5},
    xaxis={"title": "Hypertension Status"},
    yaxis={
        "title": "Average Stress Score",
        # leave room above the tallest bar for the outside labels
        "range": [0, stress_avg["Stress_Score"].max() + 2],
    },
    width=600,
    height=400,
    paper_bgcolor="white",
    plot_bgcolor="white",
    margin={"l": 50, "r": 50, "t": 80, "b": 50},
    uniformtext_minsize=8,
    uniformtext_mode="hide",
)

fig.show()


In [10]:
# Descriptive statistics of Stress_Score within each hypertension group.
# The named aggregations fix the column order; IQR is appended as Q3 - Q1 and
# every value is rounded to two decimals for presentation.
stress_summary = (
    data
    .groupby("Has_Hypertension")["Stress_Score"]
    .agg(
        Mean="mean",
        Median="median",
        Std_Dev="std",
        Min="min",
        Q1=lambda scores: scores.quantile(0.25),
        Q3=lambda scores: scores.quantile(0.75),
        Max="max",
    )
    .assign(IQR=lambda table: table["Q3"] - table["Q1"])
    .round(2)
)

stress_summary


Unnamed: 0_level_0,Mean,Median,Std_Dev,Min,Q1,Q3,Max,IQR
Has_Hypertension,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
No,4.37,4.0,2.87,0,2.0,6.0,10,4.0
Yes,5.54,6.0,3.28,0,3.0,9.0,10,6.0


# SLEEP DURATION

In [11]:
# plotly.express (px) is provided by the notebook's import cell; it is
# intentionally not re-imported here.

# Mean sleep duration for each hypertension group, flattened to a two-column frame
sleep_avg = data.groupby("Has_Hypertension")["Sleep_Duration"].mean().reset_index()

# One bar per hypertension group, coloured by group and labelled with its mean
fig = px.bar(
    sleep_avg,
    x="Has_Hypertension",
    y="Sleep_Duration",
    color="Has_Hypertension",
    color_discrete_map={"Yes": "red", "No": "green"},
    text="Sleep_Duration",
    labels={
        "Has_Hypertension": "Hypertension Status",
        "Sleep_Duration": "Average Sleep Duration (hours)"
    },
    title="Average Sleep Duration by Hypertension Status"
)

# Print each mean (two decimals) above its bar
fig.update_traces(
    texttemplate='%{text:.2f}',
    textposition='outside'
)

# Compact, white-background layout sized for pasting into a Word document
fig.update_layout(
    width=600,
    height=400,
    paper_bgcolor='white',
    plot_bgcolor='white',
    yaxis=dict(
        title="Average Sleep Duration (hours)",
        range=[0, sleep_avg["Sleep_Duration"].max() + 2]  # headroom for labels
    ),
    xaxis=dict(title="Hypertension Status"),
    title=dict(x=0.5),  # center the title
    uniformtext_minsize=8,
    uniformtext_mode='hide',
    margin=dict(l=50, r=50, t=80, b=50)
)

fig.show()


In [12]:
# Descriptive statistics of Sleep_Duration within each hypertension group.
# The named aggregations fix the column order; IQR is appended as Q3 - Q1 and
# every value is rounded to two decimals for presentation.
sleep_summary = (
    data
    .groupby("Has_Hypertension")["Sleep_Duration"]
    .agg(
        Mean="mean",
        Median="median",
        Std_Dev="std",
        Min="min",
        Q1=lambda hours: hours.quantile(0.25),
        Q3=lambda hours: hours.quantile(0.75),
        Max="max",
    )
    .assign(IQR=lambda table: table["Q3"] - table["Q1"])
    .round(2)
)

sleep_summary


Unnamed: 0_level_0,Mean,Median,Std_Dev,Min,Q1,Q3,Max,IQR
Has_Hypertension,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
No,6.64,6.6,1.38,2.2,5.7,7.6,11.0,1.9
Yes,6.28,6.3,1.66,1.5,5.0,7.4,11.4,2.4


# BMI

In [13]:
# plotly.express (px) is provided by the notebook's import cell; it is
# intentionally not re-imported here.

# Mean BMI for each hypertension group, flattened to a two-column frame
bmi_avg = data.groupby("Has_Hypertension")["BMI"].mean().reset_index()

# One bar per hypertension group, coloured by group and labelled with its mean
fig = px.bar(
    bmi_avg,
    x="Has_Hypertension",
    y="BMI",
    color="Has_Hypertension",
    color_discrete_map={"Yes": "red", "No": "green"},
    text="BMI",
    labels={
        "Has_Hypertension": "Hypertension Status",
        "BMI": "Average Body Mass Index (BMI)"
    },
    title="Average BMI by Hypertension Status"
)

# Print each mean (two decimals) above its bar
fig.update_traces(
    texttemplate='%{text:.2f}',
    textposition='outside'
)

# Compact, white-background layout sized for pasting into a Word document
fig.update_layout(
    width=600,
    height=400,
    paper_bgcolor='white',
    plot_bgcolor='white',
    yaxis=dict(
        title="Average Body Mass Index (BMI)",
        range=[0, bmi_avg["BMI"].max() + 5]  # extra headroom for labels
    ),
    xaxis=dict(title="Hypertension Status"),
    title=dict(x=0.5),  # center the title
    uniformtext_minsize=8,
    uniformtext_mode='hide',
    margin=dict(l=50, r=50, t=80, b=50)
)

fig.show()


In [14]:
# Descriptive statistics of BMI within each hypertension group.
# The named aggregations fix the column order; IQR is appended as Q3 - Q1 and
# every value is rounded to two decimals for presentation.
bmi_summary = (
    data
    .groupby("Has_Hypertension")["BMI"]
    .agg(
        Mean="mean",
        Median="median",
        Std_Dev="std",
        Min="min",
        Q1=lambda bmi: bmi.quantile(0.25),
        Q3=lambda bmi: bmi.quantile(0.75),
        Max="max",
    )
    .assign(IQR=lambda table: table["Q3"] - table["Q1"])
    .round(2)
)

bmi_summary


Unnamed: 0_level_0,Mean,Median,Std_Dev,Min,Q1,Q3,Max,IQR
Has_Hypertension,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
No,25.33,25.4,4.18,11.9,22.7,28.2,38.4,5.5
Yes,26.64,26.6,4.71,13.4,23.4,30.2,41.9,6.8
