# **Data Analytics on Elderly Blood Pressure Records**

In [None]:
# Import the necessary libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from datetime import datetime
import plotly.graph_objs as go

## **Data Preparation**

In [None]:
# Upload the CSV file (interactive upload is only available on Google Colab).
# The google.colab package exists only inside a Colab runtime; on any other
# kernel the import raises ImportError, so fall back to expecting the CSV to
# already be present in the working directory.
try:
    from google.colab import files
except ImportError:
    uploaded = {}  # empty stand-in for the upload result; nothing was uploaded
    print("google.colab is not available - place the CSV file in the working directory.")
else:
    uploaded = files.upload()

Saving FYP2024_Elderly Blood Pressure Data Analytics.csv to FYP2024_Elderly Blood Pressure Data Analytics.csv


In [None]:
# Load the uploaded blood pressure export into a DataFrame
bp_csv_path = "FYP2024_Elderly Blood Pressure Data Analytics.csv"
bp_data = pd.read_csv(bp_csv_path)

# Show the raw records
bp_data

Unnamed: 0,Caregiver Name,Record ID,Elderly Name,Recorded Date,Recorded Time,Systolic Blood Pressure,Diastolic Blood Pressure,Heart Rate,Blood Pressure Status
0,Alice,-O1F12pHsvCwQ7LHVyvS,Bob Tan,2024-07-01,10:50 am,116,76,72,Normal
1,Alice,-O1F1ArHA-j-tpmKoNh9,Bob Tan,2024-07-02,11:00 am,122,82,75,High
2,Alice,-O1F2WNInpTAwNz6GxMI,Bob Tan,2024-07-03,11:00 am,115,74,70,Normal
3,Alice,-O1F2bQHMarxV__Mdtju,Bob Tan,2024-07-04,11:05 am,117,75,73,Normal
4,Alice,-O1F2hkgEQ9Snt8gBmIm,Bob Tan,2024-07-05,12:15 pm,130,85,71,High
...,...,...,...,...,...,...,...,...,...
57,Mark,-O0qamYNhFT8PSvvPZqf,Sarah Toh,2024-07-27,12:00 pm,130,85,75,High
58,Mark,-O0qaxx8ATsnZ_vkictq,Sarah Toh,2024-07-28,11:40 am,107,78,70,Normal
59,Mark,-O0qb2SXgFRCSQmUDuqs,Sarah Toh,2024-07-29,10:15 am,102,60,74,Normal
60,Mark,-O0qb6ilL4fXjGYsUFzr,Sarah Toh,2024-07-30,11:05 am,104,70,82,Normal


In [None]:
# Flag every row that exactly repeats an earlier row, then count the flags
duplicate_row_mask = bp_data.duplicated()
duplicate_row_mask.sum()

0

In [None]:
# Count the missing values in each column
missing_value_mask = bp_data.isna()
missing_value_mask.sum()

Caregiver Name              0
Record ID                   0
Elderly Name                0
Recorded Date               0
Recorded Time               0
Systolic Blood Pressure     0
Diastolic Blood Pressure    0
Heart Rate                  0
Blood Pressure Status       0
dtype: int64

In [None]:
# Drop the columns that are not useful for data analytics.
# This cell reassigns bp_data from itself, so a second run would otherwise hit
# a KeyError (the columns are already gone); errors="ignore" makes it re-runnable.
bp_data = bp_data.drop(columns=["Caregiver Name", "Record ID"], errors="ignore")
bp_data

Unnamed: 0,Elderly Name,Recorded Date,Recorded Time,Systolic Blood Pressure,Diastolic Blood Pressure,Heart Rate,Blood Pressure Status
0,Bob Tan,2024-07-01,10:50 am,116,76,72,Normal
1,Bob Tan,2024-07-02,11:00 am,122,82,75,High
2,Bob Tan,2024-07-03,11:00 am,115,74,70,Normal
3,Bob Tan,2024-07-04,11:05 am,117,75,73,Normal
4,Bob Tan,2024-07-05,12:15 pm,130,85,71,High
...,...,...,...,...,...,...,...
57,Sarah Toh,2024-07-27,12:00 pm,130,85,75,High
58,Sarah Toh,2024-07-28,11:40 am,107,78,70,Normal
59,Sarah Toh,2024-07-29,10:15 am,102,60,74,Normal
60,Sarah Toh,2024-07-30,11:05 am,104,70,82,Normal


In [None]:
# Combine the recorded date and the 12-hour recorded time strings into a single
# timestamp, e.g. "2024-07-01" + "10:50 am" -> 2024-07-01 10:50:00
recorded_datetime = pd.to_datetime(
    bp_data["Recorded Date"] + " " + bp_data["Recorded Time"],
    format="%Y-%m-%d %I:%M %p",
)

# Keep the temporary datetime column: the following cell drops it by name
bp_data["Temporary DateTime"] = recorded_datetime

# Add the recorded datetime as the 2nd column. DataFrame.insert raises
# ValueError when the column already exists, so on a re-run of this cell the
# existing column is refreshed instead of inserted again.
if "Recorded DateTime" in bp_data.columns:
    bp_data["Recorded DateTime"] = recorded_datetime
else:
    bp_data.insert(1, column="Recorded DateTime", value=recorded_datetime)
bp_data

Unnamed: 0,Elderly Name,Recorded DateTime,Recorded Date,Recorded Time,Systolic Blood Pressure,Diastolic Blood Pressure,Heart Rate,Blood Pressure Status,Temporary DateTime
0,Bob Tan,2024-07-01 10:50:00,2024-07-01,10:50 am,116,76,72,Normal,2024-07-01 10:50:00
1,Bob Tan,2024-07-02 11:00:00,2024-07-02,11:00 am,122,82,75,High,2024-07-02 11:00:00
2,Bob Tan,2024-07-03 11:00:00,2024-07-03,11:00 am,115,74,70,Normal,2024-07-03 11:00:00
3,Bob Tan,2024-07-04 11:05:00,2024-07-04,11:05 am,117,75,73,Normal,2024-07-04 11:05:00
4,Bob Tan,2024-07-05 12:15:00,2024-07-05,12:15 pm,130,85,71,High,2024-07-05 12:15:00
...,...,...,...,...,...,...,...,...,...
57,Sarah Toh,2024-07-27 12:00:00,2024-07-27,12:00 pm,130,85,75,High,2024-07-27 12:00:00
58,Sarah Toh,2024-07-28 11:40:00,2024-07-28,11:40 am,107,78,70,Normal,2024-07-28 11:40:00
59,Sarah Toh,2024-07-29 10:15:00,2024-07-29,10:15 am,102,60,74,Normal,2024-07-29 10:15:00
60,Sarah Toh,2024-07-30 11:05:00,2024-07-30,11:05 am,104,70,82,Normal,2024-07-30 11:05:00


In [None]:
# The combined "Recorded DateTime" column replaces the separate date and time
# columns, and the temporary helper column is no longer needed
redundant_columns = ["Recorded Date", "Recorded Time", "Temporary DateTime"]
updated_bp_data = bp_data.drop(columns=redundant_columns)

# Show the resulting dataframe
updated_bp_data

Unnamed: 0,Elderly Name,Recorded DateTime,Systolic Blood Pressure,Diastolic Blood Pressure,Heart Rate,Blood Pressure Status
0,Bob Tan,2024-07-01 10:50:00,116,76,72,Normal
1,Bob Tan,2024-07-02 11:00:00,122,82,75,High
2,Bob Tan,2024-07-03 11:00:00,115,74,70,Normal
3,Bob Tan,2024-07-04 11:05:00,117,75,73,Normal
4,Bob Tan,2024-07-05 12:15:00,130,85,71,High
...,...,...,...,...,...,...
57,Sarah Toh,2024-07-27 12:00:00,130,85,75,High
58,Sarah Toh,2024-07-28 11:40:00,107,78,70,Normal
59,Sarah Toh,2024-07-29 10:15:00,102,60,74,Normal
60,Sarah Toh,2024-07-30 11:05:00,104,70,82,Normal


In [None]:
# Inspect the updated dataframe: column names, non-null counts, dtypes and memory usage
updated_bp_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 62 entries, 0 to 61
Data columns (total 6 columns):
 #   Column                    Non-Null Count  Dtype         
---  ------                    --------------  -----         
 0   Elderly Name              62 non-null     object        
 1   Recorded DateTime         62 non-null     datetime64[ns]
 2   Systolic Blood Pressure   62 non-null     int64         
 3   Diastolic Blood Pressure  62 non-null     int64         
 4   Heart Rate                62 non-null     int64         
 5   Blood Pressure Status     62 non-null     object        
dtypes: datetime64[ns](1), int64(3), object(2)
memory usage: 3.0+ KB


### **Dataframe of specific elderly**

**Bob Tan**

In [None]:
# Select only the rows recorded for Bob Tan and display them
is_bob = updated_bp_data["Elderly Name"] == "Bob Tan"
bob_df = updated_bp_data.loc[is_bob]
bob_df

Unnamed: 0,Elderly Name,Recorded DateTime,Systolic Blood Pressure,Diastolic Blood Pressure,Heart Rate,Blood Pressure Status
0,Bob Tan,2024-07-01 10:50:00,116,76,72,Normal
1,Bob Tan,2024-07-02 11:00:00,122,82,75,High
2,Bob Tan,2024-07-03 11:00:00,115,74,70,Normal
3,Bob Tan,2024-07-04 11:05:00,117,75,73,Normal
4,Bob Tan,2024-07-05 12:15:00,130,85,71,High
5,Bob Tan,2024-07-06 08:00:00,125,82,74,High
6,Bob Tan,2024-07-07 13:31:00,120,80,78,High
7,Bob Tan,2024-07-08 08:00:00,118,78,73,Normal
8,Bob Tan,2024-07-09 08:15:00,118,78,73,Normal
9,Bob Tan,2024-07-10 08:30:00,120,80,75,High


**Sarah Toh**

In [None]:
# Select only the rows recorded for Sarah Toh and display them
is_sarah = updated_bp_data["Elderly Name"] == "Sarah Toh"
sarah_df = updated_bp_data.loc[is_sarah]
sarah_df

Unnamed: 0,Elderly Name,Recorded DateTime,Systolic Blood Pressure,Diastolic Blood Pressure,Heart Rate,Blood Pressure Status
31,Sarah Toh,2024-07-01 08:10:00,119,79,72,Normal
32,Sarah Toh,2024-07-02 09:30:00,125,82,75,High
33,Sarah Toh,2024-07-03 08:45:00,88,58,65,Low
34,Sarah Toh,2024-07-04 10:00:00,115,75,70,Normal
35,Sarah Toh,2024-07-05 08:20:00,120,80,74,High
36,Sarah Toh,2024-07-06 09:10:00,110,70,68,Normal
37,Sarah Toh,2024-07-07 11:40:00,115,72,65,Normal
38,Sarah Toh,2024-07-08 08:50:00,95,60,70,Normal
39,Sarah Toh,2024-07-09 09:45:00,125,80,76,High
40,Sarah Toh,2024-07-10 08:00:00,110,75,65,Normal


## **Data Visualization**

### **Bob Tan**

In [None]:
# Keep just the timestamp and the two pressure readings used by Bob's chart
bob_plot_columns = ["Recorded DateTime", "Systolic Blood Pressure", "Diastolic Blood Pressure"]
bob_df2 = bob_df.loc[:, bob_plot_columns]
bob_df2

Unnamed: 0,Recorded DateTime,Systolic Blood Pressure,Diastolic Blood Pressure
0,2024-07-01 10:50:00,116,76
1,2024-07-02 11:00:00,122,82
2,2024-07-03 11:00:00,115,74
3,2024-07-04 11:05:00,117,75
4,2024-07-05 12:15:00,130,85
5,2024-07-06 08:00:00,125,82
6,2024-07-07 13:31:00,120,80
7,2024-07-08 08:00:00,118,78
8,2024-07-09 08:15:00,118,78
9,2024-07-10 08:30:00,120,80


In [None]:
# Series feeding the chart: timestamps on the x-axis, both readings on the y-axis
bob_datetime = bob_df2["Recorded DateTime"]
bob_systolic = bob_df2["Systolic Blood Pressure"]
bob_diastolic = bob_df2["Diastolic Blood Pressure"]

# Reference levels (mmHg) drawn as dotted horizontal guide lines
normal_systolic = 120
normal_diastolic = 80

# One line trace per reading
trace1 = go.Scatter(
    x=bob_datetime,
    y=bob_systolic,
    mode="lines",
    name="Systolic Blood Pressure (mmHg)",
    line={"color": "blue"},
)
trace2 = go.Scatter(
    x=bob_datetime,
    y=bob_diastolic,
    mode="lines",
    name="Diastolic Blood Pressure (mmHg)",
    line={"color": "red"},
)

# Figure layout: titles, date-formatted and rotated x ticks, fixed canvas size
layout = go.Layout(
    title="Bob Tan's Blood Pressure Report",
    xaxis={
        "title": "DATES",
        "tickformat": "%Y-%m-%d",
        "tickangle": -45,
        "tickfont": {"size": 10},
    },
    yaxis={"title": "BLOOD PRESSURE (mmHg)"},
    hovermode="closest",
    showlegend=True,
    width=1000,
    height=520,
)

fig = go.Figure(data=[trace1, trace2], layout=layout)

# Dotted guide lines spanning the full plot width (x in "paper" coordinates)
for guide_level, guide_colour in (
    (normal_systolic, "crimson"),
    (normal_diastolic, "darkgreen"),
):
    fig.add_shape(
        type="line",
        x0=0, x1=1, y0=guide_level, y1=guide_level,
        xref="paper", yref="y",
        line={"color": guide_colour, "dash": "dot"},
    )

# Hard-coded row positions (0-based, used with .iloc) of the readings to flag
high_systolic_spike_indices = [4, 28]   # highest systolic readings
high_diastolic_spike_indices = [4, 28]  # highest diastolic readings
low_systolic_spike_indices = [15, 27]   # lowest systolic readings
low_diastolic_spike_indices = [15, 27]  # lowest diastolic readings

# One marker trace per spike group, added in this order:
# (readings, row positions, marker symbol, marker colour, legend name)
spike_groups = (
    (bob_systolic, high_systolic_spike_indices, "circle", "orangered", "High Systolic Spike"),
    (bob_diastolic, high_diastolic_spike_indices, "circle", "limegreen", "High Diastolic Spike"),
    (bob_systolic, low_systolic_spike_indices, "cross", "orangered", "Low Systolic Spike"),
    (bob_diastolic, low_diastolic_spike_indices, "cross", "limegreen", "Low Diastolic Spike"),
)
for readings, positions, symbol, colour, legend_name in spike_groups:
    spike_dates = bob_datetime.iloc[positions]
    fig.add_trace(
        go.Scatter(
            x=spike_dates,
            y=readings.iloc[positions],
            mode="markers+text",
            marker={"symbol": symbol, "size": 8, "color": colour},
            text=spike_dates.dt.strftime("%d-%b"),  # label each marker with its date
            textposition="top center",
            textfont={"size": 10},
            name=legend_name,
        )
    )

# Render the chart
fig.show()

### **Sarah Toh**

In [None]:
# Keep just the timestamp and the two pressure readings used by Sarah's chart
sarah_plot_columns = ["Recorded DateTime", "Systolic Blood Pressure", "Diastolic Blood Pressure"]
sarah_df2 = sarah_df.loc[:, sarah_plot_columns]
sarah_df2

Unnamed: 0,Recorded DateTime,Systolic Blood Pressure,Diastolic Blood Pressure
31,2024-07-01 08:10:00,119,79
32,2024-07-02 09:30:00,125,82
33,2024-07-03 08:45:00,88,58
34,2024-07-04 10:00:00,115,75
35,2024-07-05 08:20:00,120,80
36,2024-07-06 09:10:00,110,70
37,2024-07-07 11:40:00,115,72
38,2024-07-08 08:50:00,95,60
39,2024-07-09 09:45:00,125,80
40,2024-07-10 08:00:00,110,75


In [None]:
# Series feeding the chart: timestamps on the x-axis, both readings on the y-axis
sarah_datetime = sarah_df2["Recorded DateTime"]
sarah_systolic = sarah_df2["Systolic Blood Pressure"]
sarah_diastolic = sarah_df2["Diastolic Blood Pressure"]

# Reference levels (mmHg) drawn as dotted horizontal guide lines
normal_systolic = 120
normal_diastolic = 80

# One line trace per reading
trace1 = go.Scatter(
    x=sarah_datetime,
    y=sarah_systolic,
    mode="lines",
    name="Systolic Blood Pressure (mmHg)",
    line={"color": "blue"},
)
trace2 = go.Scatter(
    x=sarah_datetime,
    y=sarah_diastolic,
    mode="lines",
    name="Diastolic Blood Pressure (mmHg)",
    line={"color": "red"},
)

# Figure layout: titles, date-formatted and rotated x ticks, fixed canvas size
layout = go.Layout(
    title="Sarah Toh's Blood Pressure Report",
    xaxis={
        "title": "DATES",
        "tickformat": "%Y-%m-%d",
        "tickangle": -45,
        "tickfont": {"size": 10},
    },
    yaxis={"title": "BLOOD PRESSURE (mmHg)"},
    hovermode="closest",
    showlegend=True,
    width=1000,
    height=520,
)

fig = go.Figure(data=[trace1, trace2], layout=layout)

# Dotted guide lines spanning the full plot width (x in "paper" coordinates)
for guide_level, guide_colour in (
    (normal_systolic, "crimson"),
    (normal_diastolic, "darkgreen"),
):
    fig.add_shape(
        type="line",
        x0=0, x1=1, y0=guide_level, y1=guide_level,
        xref="paper", yref="y",
        line={"color": guide_colour, "dash": "dot"},
    )

# Hard-coded row positions (0-based, used with .iloc) of the readings to flag
high_systolic_spike_indices2 = [23, 26]   # highest systolic readings
high_diastolic_spike_indices2 = [23, 26]  # highest diastolic readings
low_systolic_spike_indices2 = [17]        # lowest systolic reading
low_diastolic_spike_indices2 = [17]       # lowest diastolic reading

# One marker trace per spike group, added in this order:
# (readings, row positions, marker symbol, marker colour, legend name)
spike_groups2 = (
    (sarah_systolic, high_systolic_spike_indices2, "circle", "orangered", "High Systolic Spike"),
    (sarah_diastolic, high_diastolic_spike_indices2, "circle", "limegreen", "High Diastolic Spike"),
    (sarah_systolic, low_systolic_spike_indices2, "cross", "orangered", "Low Systolic Spike"),
    (sarah_diastolic, low_diastolic_spike_indices2, "cross", "limegreen", "Low Diastolic Spike"),
)
for readings, positions, symbol, colour, legend_name in spike_groups2:
    spike_dates = sarah_datetime.iloc[positions]
    fig.add_trace(
        go.Scatter(
            x=spike_dates,
            y=readings.iloc[positions],
            mode="markers+text",
            marker={"symbol": symbol, "size": 8, "color": colour},
            text=spike_dates.dt.strftime("%d-%b"),  # label each marker with its date
            textposition="top center",
            textfont={"size": 10},
            name=legend_name,
        )
    )

# Render the chart
fig.show()
