In [None]:
import datetime

import altair as alt
import numpy as np
import pandas as pd

from sklearn import linear_model

### Load blood pressure readings from a spreadsheet exported from the Balance Health app.
Each consecutive group of 3 readings (rows) is treated as a single measurement.

In [None]:
def load_readings(path, tz="America/Los_Angeles", readings_per_measurement=3):
    """Load blood pressure readings from a CSV export.

    The CSV must provide "Date/Time", "Systolic" and "Diastolic" columns.
    "Date/Time" values are interpreted as UTC and converted to ``tz``.

    Args:
        path: Path or file-like object accepted by ``pandas.read_csv``.
        tz: Time zone the timestamps are converted to.
        readings_per_measurement: Number of consecutive rows that make up
            one measurement.

    Returns:
        A DataFrame with the original columns except "Date/Time", plus:
        "Timestamp" (tz-aware), "Measurement" (0-based measurement number),
        "Index" (0-based position of the reading within its measurement) and
        "Pulse Pressure" (systolic minus diastolic).

    Raises:
        ValueError: If ``readings_per_measurement`` is smaller than 1.
    """
    if readings_per_measurement < 1:
        raise ValueError("readings_per_measurement must be a positive integer")

    df = pd.read_csv(path, parse_dates=["Date/Time"])
    df["Timestamp"] = df["Date/Time"].dt.tz_localize("UTC").dt.tz_convert(tz)
    # Integer-divide the default 0..n-1 row index so that every run of
    # `readings_per_measurement` consecutive rows shares one measurement number.
    df["Measurement"] = df.index // readings_per_measurement
    df["Index"] = df.groupby("Measurement").cumcount()
    df["Pulse Pressure"] = df["Systolic"] - df["Diastolic"]
    return df.drop(columns=["Date/Time"])

# Load the exported readings and preview the last 10 rows.
readings = load_readings("history.csv")
readings.tail(10)

### How much do readings change within a single measurement?

In [None]:
# Columns analysed throughout the notebook ("Pulse Pressure" is derived in load_readings).
fields = ["Systolic", "Diastolic", "Pulse Pressure", "Pulse"]

In [None]:
# Pooled within-measurement standard deviation: the square root of the mean
# of the per-measurement variances of each field.
for field in fields:
    per_measurement_variance = readings.groupby("Measurement")[field].var()
    std = np.sqrt(per_measurement_variance.mean())
    print("{:10}: ±{:.2f}".format(field, std))

In [None]:
def measurement_deltas(readings):
    """Compute how far each reading is from the first reading of its measurement.

    The readings are pivoted so that every measurement is one row and every
    (field, reading index) pair is one column; the reading with index 0 is
    then subtracted from all readings of the same field. Works for any number
    of readings per measurement.

    Args:
        readings: DataFrame with "Measurement" and "Index" columns plus the
            columns named in the module-level ``fields`` list.

    Returns:
        A long-format DataFrame with the columns "field", "index" (reading
        index within the measurement) and "value" (difference from the first
        reading; always 0 for index 0).
    """
    deltas = readings.pivot(index="Measurement", columns="Index", values=fields)
    for field in set(deltas.columns.get_level_values(0)):
        # Row-wise subtraction of the first reading from every reading of this
        # field, independent of how many readings a measurement contains.
        first_reading = deltas[(field, 0)]
        deltas[field] = deltas[field].sub(first_reading, axis=0)
    deltas = pd.melt(deltas)
    deltas.columns = ["field", "index", "value"]
    return deltas

# Summary statistics of the deltas for every (field, reading index) pair.
deltas = measurement_deltas(readings)
deltas.groupby(["field", "index"]).describe()

In [None]:
# Bar chart of the mean difference from the first reading, one column per
# field; "Pulse Pressure" is filtered out of the chart.
alt.Chart(deltas).transform_filter(
    alt.datum.field != "Pulse Pressure"
).mark_bar(
    opacity=0.8
).encode(
    x=alt.X("index:O", axis=alt.Axis(title="Reading #")),
    y=alt.Y("mean(value):Q", axis=alt.Axis(title="mean difference from first reading")),
    column=alt.Column("field", title=None),
).properties(
    height=250,
    width=100,
)

### Do any of the measured values correlate?

In [None]:
# Scatter-plot matrix of systolic, diastolic and pulse readings; the axes do
# not have to start at zero.
alt.Chart(readings).mark_circle().encode(
    alt.X(alt.repeat("column"), type="quantitative", scale=alt.Scale(zero=False)),
    alt.Y(alt.repeat("row"), type="quantitative", scale=alt.Scale(zero=False)),
).properties(width=150, height=150).repeat(
    row=["Systolic", "Diastolic", "Pulse"],
    column=["Systolic", "Diastolic", "Pulse"],
)

In [None]:
# Correlate numeric columns only: `readings` also holds non-numeric columns
# (e.g. the tz-aware "Timestamp"), which DataFrame.corr() does not drop by
# default since pandas 2.0.
readings.select_dtypes(include="number").corr()

### Show the median values for each measurement on a timeline.

In [None]:
def plot_timeline(measurements):
    """Plot blood pressure and pulse measurements on a shared timeline.

    Args:
        measurements: DataFrame with "Timestamp", "Systolic", "Diastolic"
            and "Pulse" columns.

    Returns:
        A vertically concatenated Altair chart: the upper panel layers a band
        between the median systolic and median diastolic values, a rule from
        systolic to diastolic for every measurement, and the systolic and
        diastolic points; the lower panel shows the pulse as ticks. Both
        panels share the x scale.
    """
    base = alt.Chart(measurements).encode(
        alt.X("Timestamp:T", axis=alt.Axis(title=None))
    ).properties(width=1000)

    def pressure_points(name):
        # Semi-transparent points for one pressure column, with a tooltip
        # showing the timestamp and the value.
        return base.mark_circle(opacity=0.6).encode(
            alt.Y(name + ":Q", scale=alt.Scale(zero=False)),
            tooltip=[alt.Tooltip("Timestamp:T", format="%x %X"), name],
        )

    systolic_points = pressure_points("Systolic")
    diastolic_points = pressure_points("Diastolic")

    # Vertical rule spanning diastolic..systolic for each measurement.
    systolic_to_diastolic = base.mark_rule(opacity=0.6, color="steelblue").encode(
        alt.Y("Systolic:Q", scale=alt.Scale(zero=False), axis=alt.Axis(title="mmHg")),
        alt.Y2("Diastolic:Q"),
    )

    # Faint band between the median systolic and the median diastolic value.
    median_band = alt.Chart(measurements).mark_rect(opacity=0.1, color="gray").encode(
        alt.Y("median(Systolic):Q"),
        alt.Y2("median(Diastolic):Q"),
    )

    pressure_panel = alt.layer(
        systolic_to_diastolic,
        systolic_points,
        diastolic_points,
    ).properties(height=200).interactive(bind_y=False)

    pulse_panel = base.mark_tick().encode(
        alt.Y("Pulse:Q", scale=alt.Scale(domain=[50, 100]), axis=alt.Axis(title="bpm", grid=False)),
        tooltip=[alt.Tooltip("Timestamp:T", format="%x %X"), "Pulse"],
    ).properties(height=50)

    combined = alt.vconcat(median_band + pressure_panel, pulse_panel)
    return combined.resolve_scale(x="shared")

In [None]:
def groupby_measurement(readings, field=fields):
    """Collapse the readings of each measurement into a single row.

    Args:
        readings: DataFrame with "Measurement", "Timestamp" and "Note"
            columns plus the columns named by ``field``.
        field: Column name or list of column names to aggregate; defaults to
            the module-level ``fields`` list.

    Returns:
        A DataFrame indexed by "Measurement" holding the median of every
        requested column, plus the first "Timestamp" and the first non-null
        "Note" of each measurement.
    """
    # Accept a single column name as well as a list, so the result is always
    # a DataFrame that the extra columns can be attached to.
    columns = [field] if isinstance(field, str) else list(field)
    grouped = readings.groupby("Measurement")
    df = grouped[columns].median()
    df["Timestamp"] = grouped["Timestamp"].first()
    df["Note"] = grouped["Note"].first()
    return df

# Aggregate the readings per measurement and preview the last 10 measurements.
measurements = groupby_measurement(readings)
measurements.tail(10)

In [None]:
# Timeline of all measurements.
plot_timeline(measurements)

### Show the daily measurement on a timeline.

The "daily measurement" is the first measurement taken each day, excluding measurements with notes.

In [None]:
# Drop measurements that carry a note, then keep the first (non-null) values
# per calendar date of the timestamp.
noteless = measurements.loc[measurements["Note"].isna()]
daily_measurements = noteless.groupby(noteless["Timestamp"].dt.date).first()
daily_measurements.tail()

In [None]:
# Timeline restricted to the daily measurements.
plot_timeline(daily_measurements)

### Are there any trends in the daily measurements?

In [None]:
def trend_per_day(measurements, field):
    """Fit and print a linear trend of ``field`` over elapsed days.

    The regressor is the number of whole days between each measurement and
    the first row of ``measurements``.

    Args:
        measurements: DataFrame with a "Timestamp" column and a ``field``
            column; may be indexed by anything (e.g. dates).
        field: Name of the column to regress.

    Returns:
        None. The field name, intercept and slope (per day) are printed.
    """
    timestamps = measurements["Timestamp"]
    # Use .iloc[0] for the first row: plain [0] is a label lookup and only
    # worked on a date-indexed frame through a deprecated positional fallback.
    elapsed_days = (timestamps - timestamps.iloc[0]).dt.days
    X = elapsed_days.values.reshape(-1, 1)
    r = linear_model.LinearRegression().fit(X, measurements[field])
    print(field, r.intercept_, r.coef_)

# Print the fitted per-day trend of every field for the daily measurements.
for field in fields:
    trend_per_day(daily_measurements, field)

### Does the time a daily measurement was taken matter?

In [None]:
def trend_per_hour(measurements, field):
    """Fit ``field`` linearly against the time of day of each measurement.

    The regressor is the time of day expressed in minutes since midnight, so
    the printed slope is per minute.

    Args:
        measurements: DataFrame with a "Timestamp" column and a ``field`` column.
        field: Name of the column to regress.

    Returns:
        A DataFrame with the original "Timestamp" values and a ``field``
        column holding the fitted values. The field name, intercept and
        slope are also printed.
    """
    stamps = measurements["Timestamp"]
    minutes_since_midnight = stamps.dt.hour * 60 + stamps.dt.minute
    X = minutes_since_midnight.values.reshape(-1, 1)
    model = linear_model.LinearRegression().fit(X, measurements[field])
    print(field, model.intercept_, model.coef_)
    fitted = model.predict(X)
    return pd.DataFrame({"Timestamp": stamps, field: fitted})

def plot_trend_per_hour(measurements, field):
    """Plot ``field`` against the time of day together with its linear fit.

    Args:
        measurements: DataFrame with a "Timestamp" column and a ``field`` column.
        field: Name of the column to plot.

    Returns:
        A layered Altair chart: a scatter plot of the given measurements and
        a dashed line with the fitted values from ``trend_per_hour``.
    """
    # Plot the frame that was passed in (not the notebook-level
    # `daily_measurements`), so the points and the fitted line always
    # describe the same data.
    scatter = alt.Chart(measurements).mark_circle().encode(
        alt.X("hoursminutes(Timestamp):T", title="Time of Day"),
        alt.Y(field, type="quantitative", scale=alt.Scale(zero=False)),
        tooltip=[alt.Tooltip("Timestamp:T", format="%x %X")],
    ).properties(
        width=600,
        height=150,
    ).interactive(
        bind_y=False
    )
    fit_line = alt.Chart(trend_per_hour(measurements, field)).mark_line(
        strokeDash=[5, 5], opacity=0.5
    ).encode(x="hoursminutes(Timestamp):T", y=field)
    return scatter + fit_line

# One time-of-day chart per field, stacked vertically.
alt.vconcat(*[plot_trend_per_hour(daily_measurements, field) for field in fields])

### Does the amount of exercise during the previous 7 days affect blood pressure?

Load the total number of calories burned each day (exported as a spreadsheet from Fitbit via Zenobase).

In [None]:
def load_calories(path="steps.csv", tz="America/Los_Angeles"):
    """Load per-entry calorie totals from a CSV export.

    The CSV must provide "timestamp" and "energy.@value" columns. Timestamps
    are interpreted as UTC and converted to ``tz`` before the calendar date
    is taken.

    Args:
        path: Path or file-like object accepted by ``pandas.read_csv``.
        tz: Time zone used to determine the calendar date.

    Returns:
        A DataFrame with a single "Calories" column, indexed by the local
        date (index name "Timestamp").
    """
    raw = pd.read_csv(path, parse_dates=["timestamp"])
    local_time = raw["timestamp"].dt.tz_localize("UTC").dt.tz_convert(tz)
    by_date = pd.DataFrame({
        "Timestamp": local_time.dt.date,
        "Calories": raw["energy.@value"],
    })
    return by_date.set_index("Timestamp")

# Load the calorie export from the default path and preview the first 10 rows.
calories = load_calories()
calories.head(10)

In [None]:
# Sum the calories over a 7-row window and shift by one row so each date gets
# the total of the 7 preceding rows (assumes one row per consecutive day --
# TODO confirm). After joining, keep numeric columns only: the joined frame
# also carries "Timestamp" and "Note", which DataFrame.corr() does not drop
# by default since pandas 2.0.
(
    calories.rolling(7).sum().shift(1)
    .join(daily_measurements)
    .select_dtypes(include="number")
    .corr()
)