In [None]:
import datetime
import glob
import json
import math

import altair as alt
import numpy as np
import pandas as pd

In [None]:
# IANA name of the local time zone; the loaders below convert UTC timestamps to it.
timezone = "America/Los_Angeles"

# Use fully-qualified option names: pandas resolves shorthand names by pattern
# matching, which can break if a similarly named option is added later.
pd.set_option("display.max_colwidth", 100)
pd.set_option("display.max_rows", 500)

# must remove data with ambiguous timestamps pending https://github.com/pandas-dev/pandas/pull/22825
def remove_ambiguous_timestamps(df):
    """Return the rows of ``df`` whose ``timestamp`` is outside the 2018-11-04 window.

    The dropped window runs from 01:00 at UTC-07:00 to 02:00 at UTC-08:00 on
    2018-11-04 (08:00 to 10:00 UTC); both end points are dropped as well,
    because ``Series.between`` is inclusive by default. Only this one
    hard-coded window is handled.
    """
    in_ambiguous_window = df.timestamp.between(
        "2018-11-04 01:00-07:00",
        "2018-11-04 02:00-08:00",
    )
    keep = ~in_ambiguous_window
    return df[keep]

def merge_on_timestamp(a, b):
    """Outer-join ``a`` and ``b`` on their shared ``timestamp`` key.

    Rows present in only one input are kept; the columns contributed by the
    other input are filled with missing values.
    """
    merged = pd.merge(left=a, right=b, on=["timestamp"], how="outer")
    return merged

### Load blood sugar readings exported from a Tidepool app.

In [None]:
def read_glucose(path):
    """Load continuous glucose readings from a Tidepool JSON export.

    Only records whose ``type`` is ``"cbg"`` are used.

    Parameters
    ----------
    path : str or file-like
        Anything accepted by ``pandas.read_json``.

    Returns
    -------
    pandas.DataFrame
        One row per reading, with the columns:

        * ``timestamp`` - record ``time`` converted to the notebook ``timezone``
        * ``glucose`` - ``value`` times 18.016, rounded (mmol/L to mg/dL)
        * ``glucose_in_range`` - True when ``glucose`` is within 70..140 inclusive
        * ``glucose_high`` - amount by which ``glucose`` exceeds 100, floored at 0

        Rows inside the window removed by ``remove_ambiguous_timestamps`` are
        dropped.
    """
    raw = pd.read_json(path, convert_dates=["time"])
    cbg = raw[raw.type == "cbg"]

    # to_datetime(utc=True) localizes tz-naive values as UTC and converts
    # tz-aware ones, so this works whichever of the two the JSON parser produced
    # (tz_localize alone raises on values that are already tz-aware).
    timestamp = pd.to_datetime(cbg.time, utc=True).dt.tz_convert(timezone)
    glucose = (cbg["value"] * 18.016).round()  # convert mmol/L to mg/dL

    # Build a fresh frame from the derived Series instead of assigning columns
    # onto a filtered slice, which would trigger SettingWithCopyWarning.
    df = pd.DataFrame({
        "timestamp": timestamp,
        "glucose": glucose,
        "glucose_in_range": glucose.between(70, 140),
        "glucose_high": (glucose - 100).clip(lower=0),
    })
    return remove_ambiguous_timestamps(df)

# Glucose readings from the Tidepool export; preview the first rows.
glucose = read_glucose(path="blip-input.json")
glucose.head(n=5)

### Load food notes exported from the Tidepool app.

In [None]:
def read_notes(path):
    """Load free-text notes from a Tidepool JSON export.

    Parameters
    ----------
    path : str or file-like
        Anything accepted by ``pandas.read_json``.

    Returns
    -------
    pandas.DataFrame
        Columns ``timestamp`` (converted to the notebook ``timezone``) and
        ``messagetext``. Records without a ``timestamp`` are dropped, as are
        rows inside the window removed by ``remove_ambiguous_timestamps``.
    """
    raw = pd.read_json(path)

    # to_datetime(utc=True) accepts both tz-naive (treated as UTC) and tz-aware
    # input, whereas tz_localize raises on values that are already tz-aware.
    timestamp = pd.to_datetime(raw.timestamp, utc=True).dt.tz_convert(timezone)

    notes = raw.assign(timestamp=timestamp)
    notes = notes.loc[notes.timestamp.notna(), ["timestamp", "messagetext"]]
    return remove_ambiguous_timestamps(notes)

# Notes come from the same export file as the glucose readings.
notes = read_notes(path="blip-input.json")
notes.head(n=5)

### Load hourly step counts exported from Zenobase.

In [None]:
def read_steps(path):
    """Load hourly step counts from a CSV export.

    Parameters
    ----------
    path : str or file-like
        CSV with at least the columns ``count`` and ``timestamp``.

    Returns
    -------
    pandas.DataFrame
        Columns:

        * ``steps`` - the CSV ``count`` column
        * ``timestamp`` - start of the interval, converted to the notebook ``timezone``
        * ``end`` - ``timestamp`` plus one hour minus one second
        * ``activity_level`` - integer bin of ``steps`` over the left-closed
          bins [0, 10), [10, 100), [100, 1000), [1000, 10000); values outside
          these bins get a missing level

        Rows inside the window removed by ``remove_ambiguous_timestamps`` are
        dropped.
    """
    df = pd.read_csv(
        path,
        usecols=["count", "timestamp"],
        parse_dates=["timestamp"],
        index_col=False,
    # Rename by name, not by position: ``usecols`` keeps the file's column
    # order, so positional relabelling would swap the two columns if the CSV
    # lists ``timestamp`` before ``count``.
    ).rename(columns={"count": "steps"})

    # to_datetime(utc=True) accepts both tz-naive (treated as UTC) and tz-aware
    # input, whereas tz_localize raises on values that are already tz-aware.
    df["timestamp"] = pd.to_datetime(df.timestamp, utc=True).dt.tz_convert(timezone)
    df["end"] = df.timestamp + pd.to_timedelta(1, unit="h") - pd.to_timedelta(1, unit="s")
    df["activity_level"] = pd.cut(df.steps, bins=[0, 10, 100, 1000, 10000], right=False, labels=False)
    return remove_ambiguous_timestamps(df)

# Hourly step counts; preview the last rows.
steps = read_steps(path="steps.csv")
steps.tail(n=5)

### Plot all data on a timeline.

In [None]:
def plot_timeline(glucose, notes, steps, begin=None, end=None):
    """Plot glucose, notes and activity on one timeline, faceted into a row per day.

    Parameters
    ----------
    glucose, notes, steps : pandas.DataFrame
        Frames sharing a ``timestamp`` column; they are outer-merged on it.
        The charts read the columns ``glucose``, ``glucose_in_range``,
        ``messagetext``, ``steps``, ``end`` and ``activity_level``.
    begin : optional
        Inclusive lower bound on ``timestamp``; skipped when falsy.
    end : optional
        Exclusive upper bound on ``timestamp``; skipped when falsy.

    Returns
    -------
    An Altair faceted chart with three layers per day: activity rectangles,
    note rules and glucose points.
    """
    data = merge_on_timestamp(glucose, notes)
    data = merge_on_timestamp(data, steps)

    # Apply each bound on its own so that passing only one of them works:
    # a lone ``begin`` must not compare against ``None``, and a lone ``end``
    # must not be silently ignored.
    if begin:
        data = data[data.timestamp >= begin]
    if end:
        data = data[data.timestamp < end]

    # Glucose readings as points, red when outside the in-range flag.
    glucose_chart = alt.Chart().mark_point(opacity=0.8).encode(
        alt.X("hoursminutes(timestamp):T", axis=alt.Axis(grid=False, title=None)),
        alt.Y("glucose:Q", axis=alt.Axis(grid=True, title=None), scale=alt.Scale(domain=[50, 200])),
        color = alt.Color("glucose_in_range:O", scale=alt.Scale(domain=[True, False], range=["steelblue", "red"]), legend=None),
        tooltip = ["hoursminutes(timestamp):T", "glucose:Q"],
    ).transform_filter(
        # presumably meant to restrict this layer to rows carrying a glucose reading
        alt.datum.glucose >= 0
    )

    # Notes as dashed vertical rules with the note text in the tooltip.
    note_chart = alt.Chart().mark_rule(color="black", opacity=0.8, strokeDash=[1, 1]).encode(
        alt.X("hoursminutes(timestamp):T"),
        tooltip=["hoursminutes(timestamp):T", "messagetext:O"],
        size=alt.value(2)
    ).transform_filter(
        alt.datum.messagetext
    )

    # Step intervals as translucent rectangles from ``timestamp`` to ``end``,
    # shaded by activity level.
    activity_chart = alt.Chart().mark_rect(opacity=0.2).encode(
        x="hoursminutes(timestamp):T",
        x2="hoursminutes(end):T",
        color=alt.Color("activity_level:Q", scale=alt.Scale(scheme="greys"), legend=None),
        tooltip=["hoursminutes(timestamp):T", "steps:Q"]
    ).transform_filter(
        alt.datum.steps >= 0
    )

    return alt.layer(
        activity_chart,
        note_chart,
        glucose_chart,
        data=data
    ).properties(
        width=1200,
        height=100
    ).interactive(
        bind_y = False
    ).resolve_scale(
        x="shared", 
        color="independent"
    ).facet(
        row=alt.Row("yearmonthdate(timestamp):T", title=None)
    )

# Timeline from 2018-10-28 up to (not including) 2018-11-11.
plot_timeline(glucose, notes, steps, begin="2018-10-28", end="2018-11-11")

### Compare apples to apples.

In [None]:
def find_notes(notes, text):
    """Return the notes whose ``messagetext`` contains ``text``, oldest first.

    Parameters
    ----------
    notes : pandas.DataFrame
        Frame with ``timestamp`` and ``messagetext`` columns.
    text : str
        Pattern passed to ``Series.str.contains``; it is case-sensitive and,
        per the pandas default, interpreted as a regular expression.

    Returns
    -------
    pandas.DataFrame
        Matching rows sorted by ``timestamp``.
    """
    # na=False: a note without text counts as "no match". Without it the mask
    # contains missing values and boolean indexing raises.
    matches = notes.messagetext.str.contains(text, na=False)
    return notes[matches].sort_values(by=["timestamp"])

# List every note mentioning "apple" to pick experiment windows from.
find_notes(notes, text="apple")

In [None]:
def plot_experiment(glucose, experiments):
    """Overlay glucose curves of several time windows on a common minutes axis.

    Parameters
    ----------
    glucose : pandas.DataFrame
        Frame with a tz-aware ``timestamp`` column and a ``glucose`` column.
    experiments : iterable of (begin, end, label)
        ``begin`` and ``end`` are wall-clock times in the notebook ``timezone``
        (anything ``pandas.to_datetime`` parses); ``label`` is shown in the
        tooltip. Both bounds are inclusive.

    Returns
    -------
    An Altair layered chart: a shaded 70-140 band spanning 0-180 minutes, one
    line per experiment (click to highlight) and invisible points that carry
    the tooltips.
    """
    columns = ["label", "time", "glucose", "begin", "end"]

    # Build one frame per experiment with vectorized column operations rather
    # than appending a dict per reading.
    frames = []
    for begin, end, label in experiments:
        begin = pd.to_datetime(begin).tz_localize(timezone)
        end = pd.to_datetime(end).tz_localize(timezone)
        window = glucose[glucose.timestamp.between(begin, end)]
        if window.empty:
            continue
        frames.append(pd.DataFrame({
            "label" : label,
            # whole minutes elapsed since the start of the experiment
            "time" : (window.timestamp - begin).dt.total_seconds() // 60,
            "glucose" : window.glucose,
            "begin" : begin,
            "end" : end
        }))
    if frames:
        df = pd.concat(frames, ignore_index=True)
    else:
        # No readings in any window: keep the expected columns so the chart
        # still has a well-defined (empty) data source.
        df = pd.DataFrame(columns=columns)

    scale_x = alt.Scale(domain=[0, 180])
    scale_y = alt.Scale(domain=[50, 200])
    # Clicking near a line selects all rows sharing that experiment's ``begin``.
    highlight = alt.selection(type="multi", on="click", fields=["begin"], nearest=True)
    chart = alt.Chart().encode(
        y=alt.Y("glucose:Q", axis=alt.Axis(grid=True, title="blood glucose (mg/dL)"), scale=scale_y)
    )
    base = chart.encode(
        x=alt.X("time:Q", axis=alt.Axis(grid=True, title="time (minutes)"), scale=scale_x),
        color=alt.Color("begin:O", legend=None, scale=alt.Scale(range=["steelblue"])),
        tooltip=["time:Q", "glucose:Q", "label:O", "begin:O", "end:O"]
    )
    # Fully transparent points: present only so individual readings get tooltips.
    points = base.mark_point(clip=True).encode(
        opacity=alt.value(0)
    )
    lines = base.mark_line(interpolate="basis", clip=True).encode(
        size=alt.condition(~highlight, alt.value(2), alt.value(3)),
        opacity=alt.condition(~highlight, alt.value(0.4), alt.value(0.8))
    ).add_selection(
        highlight
    )
    # Single rectangle marking the 70-140 band across the plotted time range.
    band_df = pd.DataFrame([{
        "x_min" : 0,
        "x_max" : 180,
        "y_min" : 70,
        "y_max" : 140
    }])
    band = alt.Chart(band_df).mark_rect(color="lightgray", opacity=0.3).encode(
        x=alt.X("x_min:Q", scale=scale_x),
        x2=alt.X("x_max:Q"),
        y=alt.Y("y_min:Q", scale=scale_y),
        y2=alt.Y("y_max:Q")
    )
    return alt.layer(band, alt.layer(lines, points, data=df))

# Three afternoon windows, each given as (begin, end, label).
plot_experiment(
    glucose,
    experiments=[
        ("2018-10-31 15:19", "2018-10-31 18:45", "apple juice"),
        ("2018-11-09 16:02", "2018-11-09 18:07", "apple"),
        ("2018-11-21 15:27", "2018-11-21 18:00", "apple juice + walking"),
    ],
)

### Correlate step counts with blood sugar levels the next day.

In [None]:
def _steps_scatter(data, field, x_title, y_title, zero=False, **x_options):
    """Build one 250x250 scatter of ``steps`` (y) against ``field`` (x)."""
    return alt.Chart(data).mark_point().encode(
        alt.X(field + ":Q", title=x_title, scale = alt.Scale(zero=zero), **x_options),
        alt.Y("steps:Q", title=y_title)
    ).properties(
        width = 250,
        height = 250
    )


def steps_vs_glucose(steps, glucose):
    """Scatter each day's glucose statistics against the previous day's steps.

    Parameters
    ----------
    steps : pandas.DataFrame
        Frame with ``timestamp`` and ``steps`` columns.
    glucose : pandas.DataFrame
        Frame with ``timestamp``, ``glucose``, ``glucose_in_range`` and
        ``glucose_high`` columns.

    Returns
    -------
    Three Altair charts side by side, each with the previous day's total steps
    on the y axis and, on the x axis respectively: the day's mean glucose, the
    fraction of readings in range, and the summed excess above 100 mg/dL.
    """
    # Calendar day of every sample in local wall-clock time (tz dropped so the
    # day keys are plain midnights that can be shifted arithmetically).
    steps_day = steps.timestamp.dt.tz_localize(None).dt.normalize()
    glucose_day = glucose.timestamp.dt.tz_localize(None).dt.normalize()

    # Aggregate only the columns that are plotted; aggregating whole frames
    # would also try to sum/average the datetime columns.
    daily_steps = steps.groupby(steps_day)["steps"].sum()
    daily_glucose = glucose.groupby(glucose_day).agg({
        "glucose": "mean",
        "glucose_in_range": "mean",
        "glucose_high": "sum",
    })

    # Re-key the steps of day D to day D + 1 so each glucose day lines up with
    # the steps taken the day before. Shifting by calendar day (rather than by
    # row) keeps the pairing correct when days are missing from either input.
    daily_steps.index = daily_steps.index + pd.Timedelta(days=1)

    data = daily_glucose.join(daily_steps.rename("steps"), how="outer").reset_index(drop=True)

    c1 = _steps_scatter(data, "glucose", "mean glucose (mg/dL)", "steps previous day")
    c2 = _steps_scatter(data, "glucose_in_range", "in range", None, axis=alt.Axis(format="%"))
    c3 = _steps_scatter(data, "glucose_high", "above 100mg/dL", None, zero=True)
    return c1 | c2 | c3

# Render the three step-count vs. glucose scatter plots.
steps_vs_glucose(steps=steps, glucose=glucose)