In [1]:
import csv
import glob
import os
import os.path as path

import numpy as np
import pandas as pd
from bokeh.io import output_notebook, show
from bokeh.models import ColumnDataSource
from bokeh.models import HoverTool
from bokeh.models.annotations import Span
from bokeh.plotting import figure

In [2]:
# Configure Bokeh so that show() renders plots inline in this notebook.
output_notebook()

In [3]:
# Root directory that holds the "raw" and "sanitized" export folders.
# Set the HEALTH_TRACKER_DATAROOT environment variable to relocate the data;
# when it is unset the original location is used.
dataroot = os.environ.get("HEALTH_TRACKER_DATAROOT", "/data/health-tracker")

In [4]:
def verify_schema(lines):
    """Check that an export has the expected banner line and column header.

    Parameters
    ----------
    lines : list of str
        The file content split into lines. Line 0 must be the export banner
        and line 2 must be the comma-separated column header.

    Returns
    -------
    tuple
        ``(True, None)`` when the layout matches, otherwise
        ``(False, message)`` where ``message`` describes the first failed
        check. On a header mismatch the header that was found is also printed.
    """
    exp_field_names = [
        'Meter',
        'Serial Number',
        'Meter Timestamp',
        'Record Type',
        'Historic Glucose(mg/dL)',
        'Scan Glucose(mg/dL)',
        'Non-numeric Rapid-Acting Insulin',
        'Rapid-Acting Insulin (units)',
        'Non-numeric Food',
        'Carbohydrates (grams)',
        'Carbohydrates (servings)',
        'Non-numeric Long-Acting Insulin',
        'Long-Acting Insulin (units)',
        'Notes',
        'Strip Glucose(mg/dL)',
        'Ketone(mmol/L)',
        'Meal Insulin (units)',
        'Correction Insulin (units)',
        'User Change Insulin (units)',
    ]
    # Report malformed input through the normal failure tuple instead of
    # letting an IndexError escape from the lookups below.
    if not lines:
        return False, "File is empty"
    if not lines[0].startswith("Export"):
        return False, "First line does not start with the word Export"
    if "Avilay" not in lines[0]:
        return False, "First line does not have the word Avilay"
    if len(lines) < 3:
        return False, "File has no header line"
    # Drop a stray line terminator so it does not end up inside the last
    # field name and cause a false mismatch.
    field_names = lines[2].rstrip("\r\n").split(",")
    if field_names != exp_field_names:
        print(field_names)
        return False, "Unexpected field names"
    return True, None

def sanitize(lvcsv):
    """Rewrite a raw export as a five-column CSV under ``dataroot/sanitized``.

    NUL characters are removed from the raw text, the layout is checked with
    ``verify_schema`` and every non-blank data line (line 3 onward) is reduced
    to the columns at positions 1, 2, 4, 5 and 13 of the raw header, written
    as ``sno``, ``timestamp``, ``auto_reading``, ``manual_reading`` and
    ``notes``.

    Parameters
    ----------
    lvcsv : str
        Path of the raw export file.

    Returns
    -------
    str or None
        Path of the sanitized file, or ``None`` (after printing the reason)
        when the schema check fails.

    Raises
    ------
    Exception
        Any error hit while processing a data line (for example an
        ``IndexError`` for a line with too few fields) is re-raised after the
        offending line number has been printed.
    """
    with open(lvcsv, "rt") as fin:
        content = fin.read()
    content = content.replace("\x00", "")
    lines = content.split("\n")

    is_ok, err = verify_schema(lines)
    if not is_ok:
        print(err)
        return None

    outdir = path.join(dataroot, "sanitized")
    # A fresh data root has no output folder yet.
    os.makedirs(outdir, exist_ok=True)
    newfile = path.join(outdir, path.basename(lvcsv))

    with open(newfile, "wt") as fout:
        # Parse and emit through the csv module so that fields containing
        # commas or quotes (free-text notes in particular) survive intact.
        writer = csv.writer(fout, lineterminator="\n")
        writer.writerow(["sno", "timestamp", "auto_reading", "manual_reading", "notes"])
        for i, line in enumerate(lines[3:], start=3):
            if not line.strip():
                continue
            try:
                # Each line is parsed on its own so that ``i`` always names
                # the exact input line that failed.
                flds = next(csv.reader([line]))
                writer.writerow([flds[1], flds[2], flds[4], flds[5], flds[13]])
            except Exception:
                # The error is propagated, not skipped, so say so.
                print(f"Unable to process line {i}.")
                raise
    return newfile


def dedup(df):
    """Collapse rows that share an index value into a single row.

    For every index value the first occurrence is kept and its missing cells
    are filled from the last occurrence; occurrences in between are not
    consulted. The result gets a ``reading`` column (``auto_reading`` with
    gaps filled from ``manual_reading``) and a ``time`` column holding the
    index formatted as ``%I:%M %p``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with ``auto_reading``, ``manual_reading`` and ``notes`` columns
        and an index that supports ``strftime``.

    Returns
    -------
    pandas.DataFrame
        The columns ``time``, ``reading`` and ``notes``, in that order.
    """
    idx = df.index
    first_rows = df.loc[~idx.duplicated(keep="first")]
    last_rows = df.loc[~idx.duplicated(keep="last")]
    merged = first_rows.combine_first(last_rows)

    reading = merged["auto_reading"].combine_first(merged["manual_reading"])
    clock = merged.index.strftime("%I:%M %p")
    merged["reading"] = reading
    merged["time"] = clock
    return merged[["time", "reading", "notes"]]

In [5]:
# Pick the most recently modified raw export.
raw_lvcsvs = glob.glob(path.join(dataroot, "raw", "*.csv"))
if not raw_lvcsvs:
    # np.argmax fails with an opaque ValueError on an empty array; report the
    # actual cause instead.
    raise FileNotFoundError(f"No CSV exports found in {path.join(dataroot, 'raw')}")
mtimes = np.array([os.stat(f).st_mtime for f in raw_lvcsvs])
latest_file = raw_lvcsvs[np.argmax(mtimes)]
latest_file

'/data/health-tracker/raw/LV_AvilayParekh_Export_06-30-2019.csv'

In [6]:
# Sanitize the newest export, then load, sort and deduplicate it.
lvcsv = sanitize(latest_file)
if lvcsv is None:
    # sanitize() returns None when the schema check fails; stop here with a
    # clear message rather than handing None to read_csv.
    raise ValueError(f"{latest_file} failed the schema check; nothing to load")
glucose = (
    pd.read_csv(lvcsv, parse_dates=["timestamp"], index_col="timestamp")
    .sort_index()
    .pipe(dedup)
)
glucose.shape

(551, 3)

In [7]:
# Preview the first rows of the deduplicated readings.
glucose.head()

Unnamed: 0_level_0,time,reading,notes
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2019-06-25 21:38:00,09:38 PM,131.0,
2019-06-25 21:39:00,09:39 PM,127.0,
2019-06-25 21:53:00,09:53 PM,141.0,
2019-06-25 22:08:00,10:08 PM,153.0,
2019-06-25 22:23:00,10:23 PM,157.0,


In [8]:
# Keep only the rows that carry a note; their index values are drawn as
# vertical markers on the plot below.
notes = glucose["notes"].dropna()

In [12]:
# Hover tooltip rows: (label, "@column") pairs looked up in the data source.
tips = [("time", "@time"), ("Note", "@notes"), ("Reading", "@reading")]
hover = HoverTool(tooltips=tips)

# Figure with a datetime x axis and a fixed y range.
p = figure(
    title="Glucose Readings",
    x_axis_type="datetime",
    y_range=(50, 225),
    plot_width=950,
    plot_height=350,
    tools=[hover, "box_zoom", "pan", "reset"],
)
p.xaxis.axis_label = "Time"
p.yaxis.axis_label = "Value"
p.xgrid.grid_line_color = None
p.ygrid.grid_line_alpha = 0.5

# The readings as a single line; the frame's index supplies the "timestamp" column.
source = ColumnDataSource(glucose)
p.line("timestamp", "reading", source=source, line_color="dimgray")

# Horizontal guide lines at 140 and 80.
# NOTE(review): presumably the bounds of the target glucose range - confirm.
upper = Span(location=140, dimension="width", line_color="firebrick", line_width=0.5)
lower = Span(location=80, dimension="width", line_color="darkcyan", line_width=0.5)
p.add_layout(upper)
p.add_layout(lower)

# One thin vertical line for every timestamp that has a note.
for ts in notes.index:
    x = Span(location=ts, dimension="height", line_color="darkgray", line_width=0.3)
    p.add_layout(x)

show(p)