In [None]:
class args :
    """Plain attribute container holding the settings used by this notebook."""
    summary = True
    ndetailed = 4
    outname = 'output.root'
    datadir = '../BGSuggest/data'

In [None]:
import re
import os
from string import ascii_letters

def GetInputFiles(_args) :
    """Return the sorted paths of the data files found in ``_args.datadir``.

    Side effect: ``_args.match_regexp`` is overwritten with the list of
    filename patterns used here.

    A directory entry is kept when ``re.match`` (anchored at the start of
    the name) succeeds for at least one pattern. Each kept entry is returned
    as ``'<datadir>/<entry>'``. The result is ordered by the path with its
    leading ASCII letters, '/', '_' and '.' characters stripped.
    """
    # We will process Medtronic csv files OR TidePool json files!
    # _args.match_regexp should be a list of regexp tries
    # (e.g. ['CareLink_Export.*csv','Tidepool_Export.*json'])
    _args.match_regexp = ['Tidepool_Export.*json','Annotations_.*json']

    def _is_wanted(entry) :
        # True as soon as one pattern matches the start of the entry name
        return any(re.match(pattern,entry) for pattern in _args.match_regexp)

    selected = ['%s/%s'%(_args.datadir,entry)
                for entry in os.listdir(_args.datadir)
                if _is_wanted(entry)]

    strippable = ascii_letters+'/_.'
    selected.sort(key=lambda path: path.lstrip(strippable))
    return selected

# Collect the matching data files once; later cells read from this list.
inputfiles = GetInputFiles(args)

Start playing around with just one file
========================

In [None]:
import json
# Parse the first input file. The context manager closes the handle as soon
# as the JSON has been read instead of leaving it open for the whole session.
with open(inputfiles[0]) as json_file :
    data = json.load(json_file)

In [None]:
# Display one record of the parsed file to inspect its structure.
data[1]

Getting the Numpy Structured Array to work (one example)
=============

In [None]:
import numpy as np

In [None]:
# Minimal structured-array example: each record has a 16-bit integer field
# and a second-resolution timestamp field.
example_dtype = [('BGReading', np.int16),('DeviceTime','datetime64[s]')]
example_rows = [(60,'2019-02-24T22:51:47'),
                (50,'2019-02-24T22:51:48')]
all_data = np.array(example_rows,dtype=example_dtype)
print(all_data)
print(all_data.dtype)

Now to our case:
============

Set up the data types
-------

In [None]:
## Structured arrays (sequence of named fields)
import datetime

def getBGReading(i) :
    """Return the blood-glucose value of record ``i``, or -1 if it has none.

    Only records whose ``'type'`` is ``'smbg'`` carry a reading; every other
    record yields the sentinel -1 without its units being inspected.

    The ``'units'`` entry may be a plain string or a dict with a ``'bg'``
    key. When it says ``'mmol/L'`` the value is multiplied by 18.01559
    (mmol/L -> mg/dL); otherwise the value is returned unchanged.

    Raises KeyError when the record has no ``'type'`` key.
    """
    if i['type'] != 'smbg' :
        return -1

    units = i.get('units',None)
    # A dict-valued 'units' stores the BG unit under 'bg'. Only call .get()
    # on an actual dict: a string such as 'mg/dL' has no .get() method.
    if isinstance(units,dict) :
        units = units.get('bg',None)

    cfactor = 18.01559 if units == 'mmol/L' else 1
    return i.get('value')*cfactor

def getDeviceTime(i) :
    """Convert the ``'deviceTime'`` string of record ``i`` to ``np.datetime64``.

    The string is first handed to ``np.datetime64`` directly. If NumPy
    rejects it with ValueError, it is parsed as ``'%m/%d/%y %H:%M:%S'`` and
    re-formatted as ``'%Y-%m-%dT%H:%M:%S'`` before conversion.

    Raises KeyError when the record has no ``'deviceTime'`` key.
    """
    raw = i['deviceTime']
    try :
        return np.datetime64(raw)
    except ValueError :
        pass
    parsed = datetime.datetime.strptime(raw,'%m/%d/%y %H:%M:%S')
    return np.datetime64(parsed.strftime('%Y-%m-%dT%H:%M:%S'))

# Sanity check: show the largest value an np.int16 can hold, since that is
# the dtype used for the BGReading field.
print('Max of int16 is:',np.iinfo(np.int16).max,'(suitable for BG)')

def getDataFields() :
    """Describe the columns of the structured array built from the records.

    Returns a list with one dict per column, in column order. Each dict has:
      'name' -- the field name in the structured array,
      'fcn'  -- a callable taking one record and returning the field value,
      'type' -- the NumPy dtype of the field.
    """
    fields = [{'name':'DeviceTime','fcn':getDeviceTime,'type':'datetime64[s]'},
              {'name':'BGReading' ,'fcn':getBGReading ,'type':np.int16},
             ]
    return fields

# Build the column descriptions once, then derive the (name, type) pairs
# that NumPy accepts as a structured dtype.
fields = getDataFields()
dtype = [(column['name'],column['type']) for column in fields]
print('Dtype:',dtype)

In [None]:
# Here is something that works:
# np.array(list(tuple(field['fcn'](i) for field in fields) for i in data[:5]),dtype=dtype)

Loop through files and make an np array
------------

In [None]:
# Populate this events list with tuples containing the event info
# (one value per entry of `fields`, in the same order).
events = []

for inputfile in inputfiles :

    # Only keep the file open while it is being parsed.
    with open(inputfile,'r') as json_file :
        data = json.load(json_file)

    for i in data :
        # Records without a timestamp cannot be placed on the time axis.
        if 'deviceTime' not in i :
            continue

        try :
            event = tuple(field['fcn'](i) for field in fields)
        except KeyError :
            # Skip the record entirely: a partially filled tuple would not
            # fit the structured dtype when the array is built below.
            print('Exception occurred with\n',i)
            continue

        events.append(event)

# Sort the events by their first field (DeviceTime)
events.sort(key=lambda x: x[0])

all_data = np.array(events,dtype=dtype)

print('length of array:',len(all_data))

# x = np.array(dtype=())

Now start making plots!
==========

Plot of all BG points
----------

In [None]:
import matplotlib.pyplot as plt
plt.rcParams["figure.figsize"] = (10,6)

# Trick: make an array of booleans, which can then be used to select the
# entries corresponding to True (records without a reading hold -1).
BG_indices = all_data['BGReading'] > 0

# Explicit figure/axes so the plot carries its own labels and title.
fig, ax = plt.subplots()
ax.scatter(all_data['DeviceTime'][BG_indices],all_data['BGReading'][BG_indices])
ax.set(xlabel='time', ylabel='BG (mg/dL)', title='All BG readings')
plt.show()

Rolling average of BG points in time
-----------

In [None]:
# Show the BGReading column (records that are not BG readings hold -1).
print(all_data['BGReading'])

In [None]:
# First and last timestamps of the array; the events were sorted by time
# before the array was built, so this is the period the data covers.
print(all_data['DeviceTime'][0],'to',all_data['DeviceTime'][-1])

In [None]:
import math

# Days for which the rolling statistics are evaluated: from four weeks after
# the first record up to (but excluding) the day of the last record.
timerange = np.arange(all_data['DeviceTime'][0]+np.timedelta64(4,'W'),
                      all_data['DeviceTime'][-1], dtype='datetime64[D]')
bgAverage_17wk = []
bgRMS_17wk = []

# Length of the look-back window applied at every plotted day.
averaging_window = np.timedelta64(17,'W')

# Keep only real readings (negative values are placeholders) and widen them
# to float so that sums over many int16 readings cannot overflow.
is_reading = all_data['BGReading'] >= 0
reading_times = all_data['DeviceTime'][is_reading]
reading_values = all_data['BGReading'][is_reading].astype(np.float64)

for t_plot in timerange :

    # Age of every reading relative to the plotted day; keep those that are
    # not in the future and at most one window old.
    data_age = t_plot - reading_times
    in_window = (data_age >= np.timedelta64(0,'s')) & (data_age <= averaging_window)
    bgsOfPreviousWeeks = reading_values[in_window]

    if not bgsOfPreviousWeeks.size :
        # No readings in the window: record 0 for both quantities so the two
        # lists stay aligned with timerange, and avoid dividing by zero.
        bgAverage_17wk.append(0)
        bgRMS_17wk.append(0)
        continue

    bgAverage_17wk.append(float(bgsOfPreviousWeeks.mean()))
    # Population standard deviation: sqrt(mean((x - mean)^2))
    bgRMS_17wk.append(float(bgsOfPreviousWeeks.std()))

In [None]:
# Solid "error bars" are achieved using fill_between function
avg = np.array(bgAverage_17wk)
rms = np.array(bgRMS_17wk)

fig, ax = plt.subplots(constrained_layout=True)
h1 = ax.plot(timerange,avg,label='17-week average')
ax.set(xlabel='time', ylabel='BG (mg/dL)',title='Seventeen-week average')
h2 = ax.fill_between(timerange, avg-rms, avg+rms,
                     alpha=0.5, edgecolor='#1B2ACC', facecolor='#089FFF',
                     label='17-week average RMS')

# Some reference values: the latest average drawn as a flat line, with the
# RMS band re-centred on it.
h3 = ax.plot(timerange,np.full(len(timerange),avg[-1]),label='RMS with flat BG',color='orange')
ax.plot(timerange,avg[-1]+rms,color='orange')
ax.plot(timerange,avg[-1]-rms,color='orange')

# Fix the legend order (average, RMS band, flat reference) by passing the
# artists themselves. Reordering by position in get_legend_handles_labels()
# is fragile because that order is not the same in every matplotlib version.
# ax.plot returns a list of lines, hence the [0].
ax.legend(handles=[h1[0],h2,h3[0]])