In [44]:
import pandas as pd
import numpy as np
import preprocessing
import metrics_experiment
from sklearn.metrics import auc
import metrics_helper
from datetime import timedelta
import plotly.graph_objects as go
import plotly.express as px
import matplotlib.pyplot as plt
import datetime
import warnings
warnings.filterwarnings('ignore')
# Permanently changes the pandas settings
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)

In [23]:
libre_tests = pd.read_csv('../tests/test-data/hypo_episodes/libre_tests.csv')
dexcom_tests = pd.read_csv('../tests/test-data/hypo_episodes/dexcom_tests.csv')

In [48]:
def calc_episodes(df, hypo, thresh, mins, long_mins=None, lv2=False):
    # Gives a consecutive unique number to each set of consecutive readings below
    # 3.9mmol/L
    if hypo:
        bool_array = df['glc'] < thresh
    else:
        bool_array = df['glc'] > thresh
        print(bool_array)
    unique_num = bool_array.ne(bool_array.shift()).cumsum()
    number_consec = unique_num.map(unique_num.value_counts()).where(bool_array)
    df_unique = pd.DataFrame({'time_rep': df['time'], 'glc_rep':
                            df['glc'], 'unique_number_low': unique_num,
                            'consec_readings_low': number_consec})
    print(df_unique.tail())
    # Drop any null glucose readings and reset index
    df_unique.dropna(subset=['glc_rep'], inplace=True)
    df_unique.reset_index(inplace=True, drop=True)

    # Group by the unique number to collapse into episodes, then use min to
    # calculate the minimum glucose for each bout and the start time
    unique_min = df_unique.groupby('unique_number_low').min()

    # Use the start time of bouts and periods between bouts to calculate duration
    # of episodes
    #unique_min['diff'] = unique_min.time_rep.diff().shift(-1)
    unique_min['diff'] = df_unique.groupby('unique_number_low').apply(lambda group: group.iloc[-1]['time_rep']-group.iloc[0]['time_rep'])
    print(unique_min)
    if lv2:
        # Only keep hypos that are 15 mins or longer (smaller than this doesn't count)
        long_hypos = unique_min[(unique_min['diff'] >= timedelta(minutes=long_mins))&(pd.notnull(unique_min['consec_readings_low']))]
        long_hypos = long_hypos.shape[0]

    # Only keep hypos that are 15 mins or longer (smaller than this doesn't count)
    results = unique_min[unique_min['diff'] >= timedelta(minutes=mins)]

    # Fill the consec readings with binary value to show whether they are hypos or
    # the periods between hypos
    results.consec_readings_low = results.consec_readings_low.fillna(-1)
    results['hypo'] = results['consec_readings_low'] > 0

    # Merge any consecutive values left by removal of too-short episodes using
    # a new unique number
    results['unique'] = results['hypo'].ne(results['hypo'].shift()).cumsum()

    # Group by the unique number, select the min values and select relevant columns
    final_results = results.groupby('unique').min()[['time_rep', 'glc_rep', 'hypo', 'diff']]

    # Calculate difference between hypo and non-hypo periods and shift column up to
    # get the final duration of the periods
    final_results['diff2'] = final_results['time_rep'].diff().shift(-1)

    # Drop the non-hypo periods and then drop the hypo column
    final_results = final_results.loc[final_results['hypo'] ==
                                      True].drop(columns=['hypo'])

    # Rename columns
    final_results.columns = ['start_time', 'min_glc', 'initial_duration', 'duration']

    # Fill final hypo with previous duration value in diff col then drop initial
    # duration
    finaL_results = final_results['duration'].fillna(final_results['initial_duration'])
    final_results.drop(columns=['initial_duration'], inplace=True)
    final_results.reset_index(drop=True, inplace=True)
    # Drop the final column if it's less than 15 mins
    final_results = final_results.loc[final_results['duration']>=
                                      timedelta(minutes=mins)]

    # Create new column identifying if the hypo is level 2 (<3mmol/L)
    #final_results['lv2'] = final_results['min_glc'] < lv2_thresh
    
    # Reset index
    final_results.reset_index(drop=True, inplace=True)
    
    # Calculate overview statistics
    number_hypos = final_results.shape[0]

    if lv2:
        if hypo:
            return {'Level 2 hypoglycemic episodes':number_hypos, f'Prolonged level 2 hypoglycemic episodes ({long_mins} mins)':long_hypos}
        else:
            return {'Level 2 hyperglycemic episodes':number_hypos, f'Prolonged level 2 hyperglycemic episodes ({long_mins} mins)':long_hypos}
    else:
        if hypo:
            return {'Total hypoglycemic episodes':number_hypos}
        else:
            return {'Total hyperglycemic episodes':number_hypos}

In [51]:
def number_of_episodes(df, hypo=True, mins=15, long_mins=120, lv1_hypo_thresh=3.9, lv2_hypo_thresh=3, lv1_hyper_thresh=10, lv2_hyper_thresh=13.9):
    '''
    Replacement helper for number of hypos
    '''
    # Set the time column to datetime and sort by it
    df['time'] = pd.to_datetime(df['time'])
    df.sort_values('time', inplace=True)
    #hypo_1 = calc_episodes(df, hypo=True, thresh=lv1_hypo_thresh, mins=mins, lv2=False)
    #hypo_2 = calc_episodes(df, hypo=True, thresh=lv2_hypo_thresh, mins=mins, long_mins=long_mins, lv2=True)
    hyper_1 = calc_episodes(df, hypo=False, thresh=lv1_hyper_thresh, mins=mins, lv2=False)
    hyper_2 = calc_episodes(df, hypo=False, thresh=lv2_hyper_thresh, mins=mins,  long_mins=long_mins, lv2=True)
    print(hyper_1)
    print(hyper_2)
    '''avg_length = final_results.duration.mean().round('1s')
    total_time_hypo = final_results.duration.sum()
    
    # Return 0s if no hypos and nan if something weird happens
    if pd.notnull(avg_length):
        avg_length = avg_length#.total_seconds() / 60
        total_time_hypo = total_time_hypo#.total_seconds() / 60
    elif number_hypos == 0:
        avg_length = 0
        total_time_hypo = 0
    else:
        avg_length = np.nan
        total_time_hypo = np.nan
        
    # Divide total hypos into number of level 1 and level 2 hypos
    number_lv2_hypos = final_results[final_results['lv2']].shape[0]
    number_lv1_hypos = number_hypos - number_lv2_hypos
    
    # Save as dataframe and return
    frame = {'Total hypoglycemic episodes':number_hypos, 'Level 1 hypoglycemic episodes':number_lv1_hypos, 'Level 2 hypoglycemic episodes':number_lv2_hypos,
                           'Average length of hypoglycemic episodes':str(avg_length), 'Total time in hypoglycemia':str(total_time_hypo)}'''
    #hypo_1.update(hypo_2)
    #hypo_1.update(hyper_1)
    #hypo_1.update(hyper_2)
    #return hypo_1

In [52]:
df = libre_tests[['time', 'test10']]
df.columns = ['time', 'glc']
df = df.dropna()
df = df.iloc[0:16]
number_of_episodes(df)

0     False
1     False
2     False
3     False
4     False
5     False
6     False
7     False
8     False
9     False
10    False
11    False
12    False
13     True
14     True
15     True
Name: glc, dtype: bool
              time_rep  glc_rep  unique_number_low  consec_readings_low
0  2018-05-21 23:50:00      6.8                  1                  NaN
1  2018-05-22 00:05:00      6.9                  1                  NaN
2  2018-05-22 00:20:00      7.6                  1                  NaN
3  2018-05-22 00:35:00      8.0                  1                  NaN
4  2018-05-22 00:50:00      7.4                  1                  NaN
5  2018-05-22 01:05:00      6.7                  1                  NaN
6  2018-05-22 01:20:00      6.7                  1                  NaN
7  2018-05-22 01:35:00      6.7                  1                  NaN
8  2018-05-22 01:50:00      6.8                  1                  NaN
9  2018-05-22 02:05:00      7.5                  1               

In [43]:
metrics

{'Total hypoglycemic episodes': 0,
 'Level 2 hypoglycemic episodes': 0,
 'Prolonged level 2 hypoglycemic episodes (120 mins)': 0,
 'Total hyperglycemic episodes': 0,
 'Level 2 hyperglycemic episodes': 0,
 'Prolonged level 2 hyperglycemic episodes (120 mins)': 0}

In [32]:
results = []
for i in libre_tests.columns.drop('time'):
    df = libre_tests[['time', i]]
    df.columns = ['time', 'glc']
    df = df.dropna()
    metrics = number_of_episodes(df)
    results.append(metrics)
results = pd.DataFrame(results)

Empty DataFrame
Columns: [time_rep, glc_rep, consec_readings_low, diff]
Index: []
Empty DataFrame
Columns: [time_rep, glc_rep, consec_readings_low, diff]
Index: []
Empty DataFrame
Columns: [time_rep, glc_rep, consec_readings_low, diff]
Index: []
Empty DataFrame
Columns: [time_rep, glc_rep, consec_readings_low, diff]
Index: []


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  results.consec_readings_low = results.consec_readings_low.fillna(-1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  results['hypo'] = results['consec_readings_low'] > 0
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  results['unique'] = results['hypo'].ne(results['hypo'].shift()).cumsum()
A value is

Empty DataFrame
Columns: [time_rep, glc_rep, consec_readings_low, diff]
Index: []
Empty DataFrame
Columns: [time_rep, glc_rep, consec_readings_low, diff]
Index: []
Empty DataFrame
Columns: [time_rep, glc_rep, consec_readings_low, diff]
Index: []
Empty DataFrame
Columns: [time_rep, glc_rep, consec_readings_low, diff]
Index: []
Empty DataFrame
Columns: [time_rep, glc_rep, consec_readings_low, diff]
Index: []
Empty DataFrame
Columns: [time_rep, glc_rep, consec_readings_low, diff]
Index: []
Empty DataFrame
Columns: [time_rep, glc_rep, consec_readings_low, diff]
Index: []
Empty DataFrame
Columns: [time_rep, glc_rep, consec_readings_low, diff]
Index: []


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  results.consec_readings_low = results.consec_readings_low.fillna(-1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  results['hypo'] = results['consec_readings_low'] > 0
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  results['unique'] = results['hypo'].ne(results['hypo'].shift()).cumsum()
A value is

Empty DataFrame
Columns: [time_rep, glc_rep, consec_readings_low, diff]
Index: []
Empty DataFrame
Columns: [time_rep, glc_rep, consec_readings_low, diff]
Index: []
Empty DataFrame
Columns: [time_rep, glc_rep, consec_readings_low, diff]
Index: []
Empty DataFrame
Columns: [time_rep, glc_rep, consec_readings_low, diff]
Index: []
Empty DataFrame
Columns: [time_rep, glc_rep, consec_readings_low, diff]
Index: []
Empty DataFrame
Columns: [time_rep, glc_rep, consec_readings_low, diff]
Index: []
Empty DataFrame
Columns: [time_rep, glc_rep, consec_readings_low, diff]
Index: []
Empty DataFrame
Columns: [time_rep, glc_rep, consec_readings_low, diff]
Index: []
Empty DataFrame
Columns: [time_rep, glc_rep, consec_readings_low, diff]
Index: []
Empty DataFrame
Columns: [time_rep, glc_rep, consec_readings_low, diff]
Index: []
Empty DataFrame
Columns: [time_rep, glc_rep, consec_readings_low, diff]
Index: []
Empty DataFrame
Columns: [time_rep, glc_rep, consec_readings_low, diff]
Index: []


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  results.consec_readings_low = results.consec_readings_low.fillna(-1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  results['hypo'] = results['consec_readings_low'] > 0
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  results['unique'] = results['hypo'].ne(results['hypo'].shift()).cumsum()
A value is

In [33]:
results

Unnamed: 0,Total hypoglycemic episodes,Level 2 hypoglycemic episodes,Prolonged level 2 hypoglycemic episodes (120 mins),Total hyperglycemic episodes,Level 2 hyperglycemic episodes,Prolonged level 2 hyperglycemic episodes (120 mins)
0,0,0,0,0,0,0
1,1,0,0,0,0,0
2,1,1,0,0,0,0
3,1,0,0,0,0,0
4,1,1,0,0,0,0
5,2,1,0,0,0,0
6,0,0,0,1,0,0
7,0,0,0,1,1,0
8,0,0,0,1,1,0
9,0,0,0,0,0,0
