# Data Import and Constants

In [None]:
import pandas as pd
import numpy as np
import os
import glob
import cgmquantify as cgm
from statistics import mean

In [None]:
# Define constants
PATH = r'' #Define path to all glucose entries files
EXTENSION = 'csv'
# Bin edges used by pd.cut to label each glucose reading: (0, 70], (70, 180], (180, 400].
# The top edge equals the 400 cap applied in process_file, so capped readings
# still fall into the highest bin instead of receiving a NaN range.
RANGES = [0,70,180,400]

# Get files and IDs
# glob returns paths in OS-dependent order; sort so output rows are reproducible.
files = sorted(glob.glob(os.path.join(PATH, f'*.{EXTENSION}')))
# ID = file name up to '_entries', with leading zeros stripped (same order as `files`)
ids = [os.path.basename(f).split('_entries', 1)[0].lstrip('0') for f in files]
files_id = pd.DataFrame({'id': ids})

# Defining the process_file function

In [None]:
def process_file(f, BL_lower_lim, BL_upper_lim):
    """
    Read one glucose-entries CSV file and return a cleaned DataFrame.

    Steps, in order: rename 'Timestamp'/'glucose' columns to 'Time'/'Glucose',
    drop rows with a duplicated timestamp (first occurrence kept), drop rows
    containing NaN or infinite values in any column, keep only readings with
    40 <= Glucose <= 1000, cap Glucose at 400, and renumber rows 0..n-1.

    Parameters:
    f (str): path to the data file
    BL_lower_lim (float): lower limit (inclusive) for bad low glucose values
    BL_upper_lim (float): upper limit (inclusive) for bad low glucose values

    Returns:
    df (pandas.DataFrame): preprocessed data with the original columns plus
        'low'    - the Glucose value where it lies within the BL limits, NaN otherwise
        'ranges' - the RANGES bin (from pd.cut) each Glucose value falls in

    Example:
    df = process_file('data.csv', 55, 70)
    """
    df = pd.read_csv(f, index_col=None, na_values=" null")
    df = df.rename(columns={'Timestamp': 'Time', 'glucose': 'Glucose'})

    # Temporarily index by parsed timestamp so duplicates can be detected on it
    df.index = pd.to_datetime(df["Time"], format='%Y-%m-%d %H:%M:%S')
    df = df.loc[~df.index.duplicated(keep='first')] # remove duplicate indexes

    # Replace infinite data with NaN and drop every row that has a NaN in any column
    df = df.replace([np.inf, -np.inf], np.nan).dropna()

    # Keep only glucose values between 40 and 1000 (both inclusive).
    # .copy() makes the filtered frame independent, so the column assignments
    # below are not chained assignments on a slice.
    df = df.loc[df['Glucose'].between(40, 1000)].copy()

    # Replace glucose values greater than 400 with 400
    df['Glucose'] = df['Glucose'].clip(upper=400)

    # Back to a plain 0..n-1 index; the 'Time' column itself is kept
    df = df.reset_index(drop=True)

    # Mark values between the BL limits (other rows get NaN)
    df['low'] = df['Glucose'].where(df['Glucose'].between(BL_lower_lim, BL_upper_lim))

    # Assign bin level (ranges) to Glucose values.
    # Readings outside (RANGES[0], RANGES[-1]] receive NaN here.
    df['ranges'] = pd.cut(df['Glucose'], bins=RANGES)

    return df


# Defining the hypoGV function

In [None]:

def hypoGV(df, t_range, direction):
    """
    Function to calculate mean glycemic variability metrics around bad low events

    Every row whose 'low' value is not NaN is treated as a bad low event. For
    each event a window of t_range + 1 consecutive rows is taken, ending at the
    event ('backward') or starting at it ('forward'). Events without enough
    rows on the requested side are skipped. Each metric is computed per window
    and then averaged over all windows.

    Parameters:
    df (pandas.DataFrame): preprocessed data from process_file function; needs a
        0..n-1 integer index and 'Time', 'Glucose', 'low' and categorical 'ranges' columns
    t_range (int): number of data points to include besides the event itself
    direction (str): either 'forward' (after hypo event) or 'backward' (before hypo event)

    Returns:
    stats (list): mean metric values, ordered as
        TORless70, TIR, TORmore180, stdd, POR, J_index, LBGI, HBGI, GMI.
        Every entry is NaN when no event produced a usable window.

    Raises:
    ValueError: if direction is neither 'forward' nor 'backward'

    Example:
    stats = hypoGV(df, 144, 'backward')
    """
    if direction not in ('forward', 'backward'):
        # Previously an unknown direction silently skipped every event
        raise ValueError(f"direction must be 'forward' or 'backward', got {direction!r}")

    # Prepare to store metrics before and after a bad low
    metrics = {name: [] for name in ['TORless70', 'TIR', 'TORmore180', 'stdd', 'POR', 'J_index', 'LBGI', 'HBGI', 'GMI']}

    for ind in df.index[df['low'].notna()]: # For each bad low, iteratively process the data
        if direction == 'forward':
            start, stop = ind, ind + t_range
            if stop >= len(df):
                continue # not enough rows after the event
        else:
            start, stop = ind - t_range, ind
            if start < 0:
                continue # not enough rows before the event

        # .loc slicing is label-based and inclusive on both ends; copy so that
        # replacing the index does not touch (or warn about) the parent frame
        df2 = df.loc[start:stop].copy()
        df2.index = pd.to_datetime(df2["Time"], format='%Y-%m-%d %H:%M:%S') # Reindex with timestamps for using cgmquantify formulas

        # Calculate TIR and TOR ranges.
        # Count rows per bin from the categorical codes (-1 = no bin) so that
        # empty bins still occupy their position regardless of groupby defaults.
        codes = df2['ranges'].cat.codes.to_numpy()
        counts = np.bincount(codes[codes >= 0], minlength=3)
        total = counts.sum()
        if total:
            shares = counts / total * 100
        else:
            shares = np.full(len(counts), np.nan) # no reading fell into any bin
        metrics['TORless70'].append(shares[0])
        metrics['TIR'].append(shares[1])
        metrics['TORmore180'].append(shares[2])

        # Calculate other variability metrics
        metrics['stdd'].append(df2['Glucose'].std())
        metrics['POR'].append(cgm.POR(df2))
        metrics['J_index'].append(cgm.J_index(df2))
        metrics['LBGI'].append(cgm.LBGI(df2))
        metrics['HBGI'].append(cgm.HBGI(df2))
        metrics['GMI'].append(cgm.GMI(df2))

    # Calculate mean metrics for each patient; NaN when there was no usable
    # window (statistics.mean raises on an empty list)
    stats = [mean(values) if values else float('nan') for values in metrics.values()]

    return stats

# Invoking Functions and Output

In [None]:
# Process each file and calculate stats
all_stats = [hypoGV(process_file(f, BL_lower_lim=55, BL_upper_lim=70), t_range=144, direction='backward') for f in files]

# Convert mean variability to df and assign column names.
# The labels must follow the order in which hypoGV returns its metrics:
# TORless70, TIR, TORmore180, stdd, POR, J_index, LBGI, HBGI, GMI.
statistics = pd.DataFrame(all_stats, columns=['Mean TOR<70 [%]', 'Mean TIR [%]', 'Mean TOR>180 [%]', 'Mean STD', 'Mean POR', 'Mean J_index', 'Mean LBGI', 'Mean HBGI', 'Mean GMI'])

# Put the patient IDs next to their statistics (both frames share the order of `files`)
statistics_with_ids = pd.concat([files_id, statistics], axis=1)

statistics_with_ids