<a href="https://colab.research.google.com/github/chxtio/Data-Mining-Cell-Glucose-Monitoring/blob/master/Data_Mining_Project_1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Extracting Time Series Properties of Glucose Levels in Artificial Pancreas

Import required packages

In [1]:
import numpy as np
import pandas as pd

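Assume 288 samples per day (one CGM reading every 5 minutes). Days with fewer than 80% of the expected samples are dropped during cleaning.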

In [2]:
# Number of CGM samples expected in one full day; used as the denominator
# for every percentage metric and for the per-day completeness check.
TOTAL_PER_DAY = 288

# A day is dropped during cleaning when (rows for that day / TOTAL_PER_DAY)
# falls below this fraction.
DROP_THRESHOLD = 0.8

## Data Cleaning

In [3]:
def clean_data(cgm_modes, cgm_dates, total_per_day=None, drop_threshold=None):
  """Drop incomplete readings and sparsely sampled days from each mode's data.

  For every frame in `cgm_modes`, rows containing any NaN are removed first.
  Then each day listed in the matching `cgm_dates` entry is removed entirely
  when `rows_for_day / total_per_day < drop_threshold`. The latest date of the
  first mode (`cgm_dates[0]`) is never removed by this check, in any mode.

  The input list and its frames are left untouched; new frames are returned.

  Args:
    cgm_modes: Sequence of DataFrames with a datetime 'Date_Time' column.
      Index 0 is reported as "Manual Mode", every other index as "Auto Mode".
    cgm_dates: Sequence parallel to `cgm_modes`; each entry holds the
      calendar dates (datetime.date) to check for that mode.
    total_per_day: Expected samples per day. Defaults to TOTAL_PER_DAY.
    drop_threshold: Minimum kept fraction of `total_per_day`. Defaults to
      DROP_THRESHOLD.

  Returns:
    Tuple `(cleaned_modes, cgm_dates)`: a new list of cleaned DataFrames and
    the `cgm_dates` argument, returned unchanged.
  """
  if total_per_day is None:
    total_per_day = TOTAL_PER_DAY
  if drop_threshold is None:
    drop_threshold = DROP_THRESHOLD

  # The last date of the first mode is exempt from the sparse-day check.
  protected_day = max(cgm_dates[0]) if len(cgm_dates[0]) else None

  cleaned = []
  for i, mode_df in enumerate(cgm_modes):
    print("Manual Mode" if i == 0 else "Auto Mode")

    # Count rows (not individual NaN cells) so the message matches what is dropped.
    incomplete = mode_df.isnull().any(axis=1)
    print("Dropping", int(incomplete.sum()), "rows")
    complete = mode_df[~incomplete]

    # One pass to count the surviving rows per calendar day.
    row_dates = complete['Date_Time'].dt.date
    rows_per_day = row_dates.value_counts().to_dict()

    sparse_days = [
      day for day in cgm_dates[i]
      if day != protected_day
      and rows_per_day.get(day, 0) / total_per_day < drop_threshold
    ]
    # Filter the sparse days out directly instead of blanking rows with NaN
    # and dropping them afterwards.
    cleaned.append(complete[~row_dates.isin(sparse_days)])

  return cleaned, cgm_dates

## Filter sets based on glucose metrics

Time intervals (every percentage is computed relative to a full day of samples, whichever interval is selected):

        overnight: 00:00:00 - 05:59:59
        daytime: 06:00:00 - 23:59:59
        whole day: 00:00:00 - 23:59:59        

In [4]:
def filter_data(interval, cgm_modes, cgm_dates, total_per_day=None):
  """Compute six per-day glucose percentage metrics for each mode.

  For every mode and every day in the matching `cgm_dates` entry, the rows of
  that day falling inside `interval` are selected and, for each metric, the
  number of matching rows is expressed as a percentage of `total_per_day`.
  The denominator is the full-day sample count for every interval.

  Each resulting DataFrame is printed under a mode heading.

  Args:
    interval: "Overnight" keeps rows with hour < 6, "Daytime" keeps rows with
      hour >= 6; any other value keeps the whole day. The string is also
      used as the prefix of every output column name.
    cgm_modes: Sequence of DataFrames with a datetime 'Date_Time' column and
      a 'Sensor Glucose (mg/dL)' column. Index 0 is labelled "Manual Mode",
      every other index "Auto Mode".
    cgm_dates: Sequence parallel to `cgm_modes` with the calendar dates
      (datetime.date) to report for each mode.
    total_per_day: Denominator of the percentages. Defaults to TOTAL_PER_DAY.

  Returns:
    List with one DataFrame per mode: rows indexed by day, one column per
    metric named "<interval> <metric label>".
  """
  if total_per_day is None:
    total_per_day = TOTAL_PER_DAY

  glucose_col = 'Sensor Glucose (mg/dL)'
  # (label, predicate) pairs; the order here fixes the output column order.
  metrics = [
    ('percentage time in hyperglycemia(CGM > 180 mg / dL)',
     lambda g: g > 180),
    ('percentage of time in hyperglycemia critical(CGM > 250 mg / dL)',
     lambda g: g > 250),
    ('percentage time in range(CGM >= 70 mg / dL and CGM <= 180 mg / dL)',
     lambda g: (g >= 70) & (g <= 180)),
    ('percentage time in range secondary(CGM >= 70 mg / dL and CGM <= 150 mg / dL)',
     lambda g: (g >= 70) & (g <= 150)),
    ('percentage time in hypoglycemia level 1(CGM < 70 mg / dL)',
     lambda g: g < 70),
    ('percentage time in hypoglycemia level 2(CGM < 54 mg / dL)',
     lambda g: g < 54),
  ]

  df_list = []
  for i, mode_df in enumerate(cgm_modes):
    print("Manual Mode\n" if i == 0 else "Auto Mode\n")
    days = list(cgm_dates[i])

    # Restrict to the requested part of the day once per mode.
    hours = mode_df['Date_Time'].dt.hour
    if interval == "Overnight":
      segment = mode_df[hours < 6]
    elif interval == "Daytime":
      segment = mode_df[hours >= 6]
    else:
      segment = mode_df
    segment_dates = segment['Date_Time'].dt.date

    columns = {interval + " " + label: [] for label, _ in metrics}
    for day in days:
      # Slice the day once and reuse it for all six metrics.
      glucose = segment.loc[segment_dates == day, glucose_col]
      for label, predicate in metrics:
        matching = int(predicate(glucose).sum())
        columns[interval + " " + label].append((matching * 100) / total_per_day)

    mode_metrics = pd.DataFrame(
      {name: pd.Series(values, index=days) for name, values in columns.items()}
    )
    df_list.append(mode_metrics)
    print(mode_metrics)

  return df_list

## Display analysis results

In [5]:
def display(dict_list):
  """Print each mode's mapping: a mode heading, then every key with its value.

  The first element of `dict_list` is headed "Manual Mode"; every later
  element is headed "Auto Mode". Each key and value go on separate lines,
  followed by a blank line.
  """
  for position, mode_entries in enumerate(dict_list):
    heading = "Manual Mode\n" if position == 0 else "Auto Mode\n"
    print(heading)
    for label, content in mode_entries.items():
      print(label, content, sep="\n", end="\n\n")

## Output the metrics

In [6]:
def get_result(df_overnight_list, df_day_list, df_whole_list):
  """Collect the column means of every metrics frame, split by mode.

  The three lists are processed in the order overnight, daytime, whole day.
  Within each list, the frame at index 0 contributes its column means to the
  manual results and every other frame contributes to the auto results, in
  column order.

  Returns:
    Tuple `(manual_results, auto_results)` of flat lists of column means.
  """
  manual_results, auto_results = [], []

  for mode_frames in (df_overnight_list, df_day_list, df_whole_list):
    for position, frame in enumerate(mode_frames):
      target = manual_results if position == 0 else auto_results
      for column in frame.columns:
        target.append(frame[column].mean())

  return manual_results, auto_results

In [7]:
def _read_timestamped_csv(path, value_column):
  """Read `value_column` from a CSV and merge its 'Date' and 'Time' columns.

  Returns a DataFrame with a datetime 'Date_Time' column followed by
  `value_column`, keeping the row index assigned by `read_csv`.
  """
  raw = pd.read_csv(path, usecols=['Date', 'Time', value_column],
                    dtype={'Date': str, 'Time': str})
  # Combining columns through parse_dates=[['Date', 'Time']] is deprecated in
  # recent pandas; join the two strings and parse them explicitly instead.
  return pd.DataFrame({
    'Date_Time': pd.to_datetime(raw['Date'] + ' ' + raw['Time']),
    value_column: raw[value_column],
  })


def main():
  """Run the full pipeline and write the 2x18 metric means to './Results.csv'.

  Steps: read 'CGMData.csv' and 'InsulinData.csv', split the CGM readings
  into Manual and Auto mode at the first 'AUTO MODE ACTIVE PLGM OFF' alarm,
  clean both partitions, compute the metrics for the overnight, daytime and
  whole-day intervals, and save the per-mode means (row 0 Manual, row 1 Auto)
  without header or index.
  """
  # Read continuous glucose monitor (CGM) sensor data
  sensorData = _read_timestamped_csv('CGMData.csv', 'Sensor Glucose (mg/dL)')

  # Read insulin pump data
  insulinData = _read_timestamped_csv('InsulinData.csv', 'Alarm')

  # Determine the start of Auto Mode from InsulinData.csv
  auto_mode_ts = insulinData[insulinData['Alarm'] == 'AUTO MODE ACTIVE PLGM OFF']['Date_Time'].min()

  # Find the timestamp for start of Auto Mode in CGMData.csv
  cgm_auto_ts = sensorData[sensorData['Date_Time'] >= auto_mode_ts]['Date_Time'].min()

  # Partition CGM data into separate modes (Manual and Auto)
  print("Partitioning data...")
  cgm_modes = [sensorData[sensorData['Date_Time'] < cgm_auto_ts], sensorData[sensorData['Date_Time'] >= cgm_auto_ts]]
  cgm_dates = [mode_df.Date_Time.dt.date.unique() for mode_df in cgm_modes]
  for i, mode_df in enumerate(cgm_modes):
    print("Manual Mode" if i == 0 else "Auto Mode")
    print(mode_df, "\n\n")

  # Get number of days for each mode
  manual_days, auto_days = cgm_dates[0].shape[0], cgm_dates[1].shape[0]
  print("Days in Manual Mode: ", manual_days, "\nDays in Auto Mode: ", auto_days)

  # Clean the data, then recompute the dates because whole days may be gone
  print("\nCleaning the data...")
  cgm_modes, cgm_dates = clean_data(cgm_modes, cgm_dates)
  cgm_dates = [mode_df.Date_Time.dt.date.unique() for mode_df in cgm_modes]
  manual_days, auto_days = cgm_dates[0].shape[0], cgm_dates[1].shape[0]
  print("Days in Manual Mode: ", manual_days, "\nDays in Auto Mode: ", auto_days)

  # Filter sets based on glucose metrics
  print("\nFiltering the data...")
  df_overnight_list = filter_data('Overnight', cgm_modes, cgm_dates)
  df_day_list = filter_data('Daytime', cgm_modes, cgm_dates)
  df_whole_list = filter_data('Whole day', cgm_modes, cgm_dates)

  # Output 2x18 results in CSV file (3 intervals x 6 metrics per mode);
  # assigning into the fixed-shape array fails loudly if a count is off.
  results = np.zeros((2, 18))
  results[0], results[1] = get_result(df_overnight_list, df_day_list, df_whole_list)
  pd.DataFrame(results).to_csv('./Results.csv', header=False, index_label=False, index=False)

In [8]:
# Entry point: run the whole pipeline when this code is executed directly
# (the guard keeps main() from running if the code is imported as a module).
if __name__ == "__main__":
  main()

Partitioning data...
Manual Mode
                Date_Time  Sensor Glucose (mg/dL)
51087 2017-08-09 08:05:05                   159.0
51088 2017-08-09 08:00:05                   159.0
51089 2017-08-09 07:55:05                   158.0
51090 2017-08-09 07:50:05                   157.0
51091 2017-08-09 07:45:05                   160.0
...                   ...                     ...
55338 2017-07-25 12:28:54                   311.0
55339 2017-07-25 12:23:54                   311.0
55340 2017-07-25 12:18:54                   309.0
55341 2017-07-25 12:13:54                   310.0
55342 2017-07-25 12:08:54                   314.0

[4256 rows x 2 columns] 


Auto Mode
                Date_Time  Sensor Glucose (mg/dL)
0     2018-02-12 13:22:27                   118.0
1     2018-02-12 13:17:27                   122.0
2     2018-02-12 13:12:27                     NaN
3     2018-02-12 13:07:27                     NaN
4     2018-02-12 13:02:27                     NaN
...                   ...    