# Import Libraries #

In [1]:
import pandas as pd
import numpy as np
import matplotlib as plt

from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans

# Load Data #

In [13]:
cgmdata = "/Users/venyo/ProDev/metaflow-practice/cgm.txt"

# Load raw file
df = pd.read_csv(
    cgmdata,
    sep="|",
    names=["iPtID", "Period", "DataDtTm", "CGM"],
    skiprows=1
)

# Look at the first few rows
df.head()

Unnamed: 0,iPtID,Period,DataDtTm,CGM
0,1,1. Baseline,11DEC17:23:59:25,172
1,1,1. Baseline,12DEC17:00:04:24,170
2,1,1. Baseline,12DEC17:00:09:24,167
3,1,1. Baseline,12DEC17:00:14:25,163
4,1,1. Baseline,12DEC17:00:19:25,160


# Convert Timestamp Column to Date Line #

In [14]:
df["DataDtTm"] = pd.to_datetime(
    df["DataDtTm"],
    format="%d%b%y:%H:%M:%S"
)

df




Unnamed: 0,iPtID,Period,DataDtTm,CGM
0,1,1. Baseline,2017-12-11 23:59:25,172
1,1,1. Baseline,2017-12-12 00:04:24,170
2,1,1. Baseline,2017-12-12 00:09:24,167
3,1,1. Baseline,2017-12-12 00:14:25,163
4,1,1. Baseline,2017-12-12 00:19:25,160
...,...,...,...,...
9032230,99,2. Post Randomization,2018-09-15 23:37:46,149
9032231,99,2. Post Randomization,2018-09-15 23:42:46,151
9032232,99,2. Post Randomization,2018-09-15 23:47:45,154
9032233,99,2. Post Randomization,2018-09-15 23:52:46,154


# Clean and Pre-Process Data #

In [15]:
df = df.drop_duplicates()
df = df.dropna()
#Only work with baseline periods so there is no bias
df = df[df['Period'] != '2. Post Randomization']
df

Unnamed: 0,iPtID,Period,DataDtTm,CGM
0,1,1. Baseline,2017-12-11 23:59:25,172
1,1,1. Baseline,2017-12-12 00:04:24,170
2,1,1. Baseline,2017-12-12 00:09:24,167
3,1,1. Baseline,2017-12-12 00:14:25,163
4,1,1. Baseline,2017-12-12 00:19:25,160
...,...,...,...,...
8981484,99,1. Baseline,2018-03-16 23:38:25,105
8981485,99,1. Baseline,2018-03-16 23:43:25,105
8981486,99,1. Baseline,2018-03-16 23:48:25,103
8981487,99,1. Baseline,2018-03-16 23:53:25,99


# Seperating by Day #
Testing with one Patient

In [21]:
df10 = df[df['iPtID'] == 10]

df10 = df10[df10["DataDtTm"] >= (pd.to_datetime('16/03/2018') - pd.Timedelta(days=30))]

df10


  df10 = df10[df10["DataDtTm"] >= (pd.to_datetime('16/03/2018') - pd.Timedelta(days=30))]


Unnamed: 0,iPtID,Period,DataDtTm,CGM
57016,10,1. Baseline,2018-04-03 23:58:52,236
57017,10,1. Baseline,2018-04-04 00:03:52,236
57018,10,1. Baseline,2018-04-04 00:08:53,239
57019,10,1. Baseline,2018-04-04 00:13:53,243
57020,10,1. Baseline,2018-04-04 00:18:53,243
...,...,...,...,...
61019,10,1. Baseline,2018-04-17 23:38:28,237
61020,10,1. Baseline,2018-04-17 23:43:28,235
61021,10,1. Baseline,2018-04-17 23:48:27,234
61022,10,1. Baseline,2018-04-17 23:53:28,235


In [None]:
summaries = []
glucose_readings = []
grouped = df10.groupby("DataDtTm")
day = ''

for date, group in grouped:
    glucose = group["CGM"]
    
    glucose_readings.append(glucose)


    mean = round(glucose.mean(),2)
    std = round(glucose.std(), 2)
    pct_high = round((glucose > 180).mean() * 100, 2)
    pct_low = round((glucose < 70).mean() * 100, 2)
    max_value = glucose.max()
    min_value = glucose.min()
    time_of_peak = group.loc[glucose.idxmax(), "DataDtTm"].time()
    time_of_lowest = group.loc[glucose.idxmin(), "DataDtTm"].time()

    


    
    summary = {
        "Date": str(date),
        "cgm_level": ,
        "StdDev": std,
        "PercentAbove180": pct_high,
        "PercentBelow70": pct_low,
        "Max": max_value,
        "Min": min_value,
        "TimeOfPeak": str(time_of_peak)
    }


    summaries.append(summary)
    
for s in summaries:
    print(s)

{'Date': '2018-04-03 23:58:52', 'Mean': 236.0, 'StdDev': nan, 'PercentAbove180': 100.0, 'PercentBelow70': 0.0, 'Max': 236, 'Min': 236, 'TimeOfPeak': '23:58:52'}
{'Date': '2018-04-04 00:03:52', 'Mean': 236.0, 'StdDev': nan, 'PercentAbove180': 100.0, 'PercentBelow70': 0.0, 'Max': 236, 'Min': 236, 'TimeOfPeak': '00:03:52'}
{'Date': '2018-04-04 00:08:53', 'Mean': 239.0, 'StdDev': nan, 'PercentAbove180': 100.0, 'PercentBelow70': 0.0, 'Max': 239, 'Min': 239, 'TimeOfPeak': '00:08:53'}
{'Date': '2018-04-04 00:13:53', 'Mean': 243.0, 'StdDev': nan, 'PercentAbove180': 100.0, 'PercentBelow70': 0.0, 'Max': 243, 'Min': 243, 'TimeOfPeak': '00:13:53'}
{'Date': '2018-04-04 00:18:53', 'Mean': 243.0, 'StdDev': nan, 'PercentAbove180': 100.0, 'PercentBelow70': 0.0, 'Max': 243, 'Min': 243, 'TimeOfPeak': '00:18:53'}
{'Date': '2018-04-04 00:23:53', 'Mean': 236.0, 'StdDev': nan, 'PercentAbove180': 100.0, 'PercentBelow70': 0.0, 'Max': 236, 'Min': 236, 'TimeOfPeak': '00:23:53'}
{'Date': '2018-04-04 00:28:52', 'M

In [None]:
sums = {}
counts = {}
mean_values = []


for row in summaries:
    for key, value in row.items():
        if isinstance(value, (int, float)) and value is not None:
            if key not in sums:
                sums[key] = 0
                counts[key] = 0
            sums[key] += value
            counts[key] += 1
            if key == "Mean":
                mean_values.append(value)

averages = {key: sums[key] / counts[key] for key in sums}


mean_of_means = averages["Mean"]
cgm_variability = math.sqrt(sum((x - mean_of_means) ** 2 for x in mean_values) / len(mean_values))


averages["CGM Variability"] = cgm_variability
averages.pop("StdDev")

print("Average summary:")
for key, avg in averages.items():
    print(f"{key}: {avg:.2f}")