In [None]:
import pandas as pd
import numpy as np
import WRIC_preprocessing as wric
import os
import glob
import re
import matplotlib.pyplot as plt
from collections import defaultdict
# Display every row of a DataFrame instead of truncating long outputs
pd.set_option("display.max_rows", None)

In [None]:
# Path to all cgm files
folder_path = "D:/Simon_CIRCLE/CGM/"
skip_list = ["D:/Simon_CIRCLE/CGM/04HH_CGM_v1_treat0.csv"] # different format for some reason

# Columns of the raw CGM export that are not needed once the meta data has been read
unused_columns = ["Event Type","Event Subtype","Patient Info","Device Info","Source Device ID", "Insulin Value (u)","Carb Value (grams)","Duration (hh:mm:ss)","Glucose Rate of Change (mmol/L/min)","Transmitter Time (Long Integer)","Transmitter ID"]


def normalize_path(path):
    """Return `path` in a canonical form so two spellings of the same file compare equal.

    glob joins directory and file name with the platform separator, so a path
    returned by glob can differ from a hand-written one only in its slashes.
    """
    return os.path.normcase(os.path.normpath(path))


def parse_visit_treat(filepath):
    """Extract the visit and treatment numbers from a CGM file name.

    Returns a tuple `(visit, treat)` of ints when the name ends in
    `_v<visit>_treat<treat>.csv`, otherwise `None`.
    """
    match = re.search(r'_v(\d+)_treat(\d+)\.csv$', filepath)
    if match is None:
        return None
    return int(match.group(1)), int(match.group(2))


meta_data = []
df_dictionary = {}
skipped_paths = {normalize_path(path) for path in skip_list}

for filepath in glob.glob(folder_path + "*.csv"):
    print(filepath)
    if normalize_path(filepath) in skipped_paths:
        continue

    # get visit and treatment variable from file-name; without them the file
    # cannot be labelled, so skip it instead of reusing the previous file's values
    visit_treat = parse_visit_treat(filepath)
    if visit_treat is None:
        print(f"Skipping {filepath}: file name does not end in '_v<visit>_treat<treat>.csv'")
        continue
    visit, treat = visit_treat

    # read in csv file
    df = pd.read_csv(filepath, index_col="Index")

    # save meta data (participant id is read from the row labelled 1, date of
    # birth from the first row whose event type is 'DateOfBirth')
    participant_id = df.loc[1, "Patient Info"]
    dob = df.loc[df['Event Type'] == 'DateOfBirth', 'Patient Info'].values[0]
    meta_data.append({"ID" : participant_id, "DateOfBirth" : dob, "visit" : visit, "treat" : treat})

    # keep only rows with a timestamp, drop unnecessary columns and rename the rest
    df = (
        df[df['Timestamp (YYYY-MM-DDThh:mm:ss)'].notna()]
        .reset_index(drop=True)
        .drop(columns=unused_columns)
        .rename(columns={'Timestamp (YYYY-MM-DDThh:mm:ss)' : "datetime", "Glucose Value (mmol/L)": "glucose"})
    )
    df["datetime"] = pd.to_datetime(df["datetime"])
    # Replace "Low" with None, which becomes NaN after the float cast (for
    # visualization) - make sure this is known!
    df["glucose"] = np.where(df["glucose"] == "Low", None, df["glucose"]).astype(float)
    # presumably adds the 'relative_time[min]' column used by the plots - confirm in WRIC_preprocessing
    df = wric.add_relative_time(df)

    # save in dict, keyed by participant id and treatment
    df_dictionary[f'{participant_id}_t{str(treat)}'] = df


# convert meta-data to df
df_meta = pd.DataFrame(meta_data)

In [None]:
# Overlay the glucose trace of every loaded recording on one shared axes
fig, ax = plt.subplots(figsize=(12, 6))
for recording_key, recording in df_dictionary.items():
    print(recording_key)
    ax.plot(
        recording['relative_time[min]'],
        recording['glucose'],
        label=recording_key,
    )

# Axis labels, fixed y-range and title in one call
ax.set(
    xlabel="Relative Time (min)",
    ylabel="Glucose (mmol/L)",
    ylim=(2, 16),
    title="Glucose Values Over Time for all participants (n=10) missing v2 of 04HH",
)

# One legend entry per recording
ax.legend(title="Person ID")

plt.tight_layout()
plt.show()

In [None]:
# Collect the recording keys ("<participant id>_t<treat>") per participant
groups = defaultdict(list)

# Group identifiers based on the first four characters
for person_id in df_dictionary.keys():
    group_key = person_id[:4]  # Get the first four characters
    groups[group_key].append(person_id)

# Size the grid from the number of participants so none is silently dropped;
# always keep at least one row so plt.subplots does not fail on empty input
n_cols = 3
n_rows = max(1, -(-len(groups) // n_cols))  # ceiling division
fig, axs = plt.subplots(nrows=n_rows, ncols=n_cols, figsize=(20, 6 * n_rows), squeeze=False)

# squeeze=False always yields a 2-D array; flatten it for easy indexing
axs = axs.flatten()

# Create one subplot per participant
for ax, (group_id, person_ids) in zip(axs, groups.items()):
    # Plot each recording of the participant
    for person_id in person_ids:
        df = df_dictionary[person_id]  # Access the DataFrame for the recording
        # Label with the part after the first underscore (the treatment suffix, e.g. "t0")
        ax.plot(df['relative_time[min]'], df['glucose'], label=person_id.split('_')[1])

    ax.set_xlabel("Relative Time in min")
    ax.set_ylabel("Glucose")
    ax.set_ylim(2, 16)
    ax.set_title(f"Glucose Values Over Time for Participant {group_id}")

    # Add a legend to distinguish each treatment
    ax.legend(title="Treatment")

# Hide the panels that did not receive a participant
for unused_ax in axs[len(groups):]:
    unused_ax.set_visible(False)

# Adjust layout to prevent overlapping
plt.tight_layout()
plt.show()

In [None]:
# TODO: Make the relative time start at the same time of day for each person - or map from the back so longer would be fine? And then mark days and WRIC 
# Check for skipped timestamps (gaps in the recording) - maybe use a scatter plot instead of a line plot?