In [None]:
import pandas as pd
import numpy as np
import WRIC_preprocessing as wric
import os
import glob
import re
import matplotlib.pyplot as plt
from collections import defaultdict
from matplotlib.ticker import MaxNLocator
import matplotlib.dates as mdates
pd.options.mode.chained_assignment = None
pd.set_option("display.max_rows", None)

In [None]:
# Load every CGM export in `folder_path` into `df_dictionary` (one cleaned
# DataFrame per "<participant>_t<treat>" key) and collect per-file meta data
# in `df_meta`.

# Path to all cgm files
folder_path = "D:/Simon_CIRCLE/CGM/"
skip_list = ["D:/Simon_CIRCLE/CGM/04HH_CGM_v1_treat0.csv"] # different format for some reason
#folder_path = "/media/nina/SUNSHINE/Simon_CIRCLE/CGM/"
#skip_list = ["/media/nina/SUNSHINE/Simon_CIRCLE/CGM/04HH_CGM_v1_treat0.csv"] 

# Compare by file name only: on Windows, glob joins directory and file name
# with a backslash, so the full-path strings in skip_list would never match.
skip_names = {os.path.basename(path) for path in skip_list}

meta_data = []
df_dictionary = {}

# sorted() makes the processing (and later plotting) order reproducible
for filepath in sorted(glob.glob(folder_path + "*.csv")):
    print(filepath)
    if os.path.basename(filepath) in skip_names:
        continue

    # get visit and treatment variable from file-name; skip files that do not
    # follow the naming scheme instead of reusing values from a previous file
    match = re.search(r'_v(\d+)_treat(\d+)\.csv$', filepath)
    if match is None:
        print(f"Skipping {filepath}: file name does not end in '_v<visit>_treat<treat>.csv'")
        continue
    visit = int(match.group(1))
    treat = int(match.group(2))

    # read in csv file
    df = pd.read_csv(filepath, index_col="Index")

    # save meta data (the participant ID is read from the row with Index 1)
    participant_id = df.loc[1, "Patient Info"]
    dob = df.loc[df['Event Type'] == 'DateOfBirth', 'Patient Info'].values[0]
    meta_data.append({"ID" : participant_id, "DateOfBirth" : dob, "visit" : visit, "treat" : treat})

    # keep only rows that carry a timestamp and discard the original index
    df = df[df['Timestamp (YYYY-MM-DDThh:mm:ss)'].notna()].reset_index(drop=True)
    if df.empty:
        print(f"Skipping {filepath}: no rows with a timestamp")
        continue

    # drop unnecessary columns and use short column names
    df = df.drop(columns=["Event Type","Event Subtype","Patient Info","Device Info","Source Device ID", "Insulin Value (u)","Carb Value (grams)","Duration (hh:mm:ss)","Glucose Rate of Change (mmol/L/min)","Transmitter Time (Long Integer)","Transmitter ID"])
    df = df.rename(columns={'Timestamp (YYYY-MM-DDThh:mm:ss)' : "datetime", "Glucose Value (mmol/L)": "glucose"})

    # Cut until 6pm on the first day for everyone (same timeline, but also according to Helene the first hours do not give realistic data anyways)
    df["datetime"] = pd.to_datetime(df["datetime"])
    first_day = df['datetime'].iloc[0].date()
    cutoff_time = pd.Timestamp(f'{first_day} 18:00:00')
    # .copy() so the column assignment below works on an independent frame
    df = df[df['datetime'] >= cutoff_time].copy()

    # Replace out-of-range markers ("Low"/"High") with NaN (for visualization) - make sure this is known!
    # Any other non-numeric entry still raises, so unexpected content is not hidden.
    out_of_range = df["glucose"].isin(["Low", "High"])
    df["glucose"] = df["glucose"].mask(out_of_range).astype(float)

    df = wric.add_relative_time(df)
    df_dictionary[f'{participant_id}_t{treat}'] = df


# convert meta-data to df   
df_meta = pd.DataFrame(meta_data)

In [None]:
# Overlay the glucose trace of every loaded recording on one shared axis.
fig, ax = plt.subplots(figsize=(12, 6))
for person_id in df_dictionary:
    print(person_id)
    df = df_dictionary[person_id]
    ax.plot(df['relative_time[min]'], df['glucose'], label=person_id)

#plt.xticks(rotation=45)
# Axis labels, fixed glucose range and title in one call
ax.set(
    xlabel="Relative Time (min)",
    ylabel="Glucose (mmol/L)",
    ylim=(2, 16),
    title="Glucose Values Over Time for all participants (n=10) missing v2 of 04HH",
)

# One legend entry per recording key
ax.legend(title="Person ID")

plt.tight_layout()
# plt.savefig("D:/Simon_CIRCLE/Visualizations/glucose_all.png")
plt.show()

In [None]:
# One panel per participant, with one glucose line per treatment.
groups = defaultdict(list)

# Group recording keys by participant (the first four characters of the key)
for person_id in df_dictionary.keys():
    groups[person_id[:4]].append(person_id)

# Size the grid from the number of participants so nobody is silently dropped
n_cols = 3
n_rows = max(1, -(-len(groups) // n_cols))  # ceiling division
fig, axs = plt.subplots(nrows=n_rows, ncols=n_cols, figsize=(20, 6 * n_rows), squeeze=False)

# Flatten the axs array for easy indexing
axs = axs.flatten()

# Day markers are the same for every panel: the first tick sits 6 hours
# (360 min) into the relative timeline and the rest follow every 24 hours.
# NOTE(review): this assumes relative time starts at the 18:00 cut-off - confirm
# against wric.add_relative_time.
minutes_per_day = 1440
first_tick = 360
tick_positions = [first_tick + i * minutes_per_day for i in range(6)]
tick_labels = [f"Day {day}" for day in ["2", '3', '4', '5', '6', '7']]

# Create subplots for each group
for ax, (group_id, person_ids) in zip(axs, groups.items()):
    # Plot each recording of the participant
    for person_id in person_ids:
        df = df_dictionary[person_id]  # Access the DataFrame for the recording
        # Label with the treatment suffix of the key (e.g. "t0" / "t1")
        ax.plot(df['relative_time[min]'], df['glucose'], label=person_id.rsplit('_', 1)[-1])

    ax.set_xlabel("Relative Time in min")
    ax.set_ylabel("Glucose")
    ax.set_ylim(2, 16)
    ax.set_title(f"Glucose Values Over Time for Participant {group_id}")

    ax.set_xticks(tick_positions)
    ax.set_xticklabels(tick_labels)
    for tick in tick_positions:
        ax.axvline(x=tick, color='black', linestyle='--', alpha=0.5)  # Dashed line at each tick position

    # Add a legend to distinguish each treatment
    ax.legend(title="Treatment")

# Hide grid cells that have no participant instead of showing empty frames
for unused_ax in axs[len(groups):]:
    unused_ax.set_visible(False)

# Adjust layout to prevent overlapping
plt.tight_layout()
# Same drive as the data paths used by the rest of the notebook
# plt.savefig("/media/nina/SUNSHINE/Simon_CIRCLE/Visualizations/glucose_values_by_person.png")
plt.savefig("D:/Simon_CIRCLE/Visualizations/glucose_values_by_person.png")
plt.show()

In [None]:
# TODO: Make the relative time start at the same time of day for each person - or align from the end, so that longer recordings would be fine? Then mark days and WRIC.
# TODO: Check for skipped time stamps - maybe use a scatter plot instead of a line plot?

# Adding CGM, Temperature and EE in one plot

In [None]:
# For every processed WRIC file: plot energy expenditure together with the
# matching CGM trace (restricted to the WRIC time window) and shade the
# protocol phases in the background.

# Path to the folder containing the files
# folder_path = "/media/nina/SUNSHINE/Simon_CIRCLE/WRIC/processed"
folder_path = "D:/Simon_CIRCLE/WRIC/processed"
# Step 1: Get all files ending with "_combined.csv" (sorted for a reproducible order)
csv_files = sorted(file for file in os.listdir(folder_path) if file.endswith("_combined.csv"))

protocol_colors_labels = {
    0: {"color": "white", "label": "Normal"},
    1: {"color": "blue", "label": "Sleep"},
    2: {"color": "orange", "label": "Eating"},
    3: {"color": "yellow", "label": "Exercise"},
    4: {"color": "green", "label": "RER"},
}

# Step 2: Read files into DataFrames
wric_dataframes = {}  # maps "<id>_t<treat>" to the WRIC DataFrame

for file in csv_files:
    # Extract id and treat from the file name; skip files that do not follow
    # the naming scheme instead of continuing with values from a previous file
    match = re.search(r"^(\w{4})_v\d+_treat(\d+)", file)
    if match is None:
        print(f"Pattern not found in filename: {file}")
        continue
    file_id = match.group(1)
    treat_value = int(match.group(2))  # int so the key matches the CGM dictionary keys
    print(f"ID: {file_id}, Treat: {treat_value}")
    key = f"{file_id}_t{treat_value}"

    # Read the file
    file_path = os.path.join(folder_path, file)
    wric_df = pd.read_csv(file_path)
    wric_df["datetime"] = pd.to_datetime(wric_df["datetime"])
    wric_dataframes[key] = wric_df

    # get corresponding cgm file; some recordings were skipped while loading
    cgm = df_dictionary.get(key)
    if cgm is None:
        print(f"No CGM data loaded for {key}; skipping plot")
        continue

    # cut cgm data based on time of wric data (without modifying the shared CGM frame)
    cgm_time = pd.to_datetime(cgm["datetime"])
    wric_start = wric_df["datetime"].min()
    wric_end = wric_df["datetime"].max()
    in_window = (cgm_time >= wric_start) & (cgm_time <= wric_end)
    cgm_filtered = cgm[in_window].assign(datetime=cgm_time[in_window])

    # Step 3: Plot energy expenditure ("ee") and CGM over time on twin y-axes
    fig, ax1 = plt.subplots()
    ax1.plot(wric_df["datetime"], wric_df["Energy Expenditure (kJ/min)"], label="Energy Expenditure", color="blue")
    ax2 = ax1.twinx()
    ax2.plot(cgm_filtered['datetime'], cgm_filtered['glucose'], label = "CGM", color="orange")
    ax2.set_ylabel("CGM")

    # Add labels and title
    ax1.set_xlabel("Time")
    ax1.set_ylabel("Energy Expenditure (kJ/min)")
    ax1.set_title(f"Energy Expenditure and CGM Over Time for {file_id}")

    # One tick every 3 hours; rotate the labels of ax1, because the twin
    # axis' own x-axis is hidden
    ax1.xaxis.set_major_locator(mdates.HourLocator(interval=3))
    ax1.xaxis.set_major_formatter(mdates.DateFormatter("%H:%M"))
    ax1.tick_params(axis="x", rotation=45)

    # Highlight background based on "protocol" values
    for protocol, protocol_info in protocol_colors_labels.items():
        if protocol == 0:  # Skip coloring for protocol 0
            continue

        # Rows of the current protocol; copy so the helper column stays local
        df_protocol = wric_df[wric_df["protocol"] == protocol].copy()

        # A gap of more than one minute in relative time starts a new block
        df_protocol["block"] = (df_protocol["relative_time[min]"].diff() > 1).cumsum()

        # Shade each contiguous block, using datetimes to match the x-axis
        for _, block_group in df_protocol.groupby("block"):
            ax1.axvspan(
                block_group["datetime"].min(),
                block_group["datetime"].max(),
                color=protocol_info["color"],
                alpha=0.3,
                label=protocol_info["label"],
            )

    # Single legend for both axes with one entry per label
    handles, labels = [], []
    for axis in (ax1, ax2):
        axis_handles, axis_labels = axis.get_legend_handles_labels()
        handles.extend(axis_handles)
        labels.extend(axis_labels)
    by_label = dict(zip(labels, handles))
    ax2.legend(by_label.values(), by_label.keys(), loc="upper right")

    ax1.grid(True)
    fig.tight_layout()

    # Show and save
    # save_path = f"/media/nina/SUNSHINE/Simon_CIRCLE/Visualizations/EE_{file.replace('.csv', '.png')}"
    save_path = f"D:/Simon_CIRCLE/Visualizations/EE_CGM_{file_id}_t{treat_value}.png"
    # plt.savefig(save_path)
    plt.show()
    plt.close(fig)

In [None]:
# Exemplary for 01JJ: energy expenditure and CGM on twin y-axes for one recording

example_key = "01JJ_t0"  # key into df_dictionary; also used for title and file name
wric_path = "D:/Simon_CIRCLE/WRIC/processed/01JJ_v1_treat0_WRIC_data_combined.csv"
wric_df = pd.read_csv(wric_path)

cgm = df_dictionary[example_key]

# integrate wric and cgm based on timestamp (only for WRIC time);
# the CGM timestamps are converted locally so the shared frame is not modified
cgm_time = pd.to_datetime(cgm["datetime"])
wric_df["datetime"] = pd.to_datetime(wric_df["datetime"])

# cut cgm data based on time of wric data
start_time = wric_df["datetime"].min()
end_time = wric_df["datetime"].max()
in_window = (cgm_time >= start_time) & (cgm_time <= end_time)
cgm_filtered = cgm[in_window].assign(datetime=cgm_time[in_window])

fig, ax1 = plt.subplots()
ax1.plot(wric_df["datetime"], wric_df["Energy Expenditure (kJ/min)"], label="Energy Expenditure", color="blue")
ax2 = ax1.twinx()
ax2.plot(cgm_filtered['datetime'], cgm_filtered['glucose'], label = "CGM", color="orange")
ax2.set_ylabel("CGM")

# Add labels, legend, and title
ax1.set_xlabel("Time")
ax1.set_ylabel("Energy Expenditure (kJ/min)")
ax1.set_title(f"Energy Expenditure and CGM Over Time for {example_key}")
fig.legend()

# One tick every 3 hours; rotate the labels of ax1, because the twin axis'
# own x-axis is hidden
ax1.xaxis.set_major_locator(mdates.HourLocator(interval=3))
ax1.xaxis.set_major_formatter(mdates.DateFormatter("%H:%M"))
ax1.tick_params(axis="x", rotation=45)

ax1.grid(True)
fig.tight_layout()

# Show and save; the name is built from this cell's own key, not from
# variables left over by the loop in the previous cell
save_path = f"D:/Simon_CIRCLE/Visualizations/EE_CGM_{example_key}.png"
# plt.savefig(save_path)
plt.show()
plt.close(fig)

# TODO: Align time