In [1]:
import pandas as pd
import os
import shutil
from datetime import datetime, timedelta

BASE_DIR = "D:/REU_2024/BigIdeas"  # edit where to grab data and where to save data
SUBJECT_IDS = range(1, 17)

# How long after the end of an activity bout glucose readings are still kept.
POST_ACTIVITY_WINDOW = timedelta(hours=1)

# Periods that yield fewer rows than this are skipped instead of being saved.
MIN_ROWS_PER_PERIOD = 3

# Column names in the glucose CSV -> short names used in the per-period files.
GLUCOSE_COLUMNS = {
    "Timestamp (YYYY-MM-DDThh:mm:ss)": "glucose_timestamp",
    "Glucose Value (mg/dL)": "glucose",
}


def subject_paths(subject, base_dir=BASE_DIR):
    """Return ``(glucose_csv, periods_csv, output_dir)`` for one subject.

    Subject folders and glucose files use the subject number zero-padded to
    three digits (``001`` ... ``016``).
    """
    subject_id = f"{subject:03d}"
    subject_dir = f"{base_dir}/{subject_id}"
    return (
        f"{subject_dir}/Dexcom_{subject_id}.csv",
        f"{subject_dir}/activity_bouts.csv",
        f"{subject_dir}/glucose_activity",
    )


def prepare_glucose(df_raw):
    """Return a renamed, time-sorted copy of the raw glucose table.

    The two source columns are renamed to ``glucose_timestamp`` and
    ``glucose``, and the timestamps are parsed to datetimes. Rows whose
    timestamp is missing are dropped: comparisons against NaT are always
    False, so they could never be selected for a period anyway. The result
    is sorted chronologically because the "reading just before / just after"
    lookups in ``extract_period_glucose`` rely on row order.
    """
    df = df_raw.rename(columns=GLUCOSE_COLUMNS)
    df["glucose_timestamp"] = pd.to_datetime(df["glucose_timestamp"])
    df = df.dropna(subset=["glucose_timestamp"])
    # Stable sort keeps the file order of readings that share a timestamp.
    return df.sort_values("glucose_timestamp", kind="stable")


def extract_period_glucose(df_glucose, start_time, end_time,
                           window=POST_ACTIVITY_WINDOW):
    """Collect the glucose readings that belong to one activity period.

    The result contains, in chronological order:

    * the last reading before ``start_time`` (if any),
    * every reading in ``[start_time, end_time + window]``,
    * the first reading after ``end_time + window`` (if any).

    The trailing reading is anchored on the end of the window rather than
    on ``end_time``. Anchoring it on ``end_time`` would pick a reading that
    is already inside the window and append it a second time.

    ``df_glucose`` must be sorted by ``glucose_timestamp`` (see
    ``prepare_glucose``). Three bookkeeping columns describing the activity
    period are added to every returned row.
    """
    timestamps = df_glucose["glucose_timestamp"]
    window_end = end_time + window

    in_window = df_glucose[(timestamps >= start_time) & (timestamps <= window_end)]
    before = df_glucose[timestamps < start_time].tail(1)
    after = df_glucose[timestamps > window_end].head(1)

    # pd.concat returns a new frame, so the columns added below never touch
    # the caller's glucose table.
    df_period = pd.concat([before, in_window, after])
    df_period["period_duration (mins)"] = (end_time - start_time).total_seconds() / 60
    df_period["activity_start_time"] = start_time
    df_period["activity_end_time"] = end_time
    return df_period


# The guard is true both in a notebook cell and when run as a script, so the
# loop still executes there and its variables remain globals; it only keeps
# the file I/O from running if the helpers above are imported elsewhere.
if __name__ == "__main__":
    for i in SUBJECT_IDS:
        glucose, period, output_dir = subject_paths(i)

        # Load the CSV files (both are read before the old output is removed,
        # so a missing input never wipes previously exported results).
        df_periods = pd.read_csv(period)  # Contains start_time, end_time, individual
        df_periods["start_time"] = pd.to_datetime(df_periods["start_time"])
        df_periods["end_time"] = pd.to_datetime(df_periods["end_time"])

        # Load only the necessary glucose columns.
        df_glucose = prepare_glucose(
            pd.read_csv(glucose, usecols=list(GLUCOSE_COLUMNS))
        )

        # If directory exists, delete it and recreate
        if os.path.exists(output_dir):
            shutil.rmtree(output_dir)
        os.makedirs(output_dir)

        # Iterate over each period and filter glucose data
        for idx, row in df_periods.iterrows():
            start_time = row["start_time"]
            end_time = row["end_time"]

            # Debug prints
            print(f"\nProcessing Period {idx}: {start_time} to {end_time}")

            df_filtered = extract_period_glucose(df_glucose, start_time, end_time)

            if len(df_filtered) < MIN_ROWS_PER_PERIOD:
                print(f"Skipping Period {idx} due to insufficient data (only {len(df_filtered)} rows).")
                continue

            # Save to CSV file (file names are 1-based, period indices 0-based)
            output_file = os.path.join(output_dir, f"glucose_period_{idx+1}.csv")
            df_filtered.to_csv(output_file, index=False)

            print(f"Saved: {output_file} | Rows: {len(df_filtered)}")

        print(f"Person {i} glucose saved!")



Processing Period 0: 2020-02-13 19:08:48 to 2020-02-13 19:25:01
Saved: D:/REU_2024/BigIdeas/001/glucose_activity\glucose_period_1.csv | Rows: 17

Processing Period 1: 2020-02-16 15:31:02 to 2020-02-16 15:36:38
Saved: D:/REU_2024/BigIdeas/001/glucose_activity\glucose_period_2.csv | Rows: 15

Processing Period 2: 2020-02-16 17:04:08 to 2020-02-16 17:22:41
Saved: D:/REU_2024/BigIdeas/001/glucose_activity\glucose_period_3.csv | Rows: 17

Processing Period 3: 2020-02-17 16:57:37 to 2020-02-17 17:09:24
Saved: D:/REU_2024/BigIdeas/001/glucose_activity\glucose_period_4.csv | Rows: 17

Processing Period 4: 2020-02-19 04:45:55 to 2020-02-19 04:52:06
Saved: D:/REU_2024/BigIdeas/001/glucose_activity\glucose_period_5.csv | Rows: 15

Processing Period 5: 2020-02-20 17:26:15 to 2020-02-20 17:32:33
Saved: D:/REU_2024/BigIdeas/001/glucose_activity\glucose_period_6.csv | Rows: 15
Person 1 glucose saved!

Processing Period 0: 2020-02-21 15:05:39 to 2020-02-21 15:11:08
Saved: D:/REU_2024/BigIdeas/002/glu