In [2]:
import sys
import os

current_dir = os.getcwd()

project_root = os.path.join(current_dir, '..', '..')

if project_root not in sys.path:
    sys.path.append(project_root)


In [3]:
from influxdb import InfluxDBClient
import pandas as pd
from datetime import datetime

from src.querying import setup_connection, fill_nulls, query_garmin

In [4]:
client = setup_connection()

sex = 'M'

Successfully connected to InfluxDB. Available databases: [{'name': 'GarminStats'}, {'name': '_internal'}]


In [1]:
query_vo2 = """
SELECT "sleepTimeSeconds" FROM "SleepSummary" 
"""

df = fill_nulls(query_garmin(client, query_vo2))

NameError: name 'fill_nulls' is not defined

In [6]:
df = df.reset_index()

df = df.rename(columns={'time': 'WakeTime'})

# Convert the 'Sleep wake time' column to datetime objects
df['WakeTime'] = pd.to_datetime(df['WakeTime'])

# Calculate 'Going to sleep time' by subtracting the duration from the wake time
df['SleepTime'] = df['WakeTime'] - pd.to_timedelta(df['sleepTimeSeconds'], unit='s')

# Get the 'Sleep Date' based on the 'Going to sleep time'
df['SleepDate'] = df['WakeTime'].dt.date - pd.Timedelta(days=1)

# Calculate 'Sleep duration (hours)'
df['SleepDuration'] = df['sleepTimeSeconds'] / 3600

# Select and reorder the final columns
df_final = df[['SleepDate', 'SleepTime', 'WakeTime', 'SleepDuration']]


In [8]:
df_final

Unnamed: 0,SleepDate,SleepTime,WakeTime,SleepDuration
0,2024-02-07,2024-02-07 23:43:00+00:00,2024-02-08 09:05:00+00:00,9.366667
1,2024-02-09,2024-02-09 23:58:00+00:00,2024-02-10 08:21:00+00:00,8.383333
2,2024-02-10,2024-02-11 00:12:00+00:00,2024-02-11 08:20:00+00:00,8.133333
3,2024-02-11,2024-02-11 23:54:00+00:00,2024-02-12 08:24:00+00:00,8.500000
4,2024-02-12,2024-02-13 00:30:00+00:00,2024-02-13 08:55:00+00:00,8.416667
...,...,...,...,...
545,2025-08-13,2025-08-14 02:05:51+00:00,2025-08-14 07:00:51+00:00,4.916667
546,2025-08-14,2025-08-15 00:00:44+00:00,2025-08-15 08:10:44+00:00,8.166667
547,2025-08-15,2025-08-15 23:59:43+00:00,2025-08-16 08:30:43+00:00,8.516667
548,2025-08-16,2025-08-17 00:05:25+00:00,2025-08-17 08:58:25+00:00,8.883333


In [9]:
import pandas as pd
import numpy as np

def calculate_whoop_consistency(df_input):
    """
    Calculates a WHOOP-style consistency metric for a given DataFrame slice.
    """
    df = df_input.copy()

    # Standardize sleep times to a 24-hour cycle (minutes from midnight)
    df['bedtime_minutes'] = df['SleepTime'].dt.hour * 60 + df['SleepTime'].dt.minute
    df['wake_up_minutes'] = df['WakeTime'].dt.hour * 60 + df['WakeTime'].dt.minute

    # Create a binary array for each day's sleep pattern
    sleep_patterns = []
    for index, row in df.iterrows():
        daily_pattern = [0] * (24 * 60) # 24 hours * 60 minutes
        bed_time = int(row['bedtime_minutes'])
        wake_time = int(row['wake_up_minutes'])

        if wake_time < bed_time: # Handle wrap-around midnight
            for minute in range(bed_time, 24 * 60):
                daily_pattern[minute] = 1
            for minute in range(0, wake_time):
                daily_pattern[minute] = 1
        else:
            for minute in range(bed_time, wake_time):
                daily_pattern[minute] = 1
        sleep_patterns.append(daily_pattern)

    num_days = len(sleep_patterns)
    if num_days < 4:
        return np.nan

    consistent_minutes = 0
    total_minutes = 24 * 60

    # For each minute of the day, check if the state is consistent across all four days
    for minute_idx in range(total_minutes):
        states_at_minute = [pattern[minute_idx] for pattern in sleep_patterns]
        # Check if all states are the same (all 0s or all 1s)
        if len(set(states_at_minute)) == 1:
            consistent_minutes += 1

    consistency_score = (consistent_minutes / total_minutes) * 100
    return consistency_score

DataFrame with WHOOP-style consistency scores:
      SleepDate                 SleepTime                  WakeTime  \
0    2024-02-07 2024-02-07 23:43:00+00:00 2024-02-08 09:05:00+00:00   
1    2024-02-09 2024-02-09 23:58:00+00:00 2024-02-10 08:21:00+00:00   
2    2024-02-10 2024-02-11 00:12:00+00:00 2024-02-11 08:20:00+00:00   
3    2024-02-11 2024-02-11 23:54:00+00:00 2024-02-12 08:24:00+00:00   
4    2024-02-12 2024-02-13 00:30:00+00:00 2024-02-13 08:55:00+00:00   
..          ...                       ...                       ...   
545  2025-08-13 2025-08-14 02:05:51+00:00 2025-08-14 07:00:51+00:00   
546  2025-08-14 2025-08-15 00:00:44+00:00 2025-08-15 08:10:44+00:00   
547  2025-08-15 2025-08-15 23:59:43+00:00 2025-08-16 08:30:43+00:00   
548  2025-08-16 2025-08-17 00:05:25+00:00 2025-08-17 08:58:25+00:00   
549  2025-08-17 2025-08-17 23:02:01+00:00 2025-08-18 07:02:01+00:00   

     WHOOP_Consistency  
0                  NaN  
1                  NaN  
2                  NaN  


In [15]:
# Calculate the WHOOP-style consistency using a 4-day rolling window
whoop_consistency_scores = [np.nan] * 3 # First three entries have no 4-day window
for i in range(3, len(df_final)):
    four_day_df = df_final.iloc[i-3:i+1].copy()
    consistency_score = calculate_whoop_consistency(four_day_df)
    whoop_consistency_scores.append(consistency_score)

df_final['sci'] = whoop_consistency_scores

In [18]:
def linear_interpolation(baseline, current):
    """
    Performs linear interpolation on a set of 2D coordinates.

    Args:
        baseline (pd.DataFrame): A DataFrame with 'sci' and 'hr' columns,
                                 representing the baseline points for interpolation.
                                 'sci' values must be sorted.
        current (pd.DataFrame): A DataFrame with 'sci' values for which to
                                interpolate 'hr' values.

    Returns:
        pd.Series: A Series containing the interpolated 'hr' values.
    """
    
    # Sort the baseline DataFrame by 'sci' values to ensure correct interpolation
    baseline = baseline.sort_values(by='sci')
    
    # Initialize sci_1_hr to a default value
    sci_1_hr = np.nan
    
    # Check if a point with sci = 1 exists in the baseline
    if 1 in baseline['sci'].values:
        sci_1_hr = baseline[baseline['sci'] == 1]['hr'].iloc[0]

    # Handle out-of-bounds values (extrapolation)
    min_sci = baseline['sci'].min()
    max_sci = baseline['sci'].max()
    min_hr = baseline[baseline['sci'] == min_sci]['hr'].iloc[0]
    max_hr = baseline[baseline['sci'] == max_sci]['hr'].iloc[0]

    def interpolate_value(sci):
        # Handle null values first
        if pd.isna(sci):
            return sci_1_hr
        
        # Extrapolation cases
        if sci <= min_sci:
            return min_hr
        if sci >= max_sci:
            return max_hr
        
        # Linear interpolation
        # Find the two surrounding points
        upper_point = baseline[baseline['sci'] >= sci].iloc[0]
        lower_point = baseline[baseline['sci'] <= sci].iloc[-1]
        
        x1, y1 = lower_point['sci'], lower_point['hr']
        x2, y2 = upper_point['sci'], upper_point['hr']

        # The linear interpolation formula
        if (x2 - x1) != 0:
            interpolated_hr = y1 + (sci - x1) * (y2 - y1) / (x2 - x1)
            return interpolated_hr
        else:
            return y1 # Handle case where two points have the same 'sci' value

    # Apply the interpolation function to each 'sci' value in the current data
    current['hr'] = current['sci'].apply(interpolate_value)

    return current['hr']

In [21]:
df_final['sci_hr'] = linear_interpolation(pd.read_csv('sci_hr.csv'), df_final) 

In [22]:
df_final

Unnamed: 0,SleepDate,SleepTime,WakeTime,SleepDuration,WHOOP_Consistency,sci,hr,sci_hr
0,2024-02-07,2024-02-07 23:43:00+00:00,2024-02-08 09:05:00+00:00,9.366667,,,,
1,2024-02-09,2024-02-09 23:58:00+00:00,2024-02-10 08:21:00+00:00,8.383333,,,,
2,2024-02-10,2024-02-11 00:12:00+00:00,2024-02-11 08:20:00+00:00,8.133333,,,,
3,2024-02-11,2024-02-11 23:54:00+00:00,2024-02-12 08:24:00+00:00,8.500000,94.861111,94.861111,0.700000,0.700000
4,2024-02-12,2024-02-13 00:30:00+00:00,2024-02-13 08:55:00+00:00,8.416667,95.069444,95.069444,0.700000,0.700000
...,...,...,...,...,...,...,...,...
545,2025-08-13,2025-08-14 02:05:51+00:00,2025-08-14 07:00:51+00:00,4.916667,81.666667,81.666667,0.745201,0.745201
546,2025-08-14,2025-08-15 00:00:44+00:00,2025-08-15 08:10:44+00:00,8.166667,80.972222,80.972222,0.750166,0.750166
547,2025-08-15,2025-08-15 23:59:43+00:00,2025-08-16 08:30:43+00:00,8.516667,80.000000,80.000000,0.759218,0.759218
548,2025-08-16,2025-08-17 00:05:25+00:00,2025-08-17 08:58:25+00:00,8.883333,83.055556,83.055556,0.735351,0.735351
