In [71]:
import pandas as pd
from datetime import datetime, timedelta
import numpy as np

import os
from tqdm import tqdm

### Sleep

In [75]:
def load_sleep_data(participant_path):
    participant_id = participant_path.split('/')[-1].split()[0]
    sleep_file = f'{participant_path}/{participant_id}_sleep.csv'
    sleep_periods_file = f'{participant_path}/{participant_id}_sleep-periods.csv'
    
    sleep_data = pd.read_csv(sleep_file)
    sleep_periods_data = pd.read_csv(sleep_periods_file)
    return sleep_data, sleep_periods_data

def process_bedtimes(row):
    try:
        bedtime_start = datetime.fromisoformat(row['bedtime_start'])
        bedtime_end = datetime.fromisoformat(row['bedtime_end'])
        sleep_duration_minutes = (bedtime_end - bedtime_start).total_seconds() / 60
    except:
        bedtime_start, bedtime_end, sleep_duration_minutes = None, None, None
    return bedtime_start, bedtime_end, sleep_duration_minutes

def transform_hypnogram_to_minute(hypnogram):
    return ''.join([stage * 5 for stage in hypnogram])

# Correcting the DataFrame creation process to ensure 'Datetime' is recognized as a datetime column from the start
def create_sleep_stage_sum_df(sleep_data):
    """
    Create an adjusted DataFrame with summed sleep stage values for each hour, including awake hours filled with zeros.
    This version ensures 'Sleep Stage Sum' is correctly calculated and displayed, and 'Datetime' is properly recognized.

    Args:
    sleep_data (DataFrame): DataFrame containing sleep data.

    Returns:
    DataFrame: Adjusted DataFrame with summed sleep stage values.
    """
    # Initialize a new DataFrame to store the results
    columns = ['Datetime', 'Sleep Stage Sum']
    adjusted_sleep_stage_sums_df = pd.DataFrame(columns=columns)

    # Iterate through each row in the sleep data
    for index, row in sleep_data.iterrows():
        # Parse the bedtime start and end times
        bedtime_start_delta = row['bedtime_start_delta']
        bedtime_end_delta = row['bedtime_end_delta']
        summary_date = pd.to_datetime(row['summary_date'])
        sleep_phase_string = row['hypnogram_5min']

        # Convert the sleep phase string into a list of integers
        sleep_phases = [int(phase) for phase in sleep_phase_string]

        # Calculate the sum of sleep stages for each hour
        for i, phase in enumerate(sleep_phases):
            # Calculate the actual datetime for each 5-minute interval
            interval_minutes = bedtime_start_delta + i * 5
            interval_datetime = summary_date + timedelta(minutes=interval_minutes)
            hour_datetime = interval_datetime.replace(minute=0, second=0, microsecond=0)

            # Append or update the DataFrame
            if hour_datetime in adjusted_sleep_stage_sums_df['Datetime'].values:
                # Update the existing 'Sleep Stage Sum'
                adjusted_sleep_stage_sums_df.loc[adjusted_sleep_stage_sums_df['Datetime'] == hour_datetime, 'Sleep Stage Sum'] += phase
            else:
                # Append the new data
                adjusted_sleep_stage_sums_df = adjusted_sleep_stage_sums_df.append({
                    'Datetime': hour_datetime,
                    'Sleep Stage Sum': phase
                }, ignore_index=True)

    # Converting 'Datetime' to datetime type
    adjusted_sleep_stage_sums_df['Datetime'] = pd.to_datetime(adjusted_sleep_stage_sums_df['Datetime'])

    # Sorting the DataFrame by Datetime
    adjusted_sleep_stage_sums_df.sort_values(by='Datetime', inplace=True)

    return adjusted_sleep_stage_sums_df

def impute_missing_sleep_values(df, sleep_data):
    """
    Impute missing values for each logged day. If an hour has no sleep stage value, it's assumed the person is awake,
    and the sleep stage sum is imputed as 0.

    Args:
    df (DataFrame): DataFrame with the summed sleep stage values.
    sleep_data (DataFrame): Original sleep data to identify the days to be imputed.

    Returns:
    DataFrame: DataFrame with missing values imputed.
    """
    # Create a new DataFrame to store the results
    imputed_df = pd.DataFrame(columns=df.columns)

    # Extract the unique dates from the sleep data
    unique_dates = pd.to_datetime(sleep_data['summary_date']).dt.date.unique()

    # For each date, ensure all hours are represented in the DataFrame
    for date in unique_dates:
        day_hours = pd.date_range(start=date, end=date + pd.Timedelta(days=1), closed='left', freq='H')
        day_df = df[df['Datetime'].dt.date == date]

        # Check for missing hours and impute them
        for hour in day_hours:
            if hour not in day_df['Datetime'].values:
                # Append a row with sleep stage sum as 0
                imputed_df = imputed_df.append({'Datetime': hour, 'Sleep Stage Sum': 0}, ignore_index=True)
            else:
                # Append the existing row
                imputed_df = imputed_df.append(day_df[day_df['Datetime'] == hour])

    # Sorting the DataFrame by Datetime
    imputed_df.sort_values(by='Datetime', inplace=True)

    return imputed_df

def process_sleep_data(sleep_data, sleep_periods_data):
    # Merge sleep data and sleep periods data
    combined_sleep_data = pd.merge(
        sleep_data[['summary_date', 'efficiency', 'participant_uid']],
        sleep_periods_data[['day', 'bedtime_start', 'bedtime_end', 'total_sleep_duration', 'participant_uid']],
        left_on=['summary_date', 'participant_uid'], 
        right_on=['day', 'participant_uid'],
        how='outer'  # Ensures all days are included, even those without sleep period data
    )

    # Process bedtimes and calculate sleep duration
    combined_sleep_data[['bedtime_start_dt', 'bedtime_end_dt', 'sleep_duration_minutes']] = combined_sleep_data.apply(
        process_bedtimes, axis=1, result_type="expand"
    )

    # Aggregate data by day, ensuring days without sleep periods are still included
    aggregated_sleep_data = combined_sleep_data.groupby(['participant_uid', 'summary_date']).agg({
        'bedtime_start_dt': 'min',  # Earliest bedtime, NaN if no data
        'bedtime_end_dt': 'max',    # Latest wake time, NaN if no data
        'sleep_duration_minutes': 'sum',  # Total sleep duration, 0 if no data
        'efficiency': 'mean'        # Average efficiency, NaN if no data
    }).reset_index()

    return aggregated_sleep_data


def calculate_sri(sleep_data):
    sleep_data['minute_by_minute_hypnogram'] = sleep_data['hypnogram_5min'].apply(transform_hypnogram_to_minute)
    unique_dates = sorted(sleep_data['summary_date'].unique())
    num_days = len(unique_dates)
    num_minutes = 24 * 60
    sleep_matrix = np.zeros((num_days, num_minutes), dtype=int)
    date_to_index = {date: idx for idx, date in enumerate(unique_dates)}
    for _, row in sleep_data.iterrows():
        day_index = date_to_index[row['summary_date']]
        hypnogram = row['minute_by_minute_hypnogram']
        for minute in range(min(num_minutes, len(hypnogram))):
            sleep_matrix[day_index, minute] = 1 if hypnogram[minute] != '0' else 0

    sri_scores = np.zeros(num_days)
    for i in range(1, num_days):
        similarity = 1 - np.sum(np.abs(sleep_matrix[i] - sleep_matrix[i - 1])) / num_minutes
        sri_scores[i] = similarity
    # Create a DataFrame for SRI scores
    sri_data = pd.DataFrame({'summary_date': unique_dates, 'sri_score': sri_scores})
    return sri_data


In [78]:
sleep_stage_sum_df = create_sleep_stage_sum_df(sleep_data)
imputed_sleep_stage_sums_df = impute_missing_sleep_values(sleep_stage_sum_df, sleep_data)
imputed_sleep_stage_sums_df


Unnamed: 0,Datetime,Sleep Stage Sum
0,2023-06-08 00:00:00,46
1,2023-06-08 01:00:00,40
2,2023-06-08 02:00:00,46
3,2023-06-08 03:00:00,39
4,2023-06-08 04:00:00,28
...,...,...
475,2023-07-03 19:00:00,0
476,2023-07-03 20:00:00,0
477,2023-07-03 21:00:00,0
478,2023-07-03 22:00:00,0


### Heart Rate

In [24]:
# Function to load heart rate data
def load_heart_data(participant_path):
    participant_id = participant_path.split('/')[-1].split()[0]
    daily_hr_file = f'{participant_path}/{participant_id}_daily-hr.csv'
    hr_hrv_file = f'{participant_path}/{participant_id}_hr-hrv.csv'

    daily_hr_data = pd.read_csv(daily_hr_file)
    hr_hrv_data = pd.read_csv(hr_hrv_file)
    return daily_hr_data, hr_hrv_data

# Sample Entropy Function
def sampen(L, m, r):
    N = len(L)
    B = 0.0
    A = 0.0
    xmi = np.array([L[i: i + m] for i in range(N - m)])
    xmj = np.array([L[i: i + m] for i in range(N - m + 1)])
    B = np.sum([np.sum(np.abs(xmii - xmj).max(axis=1) <= r) - 1 for xmii in xmi])
    m += 1
    xm = np.array([L[i: i + m] for i in range(N - m + 1)])
    A = np.sum([np.sum(np.abs(xmi - xm).max(axis=1) <= r) - 1 for xmi in xm])
    return -np.log(A / B) if B != 0 else np.nan

# Function to process daily heart rate data
def process_daily_hr_data(daily_hr_data):
    # Convert 'timestamp' column to datetime, handling errors
    daily_hr_data['timestamp'] = pd.to_datetime(daily_hr_data['timestamp'], errors='coerce')
    # Drop rows where the timestamp couldn't be converted
    daily_hr_data = daily_hr_data.dropna(subset=['timestamp'])
    daily_hr_data['date'] = pd.to_datetime(daily_hr_data['timestamp']).dt.date
    aggregated_hr_data = daily_hr_data.groupby('date').agg({
        'bpm': ['mean', 'std', 'min', 'max', lambda x: sampen(x, 2, 0.2 * np.std(x))]
    }).reset_index()
    aggregated_hr_data.columns = ['date', 'avg_hr', 'std_hr', 'min_hr', 'max_hr', 'sampen_hr']
    return aggregated_hr_data

def process_daily_hr_data_hourly(hr_hrv_data):
    # Convert 'timestamp' column to datetime, handling errors
    hr_hrv_data['timestamp'] = pd.to_datetime(hr_hrv_data['timestamp'], errors='coerce')
    # Drop rows where the timestamp couldn't be converted
    hr_hrv_data = hr_hrv_data.dropna(subset=['timestamp'])
    hr_hrv_data['hour'] = hr_hrv_data['timestamp'].dt.floor('H')
    aggregated_hr_data = hr_hrv_data.groupby('hour').agg({
        '5-min hr': ['mean', 'std', 'min', 'max', lambda x: sampen(x, 2, 0.2 * np.std(x))]
    }).reset_index()
    aggregated_hr_data.columns = ['hour', 'avg_hr', 'std_hr', 'min_hr', 'max_hr', 'sampen_hr']
    return aggregated_hr_data

# Function to process heart rate variability (HRV) data
def process_hr_hrv_data(hr_hrv_data):
    # Convert 'timestamp' column to datetime, handling errors
    hr_hrv_data['timestamp'] = pd.to_datetime(hr_hrv_data['timestamp'], utc=True)
    # Drop rows where the timestamp couldn't be converted
    hr_hrv_data = hr_hrv_data.dropna(subset=['timestamp'])
    hr_hrv_data['date'] = pd.to_datetime(hr_hrv_data['timestamp']).dt.date
    aggregated_hrv_data = hr_hrv_data.groupby('date').agg({
        '5-min hrv': ['mean', 'std', 'min', 'max']  # Adjust column names as needed
    }).reset_index()
    aggregated_hrv_data.columns = ['date', 'avg_hrv', 'std_hrv', 'min_hrv', 'max_hrv']
    return aggregated_hrv_data

def process_hr_hrv_data_hourly(hr_hrv_data):
    # Convert 'timestamp' column to datetime, handling errors
    hr_hrv_data['timestamp'] = pd.to_datetime(hr_hrv_data['timestamp'], utc=True)
    # Drop rows where the timestamp couldn't be converted
    hr_hrv_data = hr_hrv_data.dropna(subset=['timestamp'])
    hr_hrv_data['hour'] = hr_hrv_data['timestamp'].dt.floor('H')
    aggregated_hrv_data = hr_hrv_data.groupby('hour').agg({
        '5-min hrv': ['mean', 'std', 'min', 'max']  # Adjust column names as needed
    }).reset_index()
    aggregated_hrv_data.columns = ['hour', 'avg_hrv', 'std_hrv', 'min_hrv', 'max_hrv']
    return aggregated_hrv_data


### Activities

In [17]:
def load_activity_data(participant_path):
    participant_id = participant_path.split('/')[-1].split()[0]

    activity_file = f'{participant_path}/{participant_id}_activity.csv'
    activity_data = pd.read_csv(activity_file)
    return activity_data

def process_activity_data(activity_data):
    # Assuming 'summary_date' column exists and is in a standard format
    activity_data['date'] = pd.to_datetime(activity_data['summary_date']).dt.date

    # Select and potentially rename columns based on the metrics you need
    # Example: ['summary_date', 'total_steps', 'total_calories', 'score_stay_active', ...]
    processed_activity_data = activity_data[['date', 'steps', 'cal_total', 'score_stay_active', 'score_move_every_hour']]
    # Add more columns as needed

    return processed_activity_data

def process_activity_data_hourly(activity_data):
    # Assuming 'summary_date' column exists and is in a standard format
    activity_data['hour'] = pd.to_datetime(activity_data['summary_date']).dt.floor('H')

    # Select and potentially rename columns based on the metrics you need
    # Example: ['summary_date', 'total_steps', 'total_calories', 'score_stay_active', ...]
    processed_activity_data = activity_data[['hour', 'steps', 'cal_total', 'score_stay_active', 'score_move_every_hour']]
    # Add more columns as needed

    return processed_activity_data

In [47]:
sleep_features.date

0     2023-06-08
1     2023-06-09
2     2023-06-10
3     2023-06-11
4     2023-06-12
5     2023-06-14
6     2023-06-17
7     2023-06-18
8     2023-06-19
9     2023-06-20
10    2023-06-21
11    2023-06-22
12    2023-06-23
13    2023-06-24
14    2023-06-25
15    2023-06-27
16    2023-06-28
17    2023-06-29
18    2023-06-30
19    2023-07-03
Name: date, dtype: object

In [86]:
def format_date_string(date_column):
    return pd.to_datetime(date_column).dt.strftime('%Y-%m-%d')

def format_hour_string(datetime_column):
    return pd.to_datetime(datetime_column).dt.strftime('%Y-%m-%d %H:00:00')
# Processing Loop for Multiple Participants
# participant_directories = ['../Studies/BIN Complete Data Sets/BIN01 Data/'] 
file_path = "../Studies/BIN Complete Data Sets/"
participant_directories = [os.path.join(file_path, dir) for dir in os.listdir(file_path) if dir.endswith("Data")]

# Initialize an empty DataFrame for the combined data
all_data_combined = pd.DataFrame()
all_data_combined_houly = pd.DataFrame()

for participant_dir in tqdm(participant_directories):
    # Extract participant ID from directory name
    participant_id = participant_dir.split('/')[-1].split()[0]

    ### Sleep Data Processing
    sleep_data, sleep_periods_data = load_sleep_data(participant_dir)
    sleep_stage_sum_hourly = create_sleep_stage_sum_df(sleep_data)
    processed_sleep_data = process_sleep_data(sleep_data, sleep_periods_data)
    sri_data = calculate_sri(sleep_data)
    # Ensure consistent date column name and add participant_id
    sleep_features = processed_sleep_data.rename(columns={'summary_date': 'date'})
    sleep_features.drop(columns=["participant_uid"], inplace=True)
    sri_data = sri_data.rename(columns={'summary_date': 'date'})
    sleep_features['date'] = format_date_string(sleep_features['date'])
    sri_data['date'] = format_date_string(sri_data['date'])
    sleep_features = pd.merge(sleep_features, sri_data, on=['date'], how='outer')
    sleep_features['participant_id'] = participant_id
    sleep_stage_sum_hourly['participant_id'] = participant_id
    sleep_stage_sum_hourly.rename(columns={"Datetime":"hour"}, inplace=True)
    sleep_stage_sum_hourly['hour'] = format_hour_string(sleep_stage_sum_hourly['hour'])
    ### Heart Rate Data Processing
    daily_hr_data, hr_hrv_data = load_heart_data(participant_dir)
    processed_hr_data = process_daily_hr_data(daily_hr_data)
    processed_hrv_data = process_hr_hrv_data(hr_hrv_data)
    processed_hr_data_hourly = process_daily_hr_data_hourly(hr_hrv_data)
    processed_hrv_data_hourly = process_hr_hrv_data_hourly(hr_hrv_data)
    # Add participant_id and merge heart data
    heart_data = processed_hr_data
    processed_hr_data['date'] = format_date_string(processed_hr_data['date'])
    processed_hrv_data['date'] = format_date_string(processed_hrv_data['date'])
    processed_hr_data_hourly['hour'] = format_hour_string(processed_hr_data_hourly['hour'])
    processed_hrv_data_hourly['hour'] = format_hour_string(processed_hrv_data_hourly['hour'])
    heart_data = pd.merge(heart_data, processed_hrv_data, on=['date'], how='outer')
    heart_hourly_data = pd.merge(processed_hr_data_hourly, processed_hrv_data_hourly, on=['hour'], how='outer')
    heart_data['participant_id'] = participant_id
    heart_hourly_data['participant_id'] = participant_id

    ### Activity Data Processing
    activity_data = load_activity_data(participant_dir)
    processed_activity_data = process_activity_data(activity_data)
    processed_activity_data['date'] = format_date_string(processed_activity_data['date'])
    processed_activity_data['participant_id'] = participant_id

    # Merge all features for the participant
    combined_data_participant = pd.merge(sleep_features, heart_data, on=['participant_id', 'date'], how='outer')
    combined_data_participant = pd.merge(combined_data_participant, processed_activity_data, on=['participant_id', 'date'], how='outer')

    combined_data_participant_hourly = pd.merge(sleep_stage_sum_hourly, heart_hourly_data, on=['participant_id', 'hour'], how='outer')
    combined_data_participant_hourly.fillna({"Sleep Stage Sum": 0})
    # Concatenate with the overall dataset
    all_data_combined = pd.concat([all_data_combined, combined_data_participant])
    all_data_combined_houly = pd.concat([all_data_combined_houly, combined_data_participant_hourly])

# Reset index of the final DataFrame
all_data_combined.reset_index(drop=True, inplace=True)
all_data_combined_houly.reset_index(drop=True, inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user

In [89]:
all_data_combined_houly

Unnamed: 0,hour,Sleep Stage Sum,participant_id,avg_hr,std_hr,min_hr,max_hr,sampen_hr,avg_hrv,std_hrv,min_hrv,max_hrv
0,2023-06-02 03:00:00,16,BIN01,,,,,,,,,
1,2023-06-02 04:00:00,18,BIN01,,,,,,,,,
2,2023-06-02 05:00:00,24,BIN01,,,,,,,,,
3,2023-06-02 06:00:00,20,BIN01,,,,,,,,,
4,2023-06-02 07:00:00,26,BIN01,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...
9604,2023-09-06 07:00:00,,BIN23,66.750000,2.301185,63.0,70.0,,56.916667,14.637799,32.0,78.0
9605,2023-09-06 08:00:00,,BIN23,64.833333,1.749459,62.0,68.0,inf,48.333333,11.964594,34.0,70.0
9606,2023-09-06 09:00:00,,BIN23,64.916667,1.621354,63.0,67.0,inf,53.333333,6.429101,45.0,63.0
9607,2023-09-06 10:00:00,,BIN23,42.833333,31.642415,0.0,66.0,0.271934,28.750000,21.303542,0.0,46.0


In [62]:
combined_sleep_data.apply(full_day_sleep_stage_to_hourly, axis=1)

ValueError: Neither `start` nor `end` can be NaT

In [62]:
all_data_combined.to_csv("../Studies/oura_feats_bin01_23.csv", index=False)

PermissionError: [Errno 13] Permission denied: '../Studies/oura_feats_bin01_23.csv'

In [90]:
all_data_combined_houly.to_csv("../Studies/oura_feats_bin01_23_hourly.csv", index=False)

In [60]:
pd.read_csv("../Studies/oura_feats_bin01_23.csv")

Unnamed: 0,date,bedtime_start_dt,bedtime_end_dt,sleep_duration_minutes,efficiency,sri_score,participant_id,avg_hr,std_hr,min_hr,max_hr,sampen_hr,avg_hrv,std_hrv,min_hrv,max_hrv,steps,cal_total,score_stay_active,score_move_every_hour
0,2023-06-08,2023-06-07 23:23:33-05:00,2023-06-08 06:33:33-05:00,430.0,93.0,0.000000,BIN01,90.285714,16.214271,42.0,138.0,1.331806,12.298851,3.974087,0.0,24.0,6509.0,2182.0,69.0,95.0
1,2023-06-09,2023-06-08 21:15:28-05:00,2023-06-09 16:37:59-05:00,446.0,93.0,0.968750,BIN01,92.118483,13.299488,41.0,131.0,1.389414,14.649351,6.542907,0.0,39.0,10324.0,2427.0,56.0,100.0
2,2023-06-10,2023-06-10 00:21:30-05:00,2023-06-10 04:50:30-05:00,269.0,93.0,0.920139,BIN01,100.902174,16.567174,45.0,145.0,1.233330,9.777778,2.415880,0.0,16.0,2399.0,1909.0,98.0,95.0
3,2023-06-11,2023-06-11 01:31:32-05:00,2023-06-11 10:05:32-05:00,514.0,91.0,0.829861,BIN01,96.481132,8.716958,85.0,117.0,2.209495,12.359223,4.292845,0.0,26.0,3532.0,1988.0,68.0,100.0
4,2023-06-12,2023-06-11 22:21:29-05:00,2023-06-12 06:16:29-05:00,475.0,55.0,0.972222,BIN01,98.267606,10.103658,84.0,131.0,1.472472,13.593750,9.850010,0.0,42.0,6315.0,2223.0,50.0,100.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
779,2023-08-13,,,,,,BIN23,76.359606,8.093130,69.0,128.0,1.247793,,,,,1892.0,2401.0,61.0,60.0
780,2023-08-15,,,,,,BIN23,88.757764,10.690402,68.0,119.0,1.873549,,,,,13980.0,3332.0,83.0,100.0
781,2023-08-21,,,,,,BIN23,84.790244,13.805366,70.0,140.0,1.278354,,,,,4155.0,2468.0,100.0,100.0
782,2023-09-05,,,,,,BIN23,81.903553,13.400268,68.0,148.0,1.362764,,,,,5349.0,2620.0,67.0,95.0


In [38]:
heart_hourly_data

Unnamed: 0,hour,avg_hr,std_hr,min_hr,max_hr,sampen_hr,avg_hrv,std_hrv,min_hrv,max_hrv,participant_id
0,2023-06-08 04:00:00,81.000000,32.837261,0,96,0.223144,13.375000,5.853875,0,19,BIN01
1,2023-06-08 05:00:00,95.333333,1.723281,92,97,inf,11.166667,3.270622,7,16,BIN01
2,2023-06-08 06:00:00,93.000000,2.593699,89,97,inf,15.000000,5.877538,7,24,BIN01
3,2023-06-08 07:00:00,90.000000,2.000000,88,94,1.098612,10.750000,2.490893,6,16,BIN01
4,2023-06-08 08:00:00,87.666667,2.188122,84,92,,11.500000,3.233349,7,17,BIN01
...,...,...,...,...,...,...,...,...,...,...,...
172,2023-07-03 08:00:00,76.250000,1.544786,73,79,0.693147,22.166667,5.474459,15,33,BIN01
173,2023-07-03 09:00:00,73.333333,2.015095,70,76,,17.500000,5.535013,10,30,BIN01
174,2023-07-03 10:00:00,74.750000,2.527126,71,78,,18.083333,4.209477,14,27,BIN01
175,2023-07-03 11:00:00,72.250000,1.959824,70,75,,21.750000,3.934117,13,28,BIN01


In [39]:
sleep_data

Unnamed: 0,summary_date,score,score_alignment,score_deep,score_disturbances,score_efficiency,score_latency,score_rem,score_total,duration,...,breath_average,average_breath_variation,bedtime_start_delta,bedtime_end_delta,temperature_delta,hypnogram_5min,type,timestamp,participant_uid,minute_by_minute_hypnogram
0,2023-06-08,82,100,97,73,98,75,97,69,25800,...,17.75,3.375,-2187,23613,-0.07,4211111111111112333111112221112222234322222233...,long_sleep,,84a34873-1c85-4748-9c1d-d0d2a08edfe3,4444422222111111111111111111111111111111111111...
1,2023-06-09,80,100,97,81,98,86,96,59,22920,...,17.375,2.75,-9872,13048,0.08,4421122111111112224133333221111111111133322333...,long_sleep,,84a34873-1c85-4748-9c1d-d0d2a08edfe3,4444444444222221111111111222222222211111111111...
2,2023-06-10,66,100,96,78,98,67,61,34,16140,...,17.5,2.75,1290,17430,-0.03,2211111111111123333333322112211222233332211111...,long_sleep,,84a34873-1c85-4748-9c1d-d0d2a08edfe3,2222222222111111111111111111111111111111111111...
3,2023-06-11,80,23,97,80,96,67,100,86,30840,...,17.25,2.75,5492,36332,0.2,4331111111122111122222222211112222333333322222...,long_sleep,,84a34873-1c85-4748-9c1d-d0d2a08edfe3,4444433333333331111111111111111111111111111111...
4,2023-06-12,58,100,97,65,21,86,53,37,28500,...,17.125,3.625,-5911,22589,0.2,4411111111111113311442111111333334444444444244...,long_sleep,,84a34873-1c85-4748-9c1d-d0d2a08edfe3,4444444444111111111111111111111111111111111111...
5,2023-06-14,84,100,96,81,96,67,100,76,28140,...,17.125,2.625,-3989,24151,0.07,2212211111111333311111111123333333332222333332...,long_sleep,,84a34873-1c85-4748-9c1d-d0d2a08edfe3,2222222222111112222222222111111111111111111111...
6,2023-06-17,93,77,100,83,97,81,100,100,41520,...,16.75,2.625,-6869,34651,0.25,4233111111121111111111111113332222221111221111...,long_sleep,,84a34873-1c85-4748-9c1d-d0d2a08edfe3,4444422222333333333311111111111111111111111111...
7,2023-06-18,86,73,99,73,99,70,100,88,30360,...,16.5,2.625,-1287,29073,-0.13,4222221111112111442111111111111121123333322222...,long_sleep,,84a34873-1c85-4748-9c1d-d0d2a08edfe3,4444422222222222222222222222221111111111111111...
8,2023-06-19,60,68,95,71,88,72,42,38,18600,...,15.5,2.375,5670,24270,-0.24,4231222211111111122232221111111422242224233333...,long_sleep,,84a34873-1c85-4748-9c1d-d0d2a08edfe3,4444422222333331111122222222222222222222111111...
9,2023-06-20,85,100,98,80,98,94,87,71,26160,...,16.0,1.875,-8909,17251,0.07,4442333311111332421111122212221111332113222211...,long_sleep,,84a34873-1c85-4748-9c1d-d0d2a08edfe3,4444444444444442222233333333333333333333111111...
