In [283]:
# Clear the interactive namespace (without prompting) so stale variables from
# earlier runs cannot leak into this session.
%reset -f

# Enable autoreload so edited modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


# Polar Json converter

Last updated by AKR (8/13/24).

This notebook converts "manually exported" Polar json files into csv files.

**Format of csv file output:**

**f"{participant id}\_{polar workout number (pwo)}\_{day since start date}_hr_WearablePolar_Polar.csv"**

For example: **019_pwo04_d006_hr_WearablePolar_Polar.csv** is participant 19's 4th workout, completed 6 days after the study start date.

**Details:**
- **pwo (polar workout number)**: workouts are counted if they are >5 minutes in duration. After passing this filter, they are numbered sequentially by date.
- **d (days)**: workouts are named in reference to the "study start date," defined as the first recorded date in the Week 1 Workout Log in REDCap. This annotation in REDCap is the "ground truth" for when participants began exercising. Oftentimes, they test the device before the study or complete a workout session at the CTRU before the intervention. This will appear as negative days (e.g. 4 days before study is -d004). In other words, the first Polar entry may be a "test" or CTRU workout, so REDCap is the best and closest annotation to training start date.


Each csv file represents an individual workout, with columns for dateTime and (HR) values. 

We expect participants to have 36 workouts (pwo01 - pwo36) if they have completed 3 workouts per week for 12 weeks. On average, participants would be expected to have participated for ~84 days in the study (i.e. 12 weeks), but they are allowed 2-week extensions for non-adherence, illness, or to accommodate scheduling. In rare cases of injury, participants were extended beyond 14 weeks.



### Load packages

In [12]:
import glob
import json
import os
import random
from datetime import datetime, timedelta

import matplotlib.dates as mdates
import matplotlib.pyplot as plt
import pandas as pd
from IPython.display import display, HTML

#initialization
random.seed(1000)

### Function to get files for a participant

In [25]:
def get_files(participant, data_root="/Users/aubreykr/Google Drive/Shared drives/HIIT and Endurance Study/Data/data"):
    """List a participant's Polar training-session JSON exports.

    Parameters
    ----------
    participant : str
        Participant folder name, e.g. ``"060_qtz1b13779232687428538"``.
    data_root : str, optional
        Root of the study data folder. The exports are looked up in
        ``<data_root>/polar_recovery/<participant>/polar-user-data-export_<participant>/``.

    Returns
    -------
    list of str
        File names (not full paths) matching ``training-session*.json``,
        sorted alphabetically.

    Raises
    ------
    FileNotFoundError
        If the participant's export folder does not exist.
    """
    json_dir = os.path.join(
        data_root, "polar_recovery", participant, f"polar-user-data-export_{participant}"
    )
    if not os.path.isdir(json_dir):
        raise FileNotFoundError(f"Polar export folder not found: {json_dir}")

    # Glob with the full path instead of os.chdir(): changing the working
    # directory is a process-wide side effect that leaks into every later cell.
    # glob.escape keeps special characters in the folder name from being
    # interpreted as wildcards.
    pattern = os.path.join(glob.escape(json_dir), "training-session*.json")
    files = sorted(os.path.basename(path) for path in glob.glob(pattern))

    print("There are", len(files), "files for", participant)
    return files
    
#get_files("060_qtz1b13779232687428538")

# Sanity check: report how many Polar export files exist for participant 1000.
get_files("1000_qtz1b12831179598963353")

# Read the raw REDCap workout log; keep the untouched frame and work on a copy.
notes_path = '/Users/aubreykr/Google Drive/Shared drives/HIIT and Endurance Study/Data/data/Redcap/Raw_Data/64208_WorkoutTimeStamps_Raw.csv'
df_notes_orig = pd.read_csv(notes_path)
df_notes = df_notes_orig.copy()

# Show the REDCap rows recorded for participant 1000.
df_notes.loc[df_notes['record_id'] == 1000]

There are 44 files for 1000_qtz1b12831179598963353


Unnamed: 0.1,Unnamed: 0,record_id,redcap_event_name,data_collection_start_date,data_collection_end_date,workout_1,borg_rpe_1,polar_1,fitbit_1,heartrate_1,...,admin_workout_attempts,admin_num_success_workouts,admin_reasons_for_non_adherence___2,admin_reasons_for_non_adherence___3,admin_reasons_for_non_adherence___4,admin_reasons_for_non_adherence___5,admin_reasons_for_non_adherence___6,admin_reasons_for_non_adherence___99,admin_notes,perceived_stress_scale_pss10_complete


### Function to convert Polar json to csv

In [26]:
def _format_study_day(delta_days):
    """Return the file-name label for a day offset from the study start.

    Offsets on or after the start date give ``d000``, ``d006``, ``d109``, ...;
    workouts recorded before the start date give ``-d004`` (sign in front,
    digits still zero-padded to three places).
    """
    sign = '-' if delta_days < 0 else ''
    return f'{sign}d{abs(delta_days):03d}'


def convert_polar(participant, data_root="/Users/aubreykr/Google Drive/Shared drives/HIIT and Endurance Study/Data/data", min_workout_minutes=5):
    """Convert a participant's Polar training-session JSON exports to per-workout CSVs.

    Each export whose heart-rate recording spans more than ``min_workout_minutes``
    is numbered sequentially (``pwo01``, ``pwo02``, ...) in file-name order and
    written to
    ``<data_root>/polar/workout/<participant>/<id>_<pwo>_<day>_hr_WearablePolar_Polar.csv``,
    where ``<day>`` is the offset in days from the participant's REDCap study
    start date (``workout_1`` of the ``week_1_arm_1`` event). Existing CSV files
    are never overwritten.

    Parameters
    ----------
    participant : str
        Participant folder name, ``"<record id>_<myPHD id>"``,
        e.g. ``"1000_qtz1b12831179598963353"``.
    data_root : str, optional
        Root of the study data folder containing ``Redcap/Raw_Data``,
        ``polar_recovery`` and ``polar/workout``.
    min_workout_minutes : int or float, optional
        Recordings must be strictly longer than this to count as a workout.

    Returns
    -------
    None

    Raises
    ------
    FileNotFoundError
        If the participant's Polar export folder does not exist.
    ValueError
        If REDCap has no usable Week 1 ``workout_1`` date for the participant.
    """
    participant_id = participant.split('_')[0]
    # Resolved up front so the summary at the end works even when no file is processed.
    participant_num = int(participant_id)
    min_duration = timedelta(minutes=min_workout_minutes)

    # 1) REDCAP DATA
    # The first workout logged in Week 1 is the "ground truth" study start date.
    # It is the same for every file, so it is looked up once, before the loop.
    notes_path = os.path.join(data_root, "Redcap", "Raw_Data", "64208_WorkoutTimeStamps_Raw.csv")
    df_notes = pd.read_csv(notes_path)
    is_week1 = (df_notes['redcap_event_name'] == 'week_1_arm_1') & (df_notes['record_id'] == participant_num)
    week1_starts = df_notes.loc[is_week1, 'workout_1']
    if week1_starts.empty:
        raise ValueError(f"No week_1_arm_1 REDCap record found for participant {participant_num}")
    start_timestamp = pd.to_datetime(week1_starts.iloc[0])
    if pd.isna(start_timestamp):
        raise ValueError(f"REDCap Week 1 workout_1 date is missing for participant {participant_num}")
    start_date = start_timestamp.date()

    # 2) Get POLAR from Google Drive
    json_dir = os.path.join(data_root, "polar_recovery", participant, f"polar-user-data-export_{participant}")
    if not os.path.isdir(json_dir):
        raise FileNotFoundError(f"Polar export folder not found: {json_dir}")
    # Glob with the full path rather than os.chdir(), which would change the
    # working directory for the whole notebook session.
    pattern = os.path.join(glob.escape(json_dir), "training-session*.json")
    files = sorted(os.path.basename(path) for path in glob.glob(pattern))
    print("There are", len(files), "files for", participant)

    output_path = os.path.join(data_root, "polar", "workout", participant)

    # For each Polar json file, extract HR data; then save it as a csv file.
    counter = 0
    for file in files:
        full_file_name = os.path.join(json_dir, file)
        with open(full_file_name, 'r') as file2read:
            data_dict = json.load(file2read)

        # Extract the heart-rate samples of the first exercise in the session.
        try:
            df_hr = pd.DataFrame(data_dict['exercises'][0]['samples']['heartRate'])
        except (KeyError, IndexError) as e:
            # 'exercises' (or the samples inside it) is missing or empty: report and move on.
            print(f"The file {file} might be empty! Error: {e}")
            continue

        if df_hr.empty or 'dateTime' not in df_hr.columns:
            print(f"The file {file} has no heart rate samples and was skipped.")
            continue

        # Convert time column to datetime type
        df_hr['dateTime'] = pd.to_datetime(df_hr['dateTime'])

        # 3) Enrich Polar file with REDCap workout annotations

        # a) Only recordings longer than the minimum duration count as a polar workout (pwo).
        if df_hr['dateTime'].max() - df_hr['dateTime'].min() <= min_duration:
            continue

        # b) Number the workout. The counter advances even if the CSV already
        #    exists, so numbering is stable across re-runs.
        counter += 1
        workout_num = f'pwo{counter:02}'

        # c) Compute day of workout (d) relative to the REDCap start date.
        workout_date = df_hr['dateTime'].iloc[0].date()
        day = _format_study_day((workout_date - start_date).days)

        # d) Build the output name: participant ID, polar workout number (pwo), day (d).
        output_name = f"{participant_id}_{workout_num}_{day}_hr_WearablePolar_Polar.csv"
        fullname = os.path.join(output_path, output_name)

        # e) Save to drive (create intermediate folders as needed; never overwrite).
        os.makedirs(output_path, exist_ok=True)
        if not os.path.exists(fullname):
            df_hr.to_csv(fullname, index=False)
            print(f"CSV file saved: {fullname}")
        else:
            print("File already exists and will not be overwritten.")

    print("Processing complete.")
    print(f'Participant {participant_num}: {counter} workouts logged.')
    

In [27]:
# Convert the Polar training-session exports for participant 1000 into per-workout CSV files.
convert_polar("1000_qtz1b12831179598963353")

There are 44 files for 1000_qtz1b12831179598963353
CSV file saved: /Users/aubreykr/Google Drive/Shared drives/HIIT and Endurance Study/Data/data/polar/workout/1000_qtz1b12831179598963353/1000_pwo01_d000_hr_WearablePolar_Polar.csv
CSV file saved: /Users/aubreykr/Google Drive/Shared drives/HIIT and Endurance Study/Data/data/polar/workout/1000_qtz1b12831179598963353/1000_pwo02_d009_hr_WearablePolar_Polar.csv
CSV file saved: /Users/aubreykr/Google Drive/Shared drives/HIIT and Endurance Study/Data/data/polar/workout/1000_qtz1b12831179598963353/1000_pwo03_d010_hr_WearablePolar_Polar.csv
CSV file saved: /Users/aubreykr/Google Drive/Shared drives/HIIT and Endurance Study/Data/data/polar/workout/1000_qtz1b12831179598963353/1000_pwo04_d109_hr_WearablePolar_Polar.csv
CSV file saved: /Users/aubreykr/Google Drive/Shared drives/HIIT and Endurance Study/Data/data/polar/workout/1000_qtz1b12831179598963353/1000_pwo05_d112_hr_WearablePolar_Polar.csv
CSV file saved: /Users/aubreykr/Google Drive/Shared dr

## Plot the QC check files

In [83]:
# Plot every saved workout CSV for one participant against their target
# heart-rate thresholds, as a visual QC check.
participant = "073_qtz1b17592513599164541"
base_dir = f"/Users/aubreykr/Google Drive/Shared drives/HIIT and Endurance Study/Data/data/polar/workout/{participant}/"

# Load redcap data
df_path = '/Users/aubreykr/Google Drive/Shared drives/HIIT and Endurance Study/Data/data/Redcap/Raw_Data/64208_HRZones_Raw.csv'
df_red = pd.read_csv(df_path)

# The HR-zone table is matched on 'myphd_id', which is the part of the folder
# name after the underscore.
ppt = participant.split('_')[1]
ppt_rows = df_red[df_red['myphd_id'] == ppt]
if ppt_rows.empty:
    raise ValueError(f"No HR-zone record found in REDCap for myphd_id {ppt!r}")
participant_info = ppt_rows['record_id'].iloc[0]

# Group and HR thresholds are per participant, so look them up once rather
# than once per file.
rand_group = ppt_rows['randomization_group'].values[0]
MICT_lower_val = ppt_rows['target_hr_45'].values[0]
MICT_upper_val = ppt_rows['target_hr_55'].values[0]
HIIT_lower_val = ppt_rows['target_hr_70'].values[0]

# Match every workout CSV in the participant's folder; sort so the plots
# appear in workout order (pwo01, pwo02, ...).
pattern = os.path.join(base_dir, "*.csv")
qc_check_files = sorted(glob.glob(pattern))

duration = 0
for file in qc_check_files:
    df = pd.read_csv(file)

    # Convert 'dateTime' column to datetime objects
    df['dateTime'] = pd.to_datetime(df['dateTime'])

    # Get duration
    duration = df['dateTime'].max() - df['dateTime'].min()

    # Plot against real timestamps and only *format* the ticks as HH:MM.
    # Plotting pre-formatted HH:MM strings makes the axis categorical, which
    # stacks every sample from the same minute onto a single x position.
    fig, ax = plt.subplots(figsize=(12, 6))
    ax.plot(df['dateTime'], df['value'], label='Heart Rate')
    ax.xaxis.set_major_formatter(mdates.DateFormatter('%H:%M'))
    ax.set_xlabel('Time (HH:MM)')
    ax.set_ylabel('Heart Rate (bpm)')
    # basename() instead of a hard-coded path-component index, so the title
    # does not depend on how deep base_dir is.
    ax.set_title(f'{duration}, {os.path.basename(file)}')
    ax.axhline(y=MICT_lower_val, color='g', linestyle='--', label='MICT lower (target_hr_45)')
    ax.axhline(y=MICT_upper_val, color='y', linestyle='--', label='MICT upper (target_hr_55)')
    ax.axhline(y=HIIT_lower_val, color='r', linestyle='--', label='HIIT lower (target_hr_70)')
    ax.legend()
    ax.grid(True)
    ax.tick_params(axis='x', rotation=45)
    fig.tight_layout()
    plt.show()
    # Release the figure so a participant with many workouts does not pile up open figures.
    plt.close(fig)

73
