In [1]:
import os
import zipfile
import json
import pandas as pd
import csv

In [2]:
# --- Input ------------------------------------------------------------------
# Folder that is listed for archives, and the file-name prefix an archive must
# have (together with a '.zip' suffix) to be picked up by the loading cell.
zip_directory = 'files'
zip_file_name_start = 'takeout'

# Path prefix, inside each archive, of the members that are parsed as JSON.
inner_file_name_start = 'Takeout/Fitbit/Global Export Data/heart_rate'

# --- Output -----------------------------------------------------------------
# Destination path for the CSV export.
output_csv_file = 'files/hr_output.csv'

In [8]:
# Load and compile data.
#
# Reads every archive member whose path starts with `inner_file_name_start`
# from every '<zip_file_name_start>*.zip' archive found in `zip_directory`,
# and compiles the readings into the DataFrame `df` with the columns
# dateTime, bpm, date, hour, month_start and week_start.
#
# Errors are reported and then re-raised: if loading fails, `df` would be
# undefined (or stale from an earlier run), so later cells must not run.
try:
    # Archives in `zip_directory` matching the configured prefix.
    # os.listdir() returns entries in arbitrary order, so sort them to get a
    # reproducible row order across runs and machines.
    matching_zips = sorted(
        file for file in os.listdir(zip_directory)
        if file.startswith(zip_file_name_start) and file.endswith('.zip')
    )

    data = []  # One {'dateTime': ..., 'bpm': ...} dict per reading

    for zip_file_name in matching_zips:
        zip_file_path = os.path.join(zip_directory, zip_file_name)
        with zipfile.ZipFile(zip_file_path, 'r') as zip_ref:
            for file_name in zip_ref.namelist():
                # Only members under the configured inner path prefix.
                if not file_name.startswith(inner_file_name_start):
                    continue
                with zip_ref.open(file_name) as file:
                    json_data = json.load(file)
                for entry in json_data:
                    data.append({
                        # Missing fields become None (-> NaT / NaN below)
                        # rather than '' so the columns keep a usable dtype.
                        'dateTime': entry.get('dateTime'),
                        'bpm': (entry.get('value') or {}).get('bpm'),
                    })

    # Passing the column names explicitly keeps both columns present even
    # when nothing was found, so the steps below do not raise KeyError.
    df = pd.DataFrame(data, columns=['dateTime', 'bpm'])
    if df.empty:
        print(f"No heart-rate records found in '{zip_directory}' "
              f"({len(matching_zips)} matching zip file(s)).")

    # Convert 'dateTime' to datetime and 'bpm' to a numeric dtype;
    # unparseable bpm values become NaN instead of poisoning the column.
    df['dateTime'] = pd.to_datetime(df['dateTime'])
    df['bpm'] = pd.to_numeric(df['bpm'], errors='coerce')

    # Derived calendar columns used for grouping.
    df['date'] = df['dateTime'].dt.date
    df['hour'] = df['dateTime'].dt.hour
    # First instant of the calendar month / weekly period of each reading.
    # In the recorded sample output, week_start for Tue 2024-02-27 is
    # Mon 2024-02-26.
    df['month_start'] = df['dateTime'].dt.to_period('M').dt.to_timestamp()
    df['week_start'] = df['dateTime'].dt.to_period('W').dt.to_timestamp()

    # Print the first five rows of the DataFrame
    print(df.head())

except FileNotFoundError:
    print("The specified directory does not exist.")
    raise
except zipfile.BadZipFile:
    print("A zip file is not valid.")
    raise

             dateTime  bpm        date  hour month_start week_start
0 2024-02-27 18:33:40   70  2024-02-27    18  2024-02-01 2024-02-26
1 2024-02-27 18:33:50   69  2024-02-27    18  2024-02-01 2024-02-26
2 2024-02-27 18:33:55   69  2024-02-27    18  2024-02-01 2024-02-26
3 2024-02-27 18:34:00   72  2024-02-27    18  2024-02-01 2024-02-26
4 2024-02-27 18:34:05   64  2024-02-27    18  2024-02-01 2024-02-26


In [None]:
# TBD: this cell is still a work in progress.
# (Kept as a comment: a bare `TBD` is evaluated as a name and raises
# NameError before the aggregation below can run.)

# Statistics computed per (date, hour) group of `bpm`, keyed by the name of
# the resulting column. Named aggregation ties each column name to its
# function, instead of renaming positionally after the fact.
hourly_bpm_stats = {
    'max_bpm': 'max',
    'min_bpm': 'min',
    '25th_percentile_bpm': lambda bpm: bpm.quantile(0.25),
    'median_bpm': 'median',
    '75th_percentile_bpm': lambda bpm: bpm.quantile(0.75),
}

# Group by date and hour, and calculate the required statistics.
# Resulting columns, in order: date, hour, max_bpm, min_bpm,
# 25th_percentile_bpm, median_bpm, 75th_percentile_bpm.
result_df = (
    df.groupby(['date', 'hour'])['bpm']
    .agg(**hourly_bpm_stats)
    .reset_index()
)
