# Exploring Fitbit Data Export
## Checking datatypes and import
### Imports

In [1]:
import os
import pandas as pd
import json
from csv import DictReader
import numpy as np
from scipy.interpolate import interp1d
from scipy.signal import savgol_filter

## Test functions

In [127]:
import os
import pandas as pd
import json
from csv import DictReader

df_dict = {}
path_dict = {
    'profile': '../data/raw/Profile.csv',
    'respiratory_rate': '../data/raw/breath/rr/rr_summary/',
    'vo2_max': '../data/raw/breath/demographic_vo2max/',
    'oxygen_variation': '../data/raw/breath/oxygen_variation/',
    'spo2_daily': '../data/raw/breath/spo2/daily_spo2/',
    'spo2_intraday': '../data/raw/breath/spo2/minute_spo2/',
    'afib_ecg': '../data/raw/heart/afib_ecg/',
    'heart_rate': '../data/raw/heart/hr/',
    'hrv_summary': '../data/raw/heart/hrv/hrv_summary/',
    'hrv_histogram': '../data/raw/heart/hrv/hrv_histogram/',
    'hrv_details': '../data/raw/heart/hrv/hrv_details/',
    'time_in_hr_zones': '../data/raw/heart/time_in_hr_zones/',
    'sleep_profile': '../data/raw/sleep/Sleep Profile.csv',
    'sleep_score': '../data/raw/sleep/sleep_score.csv',
    'sleep_json': '../data/raw/sleep/json/',
    'stress': '../data/raw/stress/Stress Score.csv',
    'weight': '../data/raw/weight/'
}


# ================================ FUNCTIONS ================================
def read_file(filename, directory=None, flatten=False):
    filepath = os.path.join(directory, filename) if directory else filename
    if filename.endswith('.csv'):
        df = pd.read_csv(filepath)
    elif filename.endswith('.json'):
        with open(filepath, 'r') as file:
            data = json.load(file)
            df = pd.json_normalize(data)
    else:
        df = None
    return df


def fitbit_data_handler(path_string, flatten=False):
    # If path is a directory, read all files in the directory
    if os.path.isdir(path_string):
        directory = path_string
        dfs = []
        for filename in os.listdir(directory):
            df = read_file(filename, directory)
            dfs.append(df)
        dfs = pd.concat(dfs, ignore_index=True)
        return dfs
    else:  # Otherwise, just operate on one file
        return read_file(filename=path_string)

In [128]:
for name,path in path_dict.items():
    df_dict[name] = fitbit_data_handler(path)

In [134]:
output_directory = '../data/processed/'
for name,df in df_dict.items():
    df.to_csv(f'{output_directory}{name}.csv', index=False)

In [83]:
def read_file(filename, directory=None, flatten=False):
    filepath = os.path.join(directory, filename) if directory else filename
    if filename.endswith('.csv'):
        df = pd.read_csv(filepath)
    elif filename.endswith('.json'):
        with open(filepath, 'r') as file:
            data = json.load(file)
            df = pd.json_normalize(data)
    else:
        df = None
    return df


def fitbit_data_handler(path_string):
    # If path is a directory, read all files in the directory
    if os.path.isdir(path_string):
        directory = path_string
        dfs = []
        for filename in os.listdir(directory):
            df = read_file(filename, directory)
            dfs.append(df)
        dfs = pd.concat(dfs, ignore_index=True)
        return dfs
    else: # Otherwise, just operate on one file
        return read_file(filename=path_string)

In [84]:
profile = fitbit_data_handler(path_dict['profile'])

In [86]:
rr_summary = fitbit_data_handler(path_dict['respiratory_rate'])

In [87]:
vo2_max = fitbit_data_handler(path_dict['vo2_max'])

In [88]:
ox_var = fitbit_data_handler(path_dict['oxygen_variation'])

In [89]:
spo2_daily = fitbit_data_handler(path_dict['spo2_daily'])

In [90]:
spo2_intraday = fitbit_data_handler(path_dict['spo2_intraday'])

In [91]:
afib_ecg = fitbit_data_handler(path_dict['afib_ecg'])

In [92]:
hr = fitbit_data_handler(path_dict['heart_rate'])

In [93]:
hrv_summary = fitbit_data_handler(path_dict['hrv_summary'])

In [94]:
hrv_histogram = fitbit_data_handler(path_dict['hrv_histogram'])

In [95]:
hrv_details = fitbit_data_handler(path_dict['hrv_details'])

In [96]:
time_in_hr_zones = fitbit_data_handler(path_dict['time_in_hr_zones'])

In [97]:
sleep_profile = fitbit_data_handler(path_dict['sleep_profile'])

In [73]:
sleep_score = fitbit_data_handler(path_dict['sleep_score'])

In [102]:
sleep_json = fitbit_data_handler(path_dict['sleep_json'])

In [116]:
stress = fitbit_data_handler(path_dict['stress'])

In [120]:
weight = fitbit_data_handler(path_dict['weight'])

### Profile

In [None]:
with open ('../data/raw/Profile.csv', 'r') as file:
    reader = DictReader(file)
    profile = next(reader)

In [None]:
profile

### Breath
#### Respiratory Rate
##### Respiratory Rate Summary

In [None]:
rr_summary = []

In [None]:
directory_path = '../data/raw/breath/rr/rr_summary/'
for filename in os.listdir(directory_path):
    if filename.endswith('.csv'):
        file_path = os.path.join(directory_path, filename)
        temp_df = pd.read_csv(file_path)
        rr_summary.append(temp_df)       
rr_summary = pd.concat(rr_summary,ignore_index=True)

In [None]:
rr_summary

##### Respiratory Rate Daily Summary

In [None]:
directory_path = '../data/raw/breath/rr/daily_rr_summary/'
daily_rr_summary = []
for filename in os.listdir(directory_path):
    if filename.endswith('.csv'):
        file_path = os.path.join(directory_path, filename)
        temp_df = pd.read_csv(file_path)
        daily_rr_summary.append(temp_df)
daily_rr_summary = pd.concat(daily_rr_summary,ignore_index=True)

In [None]:
daily_rr_summary

#### Demographic VO2 Max

In [22]:
directory_path = '../data/raw/breath/demographic_vo2max/'

In [23]:
dem_vo2_max = []
for filename in os.listdir(directory_path):
    if filename.endswith('.json'):
        file_path = os.path.join(directory_path, filename)
        with open(file_path,'r') as file:
            temp_df = pd.DataFrame(json.load(file))
        value_df = temp_df['value'].apply(pd.Series)
        result_df = pd.concat([temp_df, value_df], axis=1).drop('value', axis=1)
        dem_vo2_max.append(result_df)
        
dem_vo2_max = pd.concat(dem_vo2_max,ignore_index=True)

In [25]:
dem_vo2_max

Unnamed: 0,dateTime,demographicVO2Max,demographicVO2MaxError,filteredDemographicVO2Max,filteredDemographicVO2MaxError
0,01/13/22 00:00:00,46.16596,3.0,46.16596,3.00000
1,01/14/22 00:00:00,46.45953,3.0,46.31396,1.51239
2,01/15/22 00:00:00,45.66477,3.0,46.09164,1.02734
3,01/16/22 00:00:00,46.29315,3.0,46.14488,0.79267
4,01/17/22 00:00:00,46.18243,3.0,46.15311,0.65787
...,...,...,...,...,...
769,03/11/24 00:00:00,39.05311,3.0,38.96958,0.36310
770,03/12/24 00:00:00,39.39203,3.0,39.02071,0.36310
771,03/13/24 00:00:00,39.95355,3.0,39.13361,0.36310
772,03/14/24 00:00:00,39.46535,3.0,39.17376,0.36310


#### Oxygen Variation

In [None]:
file_path = '../data/raw/breath/oxygen_variation/estimated_oxygen_variation-2022-01-13.csv'
test = pd.read_csv(file_path)

In [32]:
ox_var = []
directory = path_dict['oxygen_variation']
for filename in os.listdir(directory):
    if filename.endswith('.csv'):
        file_path = os.path.join(directory, filename)
        temp_df = pd.read_csv(file_path)
        ox_var.append(temp_df)
ox_var = pd.concat(ox_var,ignore_index=True)

In [33]:
ox_var

Unnamed: 0,timestamp,Infrared to Red Signal Ratio
0,01/12/22 15:09:00,0
1,01/12/22 15:10:00,3
2,01/12/22 15:11:00,21
3,01/12/22 15:12:00,6
4,01/12/22 15:13:00,-3
...,...,...
340348,03/14/24 08:59:34,27
340349,03/14/24 09:00:34,-4
340350,03/14/24 09:01:34,0
340351,03/14/24 09:02:34,0


In [None]:
directory_path = '../data/raw/breath/oxygen_variation/'
ox_var = pd.DataFrame()
for filename in os.listdir(directory_path):
    if filename.endswith('.csv'):
        file_path = os.path.join(directory_path, filename)
        temp_df = pd.read_csv(file_path)
        ox_var = pd.concat([ox_var, temp_df])

In [None]:
ox_var 

#### SpO2
##### SpO2 Daily  

In [36]:
directory_path = '../data/raw/breath/spo2/daily_spo2/'
spo2_daily = pd.DataFrame()
for filename in os.listdir(directory_path):
    if filename.endswith('.csv'):
        file_path = os.path.join(directory_path, filename)
        temp_df = pd.read_csv(file_path)
        spo2_daily = pd.concat([spo2_daily, temp_df])

In [37]:
spo2_daily

Unnamed: 0,timestamp,average_value,lower_bound,upper_bound
0,2022-01-21T00:00:00Z,96.5,93.0,99.6
1,2022-01-22T00:00:00Z,96.0,93.5,99.5
2,2022-01-23T00:00:00Z,96.9,94.3,99.9
3,2022-01-24T00:00:00Z,95.9,93.9,98.8
4,2022-01-25T00:00:00Z,95.3,91.0,98.0
...,...,...,...,...
82,2024-03-11T00:00:00Z,96.2,92.4,99.1
83,2024-03-12T00:00:00Z,96.2,92.4,99.0
84,2024-03-13T00:00:00Z,95.4,93.6,99.0
85,2024-03-14T00:00:00Z,96.0,92.0,99.5


##### SpO2 Intraday

In [40]:
directory_path = '../data/raw/breath/spo2/minute_spo2/'
spo2_intraday = pd.DataFrame()
for filename in os.listdir(directory_path):
    if filename.endswith('.csv'):
        file_path = os.path.join(directory_path, filename)
        temp_df = pd.read_csv(file_path)
        spo2_intraday = pd.concat([spo2_intraday, temp_df])

In [41]:
spo2_intraday

Unnamed: 0,timestamp,value
0,2022-01-20T22:24:00Z,94.6
1,2022-01-20T22:25:00Z,94.5
2,2022-01-20T22:26:00Z,94.5
3,2022-01-20T22:27:00Z,94.4
4,2022-01-20T22:28:00Z,94.3
...,...,...
433,2024-03-15T07:43:36Z,87.0
434,2024-03-15T07:44:36Z,87.8
435,2024-03-15T07:45:36Z,88.1
436,2024-03-15T07:46:36Z,89.9


### Heart
#### afib_ecg

In [44]:
file_path = '../data/raw/heart/afib_ecg/afib_ecg_reading_1645266183831.csv'
afib_ecg = pd.read_csv(file_path)

In [45]:
afib_ecg

Unnamed: 0,reading_id,reading_time,wire_id,result_classification,heart_rate,heart_rate_alert,firmware_version,device_app_version,hardware_version,waveform_samples
0,ebfcaa70-916d-11ec-8080-808080808080,Sat Feb 19 10:23:03 UTC 2022,89585e84472c,NSR,91,NONE,128.6.12,2.9.0,Sense,[-32768 -32768 -32768 -32768 -32768 -3276...


#### Heart Rate

In [48]:
directory_path = '../data/raw/heart/hr/'
hr_dfs = []
for filename in os.listdir(directory_path):
    if filename.endswith('.json'):
        file_path = os.path.join(directory_path, filename)
        with open(file_path, 'r') as file:
            data = json.load(file)
            temp_df = pd.json_normalize(data)
            hr_dfs.append(temp_df)

hr_df = pd.concat(hr_dfs,ignore_index=True)

In [None]:
hr_df['dateTime'] = pd.to_datetime(hr_df['dateTime'],format='%m/%d/%y %H:%M:%S')

In [None]:
hr_df.drop(columns=['value.confidence'], axis=1, inplace=True)

In [None]:
hr_df.rename(columns={'dateTime':'dateTime', 'value.bpm':'bpm'}, inplace=True)

In [None]:
hr_df.set_index('dateTime', inplace=True)

In [None]:
resampled = hr_df['bpm'].resample('1T').mean()

In [None]:
resampled.index.dtype

In [None]:
resampled = pd.DataFrame(resampled)

In [None]:
resampled['time'] = resampled.index.time
resampled['date'] = resampled.index.date

In [None]:
resampled.reset_index(inplace=True)

In [None]:
resampled.drop(columns='dateTime', inplace=True)

In [None]:
resampled.head()

#### Heart Rate Variability
##### Heart Rate Variability Summary

In [58]:
directory_path = '../data/raw/heart/hrv/hrv_summary/'
hrv_summary_dfs = []
for filename in os.listdir(directory_path):
    if filename.endswith('.csv'):
        file_path = os.path.join(directory_path, filename)
        temp_df = pd.read_csv(file_path)
        hrv_summary_dfs.append(temp_df)

hrv_summary_dfs = pd.concat(hrv_summary_dfs,ignore_index=True)

In [59]:
hrv_summary_dfs

Unnamed: 0,timestamp,rmssd,nremhr,entropy
0,2022-01-13T00:00:00,29.417,64.817,2.503
1,2022-01-14T00:00:00,30.302,59.501,2.313
2,2022-01-15T00:00:00,25.009,69.301,2.297
3,2022-01-16T00:00:00,37.679,62.529,3.064
4,2022-01-18T00:00:00,36.585,59.704,2.792
...,...,...,...,...
669,2024-03-10T00:00:00,29.268,62.102,2.637
670,2024-03-11T00:00:00,32.552,65.019,2.293
671,2024-03-12T00:00:00,24.510,65.442,2.344
672,2024-03-13T00:00:00,26.532,62.264,2.564


##### Heart Rate Variability histogram

In [62]:
directory_path = '../data/raw/heart/hrv/hrv_histogram/'
hrv_histogram_dfs = []
for filename in os.listdir(directory_path):
    if filename.endswith('.csv'):
        file_path = os.path.join(directory_path, filename)
        temp_df = pd.read_csv(file_path)
        hrv_histogram_dfs.append(temp_df)

hrv_histogram_dfs = pd.concat(hrv_histogram_dfs,ignore_index=True)

In [63]:
hrv_histogram_dfs

Unnamed: 0,timestamp,bucket_values
0,2022-01-13T06:43:00,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.001, 0.0..."
1,2022-01-14T06:07:00,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.002..."
2,2022-01-15T08:59:00,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.001, 0.003, 0..."
3,2022-01-16T08:57:00,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.001, 0.0..."
4,2022-01-18T06:27:00,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.003..."
...,...,...
692,2024-03-11T06:24:30,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.002, 0.0..."
693,2024-03-12T09:01:30,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.003, 0.0..."
694,2024-03-13T08:24:30,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.001, 0.0..."
695,2024-03-14T08:27:30,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.003, 0.15, 0.183, ..."


##### Heart Rate Variability Details

In [66]:
directory_path = '../data/raw/heart/hrv/hrv_details/'
hrv_details_dfs = []
for filename in os.listdir(directory_path):
    if filename.endswith('.csv'):
        file_path = os.path.join(directory_path, filename)
        temp_df = pd.read_csv(file_path)
        hrv_details_dfs.append(temp_df)

hrv_details_dfs = pd.concat(hrv_details_dfs,ignore_index=True)

In [67]:
hrv_details_dfs

Unnamed: 0,timestamp,rmssd,coverage,low_frequency,high_frequency
0,2022-01-12T22:20:00,20.969,0.846,261.484,179.967
1,2022-01-12T22:30:00,16.478,0.889,162.982,95.460
2,2022-01-12T22:35:00,21.236,0.733,405.093,242.995
3,2022-01-12T22:40:00,22.396,0.844,231.611,228.679
4,2022-01-12T22:45:00,22.470,0.890,206.510,159.706
...,...,...,...,...,...
59010,2024-03-14T08:00:00,30.652,0.919,1842.988,296.743
59011,2024-03-14T08:05:00,34.249,1.002,558.913,514.269
59012,2024-03-14T08:10:00,34.639,0.994,1906.109,444.621
59013,2024-03-14T08:15:00,31.669,1.005,624.755,365.327


In [None]:
hrv_details_dfs['timestamp'] = pd.to_datetime(hrv_details_dfs['timestamp'], format='%Y-%m-%dT%H:%M:%S')

In [None]:
hrv_details_dfs['date'] = hrv_details_dfs['timestamp'].dt.date

In [None]:
hrv_details_dfs.groupby('date').count()

##### Time in HR Zones

In [70]:
directory_path = '../data/raw/heart/time_in_hr_zones/'
time_in_hr_zones = []
for filename in os.listdir(directory_path):
    if filename.endswith('.json'):
        example = os.path.join(directory_path, filename)
    with open(example, 'r') as file:
        data = json.load(file)
        temp_df = pd.json_normalize(data)
        time_in_hr_zones.append(temp_df)
    
time_in_hr_zones = pd.concat(time_in_hr_zones,ignore_index=True)

In [71]:
time_in_hr_zones

Unnamed: 0,dateTime,value.valuesInZones.IN_DEFAULT_ZONE_2,value.valuesInZones.IN_DEFAULT_ZONE_1,value.valuesInZones.IN_DEFAULT_ZONE_3,value.valuesInZones.BELOW_DEFAULT_ZONE_1
0,06/11/22 00:00:00,0.0,343.0,0.0,1082.0
1,06/12/22 00:00:00,0.0,182.0,0.0,1241.0
2,06/13/22 00:00:00,1.0,533.0,0.0,749.0
3,06/14/22 00:00:00,7.0,622.0,0.0,811.0
4,06/15/22 00:00:00,1.0,398.0,0.0,1032.0
...,...,...,...,...,...
610,03/10/24 00:00:00,0.0,156.0,0.0,1284.0
611,03/11/24 00:00:00,0.0,356.0,0.0,1084.0
612,03/12/24 00:00:00,0.0,47.0,0.0,1393.0
613,03/13/24 00:00:00,4.0,753.0,0.0,609.0


In [None]:
time_in_hr_zones.columns = [col.replace('value.valuesInZones.', '') for col in time_in_hr_zones.columns]

In [None]:
time_in_hr_zones['time'] = pd.to_datetime(time_in_hr_zones['dateTime']).dt.time

In [None]:
time_in_hr_zones['time'].unique()

### Sleep
#### Sleep Profile

In [76]:
sleep_profile = pd.read_csv('../data/raw/sleep/Sleep Profile.csv')

In [77]:
sleep_profile

Unnamed: 0,creation_date,sleep_type,deep_sleep,rem_sleep,sleep_duration,sleep_start_time,schedule_variability,restorative_sleep,time_before_sound_sleep,sleep_stability,nights_with_long_awakenings,days_with_naps
0,2022-07-05,Bear,97.07,20.77,7.27,23.2,86.14,81.83,24.68,3.61,10.71,0.0
1,2024-01-18,Not enough data,,,,,,,,,,
2,2024-02-01,Not enough data,,,,,,,,,,
3,2024-03-01,Bear,87.96,23.23,6.63,24.45,65.99,82.3,22.61,3.64,7.14,0.0


#### Sleep Score

In [None]:
sleep_score = pd.read_csv('../data/raw/sleep/sleep_score.csv')

In [None]:
sleep_score

#### Sleep JSON objects

In [114]:
directory_path = '../data/raw/sleep/json/'
sleep_list = []
for filename in os.listdir(directory_path):
    if filename.endswith('.json'):
        filepath = os.path.join(directory_path, filename)
    with open(filepath, 'r') as file:
        data = json.load(file)
        temp_df = pd.json_normalize(data)
        sleep_list.append(temp_df)

sleep_df = pd.concat(sleep_list,ignore_index=True)

In [115]:
sleep_df

Unnamed: 0,logId,dateOfSleep,startTime,endTime,duration,minutesToFallAsleep,minutesAsleep,minutesAwake,minutesAfterWakeup,timeInBed,...,levels.summary.rem.minutes,levels.summary.rem.thirtyDayAvgMinutes,levels.data,levels.shortData,levels.summary.restless.count,levels.summary.restless.minutes,levels.summary.awake.count,levels.summary.awake.minutes,levels.summary.asleep.count,levels.summary.asleep.minutes
0,35664860139,2022-02-10,2022-02-09T22:20:30.000,2022-02-10T06:08:00.000,28020000,0,415,52,0,467,...,95.0,97.0,"[{'dateTime': '2022-02-09T22:20:30.000', 'leve...","[{'dateTime': '2022-02-09T22:32:00.000', 'leve...",,,,,,
1,35651816563,2022-02-09,2022-02-08T21:42:30.000,2022-02-09T06:00:00.000,29820000,0,451,46,0,497,...,121.0,96.0,"[{'dateTime': '2022-02-08T21:42:30.000', 'leve...","[{'dateTime': '2022-02-08T23:21:30.000', 'leve...",,,,,,
2,35636525093,2022-02-08,2022-02-07T21:59:30.000,2022-02-08T06:39:30.000,31200000,0,459,61,15,520,...,119.0,95.0,"[{'dateTime': '2022-02-07T21:59:30.000', 'leve...","[{'dateTime': '2022-02-07T22:36:00.000', 'leve...",,,,,,
3,35635587864,2022-02-07,2022-02-06T22:28:30.000,2022-02-07T06:01:30.000,27180000,0,396,57,0,453,...,92.0,96.0,"[{'dateTime': '2022-02-06T22:28:30.000', 'leve...","[{'dateTime': '2022-02-06T23:18:00.000', 'leve...",,,,,,
4,35635587863,2022-02-06,2022-02-06T03:09:00.000,2022-02-06T09:27:30.000,22680000,0,343,35,0,378,...,75.0,96.0,"[{'dateTime': '2022-02-06T03:09:00.000', 'leve...","[{'dateTime': '2022-02-06T04:20:30.000', 'leve...",,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
709,44760086952,2024-03-05,2024-03-05T00:19:30.000,2024-03-05T06:25:30.000,21960000,0,322,44,0,366,...,78.0,79.0,"[{'dateTime': '2024-03-05T00:19:30.000', 'leve...","[{'dateTime': '2024-03-05T01:59:00.000', 'leve...",,,,,,
710,44760086951,2024-03-04,2024-03-04T02:10:00.000,2024-03-04T07:49:00.000,20340000,0,281,58,0,339,...,56.0,87.0,"[{'dateTime': '2024-03-04T02:10:00.000', 'leve...","[{'dateTime': '2024-03-04T02:36:30.000', 'leve...",,,,,,
711,44740786190,2024-03-03,2024-03-03T03:09:30.000,2024-03-03T09:11:30.000,21720000,0,301,61,0,362,...,82.0,90.0,"[{'dateTime': '2024-03-03T03:09:30.000', 'leve...","[{'dateTime': '2024-03-03T04:57:30.000', 'leve...",,,,,,
712,44727012339,2024-03-02,2024-03-02T00:30:00.000,2024-03-02T07:43:30.000,25980000,0,383,50,0,433,...,78.0,101.0,"[{'dateTime': '2024-03-02T00:30:00.000', 'leve...","[{'dateTime': '2024-03-02T01:05:00.000', 'leve...",,,,,,


### Stress

In [118]:
stress = pd.read_csv('../data/raw/stress/Stress Score.csv')

In [119]:
stress

Unnamed: 0,DATE,UPDATED_AT,STRESS_SCORE,SLEEP_POINTS,MAX_SLEEP_POINTS,RESPONSIVENESS_POINTS,MAX_RESPONSIVENESS_POINTS,EXERTION_POINTS,MAX_EXERTION_POINTS,STATUS,CALCULATION_FAILED
0,2022-01-14T00:00:00,2022-01-14T06:29:35.631,82,0,0,0,0,0,0,READY_NOT_PREMIUM,False
1,2022-01-15T00:00:00,2022-01-15T10:08:45.675,73,0,0,0,0,0,0,READY_NOT_PREMIUM,False
2,2022-01-16T00:00:00,2022-01-16T09:18:37.96,88,0,0,0,0,0,0,READY_NOT_PREMIUM,False
3,2022-01-18T00:00:00,2022-01-18T06:59:11.594,84,0,0,0,0,0,0,READY_NOT_PREMIUM,False
4,2022-01-19T00:00:00,2022-01-19T06:20:48.915,83,0,0,0,0,0,0,READY_NOT_PREMIUM,False
...,...,...,...,...,...,...,...,...,...,...,...
676,2024-03-11T00:00:00,2024-03-12T09:03:07.685,73,19,30,28,30,26,40,READY,False
677,2024-03-12T00:00:00,2024-03-12T09:03:08.198,76,27,30,23,30,26,40,READY,False
678,2024-03-13T00:00:00,2024-03-13T08:25:54.156,71,27,30,23,30,21,40,READY,False
679,2024-03-14T00:00:00,2024-03-14T08:28:46.149,81,30,30,20,30,31,40,READY,False


### Weight

In [122]:
file_path = '../data/raw/weight/weight-2022-01-11.json'

In [123]:
with open(file_path, 'r') as file:
    weight = json.load(file)
    weight_df = pd.DataFrame(weight)

In [124]:
weight_df

Unnamed: 0,logId,weight,bmi,date,time,source,fat
0,1642031999000,197.7,23.35,01/12/22,23:59:59,API,
1,1642118399000,197.9,23.38,01/13/22,23:59:59,API,20.0
2,1642671555000,197.7,23.35,01/20/22,09:39:15,API,
3,1642809599000,200.4,23.66,01/21/22,23:59:59,API,20.0
4,1643155199000,197.5,23.32,01/25/22,23:59:59,API,20.0
5,1643414399000,195.5,23.09,01/28/22,23:59:59,API,20.0
6,1643500799000,198.6,23.45,01/29/22,23:59:59,API,20.0
7,1643587199000,197.5,23.32,01/30/22,23:59:59,API,20.0
8,1644019199000,195.9,23.14,02/04/22,23:59:59,API,20.0


In [125]:
directory_path = '../data/raw/weight/'
weight_list = []
for filename in os.listdir(directory_path):
    if filename.endswith('.json'):
        filepath = os.path.join(directory_path, filename)
    with open(filepath, 'r') as file:
        weight = json.load(file)
        temp_df = pd.json_normalize(weight)
        weight_list.append(temp_df)

weight_df = pd.concat(weight_list,ignore_index=True)

In [126]:
weight_df

Unnamed: 0,logId,weight,bmi,date,time,source,fat
0,1642031999000,197.7,23.35,01/12/22,23:59:59,API,
1,1642118399000,197.9,23.38,01/13/22,23:59:59,API,20.000000
2,1642671555000,197.7,23.35,01/20/22,09:39:15,API,
3,1642809599000,200.4,23.66,01/21/22,23:59:59,API,20.000000
4,1643155199000,197.5,23.32,01/25/22,23:59:59,API,20.000000
...,...,...,...,...,...,...,...
121,1706864975000,224.3,26.49,02/02/24,09:09:35,Withings,21.283001
122,1706939353000,222.5,26.28,02/03/24,05:49:13,Withings,22.989000
123,1707993167000,224.2,26.47,02/15/24,10:32:47,Withings,23.756001
124,1708868398000,224.8,26.55,02/25/24,13:39:58,Withings,24.077000


Would be better to get weight data from withings as it seems more comprehensive