# Fitbit: Exploratory data analysis

Assumes that environment variables have been defined for locating 'weight-*.json' files. See the `.env.example` file for guidance.

- GLOBAL_EXPORT_DATA_DIR_NAME : Path to the 'Global Export Data' directory, which contains files exported from a Fitbit account.
- CLEAN_DATA_DIR_NAME : Path to the directory where clean parquet files are saved.
- WEIGHT_FILE_PREFIX : Part of a file name that identifies the file as weight data, e.g. 'weight-'.
- WEIGHT_FILE_SUFFIX : File type suffix, e.g. '.json'.

In [1]:
from dotenv import load_dotenv
import os
from pathlib import Path
import polars as pl

In [2]:
# Path to Global Export Data directory
load_dotenv()
GLOBAL_EXPORT_DATA_DIR_NAME = os.getenv('GLOBAL_EXPORT_DATA_DIR_NAME')
CLEAN_DATA_DIR_NAME = os.getenv('CLEAN_DATA_DIR_NAME')
WEIGHT_FILE_PREFIX = os.getenv('WEIGHT_FILE_PREFIX', default='weight-')
WEIGHT_FILE_SUFFIX = os.getenv('WEIGHT_FILE_SUFFIX', default='.json')
sample_monthly_weight_file_name = (
    f'{WEIGHT_FILE_PREFIX}2024-07-20{WEIGHT_FILE_SUFFIX}'
)

## Reads a single file.

In [3]:
df = pl.read_json(
    Path(GLOBAL_EXPORT_DATA_DIR_NAME).joinpath(sample_monthly_weight_file_name)
)
print(df)

shape: (26, 6)
┌───────────────┬────────┬───────┬──────────┬──────────┬────────┐
│ logId         ┆ weight ┆ bmi   ┆ date     ┆ time     ┆ source │
│ ---           ┆ ---    ┆ ---   ┆ ---      ┆ ---      ┆ ---    │
│ i64           ┆ f64    ┆ f64   ┆ str      ┆ str      ┆ str    │
╞═══════════════╪════════╪═══════╪══════════╪══════════╪════════╡
│ 1721455821000 ┆ 214.6  ┆ 26.82 ┆ 07/20/24 ┆ 06:10:21 ┆ API    │
│ 1721519999000 ┆ 214.6  ┆ 26.82 ┆ 07/20/24 ┆ 23:59:59 ┆ API    │
│ 1721550936000 ┆ 213.7  ┆ 26.71 ┆ 07/21/24 ┆ 08:35:36 ┆ API    │
│ 1721630868000 ┆ 219.0  ┆ 27.37 ┆ 07/22/24 ┆ 06:47:48 ┆ API    │
│ 1721721955000 ┆ 215.9  ┆ 26.99 ┆ 07/23/24 ┆ 08:05:55 ┆ API    │
│ …             ┆ …      ┆ …     ┆ …        ┆ …        ┆ …      │
│ 1723179214000 ┆ 219.7  ┆ 27.46 ┆ 08/09/24 ┆ 04:53:34 ┆ API    │
│ 1723247999000 ┆ 219.7  ┆ 27.46 ┆ 08/09/24 ┆ 23:59:59 ┆ API    │
│ 1723366762000 ┆ 218.0  ┆ 27.25 ┆ 08/11/24 ┆ 08:59:22 ┆ API    │
│ 1723438017000 ┆ 215.7  ┆ 26.96 ┆ 08/12/24 ┆ 04:46:57 ┆ API 

## Concatenates all the 'weight-*.json' files in a directory into a Dataframe.

In [4]:
df = None
for entry in Path(GLOBAL_EXPORT_DATA_DIR_NAME).iterdir():
    if (
        entry.name.startswith(WEIGHT_FILE_PREFIX)
        and entry.name.endswith(WEIGHT_FILE_SUFFIX)
        and entry.is_file()
    ):
        entry_as_df = pl.read_json(
            os.path.join(GLOBAL_EXPORT_DATA_DIR_NAME, entry.name)
        )
        df = entry_as_df if df is None else df.vstack(entry_as_df)
df.shape

(341, 6)

In [5]:
df

logId,weight,bmi,date,time,source
i64,f64,f64,str,str,str
1628294399000,242.0,30.25,"""08/06/21""","""23:59:59""","""API"""
1718927999000,223.2,27.9,"""06/20/24""","""23:59:59""","""API"""
1719187199000,223.0,27.87,"""06/23/24""","""23:59:59""","""API"""
1719273599000,224.0,28.0,"""06/24/24""","""23:59:59""","""API"""
1719359999000,223.8,27.97,"""06/25/24""","""23:59:59""","""API"""
…,…,…,…,…,…
1710719999000,225.0,28.12,"""03/17/24""","""23:59:59""","""API"""
1710806399000,225.2,28.15,"""03/18/24""","""23:59:59""","""API"""
1710892799000,225.8,28.22,"""03/19/24""","""23:59:59""","""API"""
1710979199000,228.6,28.57,"""03/20/24""","""23:59:59""","""API"""


## Clean the data

In [6]:
# Only keep the first weight measurement per day.
df = df.sort(['date', 'time']).unique(subset=['date'], keep='first')

In [7]:
df.shape

(339, 6)

## Writes the Dataframe to a Parquet file for future use.

In [8]:
print(f'{CLEAN_DATA_DIR_NAME=}')
type(CLEAN_DATA_DIR_NAME)

CLEAN_DATA_DIR_NAME='/Users/jimtyhurst/Dropbox/sync/data-sync/fitbit/takeout-20240906T231211Z-001/cleaned_data'


str

In [9]:
df.write_parquet(Path(CLEAN_DATA_DIR_NAME).joinpath('weight.parquet'))

## Reads from Parquet file.

In [10]:
df_persisted = pl.read_parquet(
    os.path.join(CLEAN_DATA_DIR_NAME, 'weight.parquet')
)
df_persisted.shape

(339, 6)

In [11]:
df.equals(df_persisted)

True

## Plots

In [12]:
# TODO
