# Fitbit: Exploratory data analysis

Assumes that environment variables have been defined for locating 'weight-*.json' files. See the `.env.example` file for guidance.

- GLOBAL_EXPORT_DATA_DIR_NAME : Path to the 'Global Export Data' directory, which contains files exported from a Fitbit account.
- CLEAN_DATA_DIR_NAME : Path to the directory where clean parquet files are saved.
- WEIGHT_FILE_PREFIX : Part of a file name that identifies the file as weight data, e.g. 'weight-'.
- WEIGHT_FILE_SUFFIX : File type suffix, e.g. '.json'.

In [1]:
from dotenv import load_dotenv
import os
from pathlib import Path
import polars as pl

In [2]:
# Path to Global Export Data directory
_ = load_dotenv()
GLOBAL_EXPORT_DATA_DIR_NAME = os.getenv('GLOBAL_EXPORT_DATA_DIR_NAME')
CLEAN_DATA_DIR_NAME = os.getenv('CLEAN_DATA_DIR_NAME')
WEIGHT_FILE_PREFIX = os.getenv('WEIGHT_FILE_PREFIX', default='weight-')
WEIGHT_FILE_SUFFIX = os.getenv('WEIGHT_FILE_SUFFIX', default='.json')
sample_monthly_weight_file_name = (
    f'{WEIGHT_FILE_PREFIX}2024-07-20{WEIGHT_FILE_SUFFIX}'
)

## Reads a single file.

In [3]:
df = pl.read_json(
    Path(GLOBAL_EXPORT_DATA_DIR_NAME).joinpath(sample_monthly_weight_file_name)
)
print(df)

shape: (26, 6)
┌───────────────┬────────┬───────┬──────────┬──────────┬────────┐
│ logId         ┆ weight ┆ bmi   ┆ date     ┆ time     ┆ source │
│ ---           ┆ ---    ┆ ---   ┆ ---      ┆ ---      ┆ ---    │
│ i64           ┆ f64    ┆ f64   ┆ str      ┆ str      ┆ str    │
╞═══════════════╪════════╪═══════╪══════════╪══════════╪════════╡
│ 1721455821000 ┆ 214.6  ┆ 26.82 ┆ 07/20/24 ┆ 06:10:21 ┆ API    │
│ 1721519999000 ┆ 214.6  ┆ 26.82 ┆ 07/20/24 ┆ 23:59:59 ┆ API    │
│ 1721550936000 ┆ 213.7  ┆ 26.71 ┆ 07/21/24 ┆ 08:35:36 ┆ API    │
│ 1721630868000 ┆ 219.0  ┆ 27.37 ┆ 07/22/24 ┆ 06:47:48 ┆ API    │
│ 1721721955000 ┆ 215.9  ┆ 26.99 ┆ 07/23/24 ┆ 08:05:55 ┆ API    │
│ …             ┆ …      ┆ …     ┆ …        ┆ …        ┆ …      │
│ 1723179214000 ┆ 219.7  ┆ 27.46 ┆ 08/09/24 ┆ 04:53:34 ┆ API    │
│ 1723247999000 ┆ 219.7  ┆ 27.46 ┆ 08/09/24 ┆ 23:59:59 ┆ API    │
│ 1723366762000 ┆ 218.0  ┆ 27.25 ┆ 08/11/24 ┆ 08:59:22 ┆ API    │
│ 1723438017000 ┆ 215.7  ┆ 26.96 ┆ 08/12/24 ┆ 04:46:57 ┆ API 

## Concatenates all the 'weight-*.json' files in a directory into a Dataframe.

In [4]:
df = None
for entry in Path(GLOBAL_EXPORT_DATA_DIR_NAME).iterdir():
    if (
        entry.name.startswith(WEIGHT_FILE_PREFIX)
        and entry.name.endswith(WEIGHT_FILE_SUFFIX)
        and entry.is_file()
    ):
        entry_as_df = pl.read_json(
            os.path.join(GLOBAL_EXPORT_DATA_DIR_NAME, entry.name)
        )
        df = entry_as_df if df is None else df.vstack(entry_as_df)
df.shape

(341, 6)

In [5]:
df

logId,weight,bmi,date,time,source
i64,f64,f64,str,str,str
1628294399000,242.0,30.25,"""08/06/21""","""23:59:59""","""API"""
1718927999000,223.2,27.9,"""06/20/24""","""23:59:59""","""API"""
1719187199000,223.0,27.87,"""06/23/24""","""23:59:59""","""API"""
1719273599000,224.0,28.0,"""06/24/24""","""23:59:59""","""API"""
1719359999000,223.8,27.97,"""06/25/24""","""23:59:59""","""API"""
…,…,…,…,…,…
1710719999000,225.0,28.12,"""03/17/24""","""23:59:59""","""API"""
1710806399000,225.2,28.15,"""03/18/24""","""23:59:59""","""API"""
1710892799000,225.8,28.22,"""03/19/24""","""23:59:59""","""API"""
1710979199000,228.6,28.57,"""03/20/24""","""23:59:59""","""API"""


## Clean the data

In [6]:
# Only keep the first weight measurement per day.
df = df.sort(['date', 'time']).unique(subset=['date'], keep='first')

In [7]:
df.shape

(339, 6)

In [8]:
# Convert date as string to date object.
df = (
    df
    .with_columns(
        pl.col('date').str.to_date("%m/%d/%y")
    )
    .drop(['time', 'logId', 'source'])
    .with_columns(
        pl.lit("pounds").alias("weight_units")
    )
    .sort('date')
)
df

weight,bmi,date,weight_units
f64,f64,date,str
219.0,27.37,2016-12-30,"""pounds"""
223.0,27.87,2017-08-06,"""pounds"""
228.0,28.5,2018-08-08,"""pounds"""
222.0,27.75,2019-08-06,"""pounds"""
235.0,29.37,2020-08-06,"""pounds"""
…,…,…,…
220.4,27.55,2024-09-02,"""pounds"""
225.2,28.15,2024-09-03,"""pounds"""
224.6,28.07,2024-09-04,"""pounds"""
222.2,27.77,2024-09-05,"""pounds"""


## Writes the Dataframe to a Parquet file for future use.

In [9]:
print(f'{CLEAN_DATA_DIR_NAME=}')
type(CLEAN_DATA_DIR_NAME)

CLEAN_DATA_DIR_NAME='/Users/jimtyhurst/Dropbox/sync/data-sync/fitbit/takeout-20240906T231211Z-001/cleaned_data'


str

In [10]:
df.write_parquet(Path(CLEAN_DATA_DIR_NAME).joinpath('weight.parquet'))

## Reads from Parquet file.

In [11]:
df_persisted = pl.read_parquet(
    os.path.join(CLEAN_DATA_DIR_NAME, 'weight.parquet')
)
df_persisted.shape

(339, 4)

In [12]:
df.equals(df_persisted)

True

## Plots

In [13]:
# TODO


In [14]:
import matplotlib
import matplotlib.pyplot as plt
%matplotlib inline

In [15]:
df = df.filter(pl.col('date') >= pl.lit("2023-01-01").str.to_date("%Y-%m-%d"))
df

weight,bmi,date,weight_units
f64,f64,date,str
235.8,29.47,2023-08-01,"""pounds"""
236.2,29.52,2023-08-02,"""pounds"""
236.8,29.6,2023-08-03,"""pounds"""
236.2,29.52,2023-08-04,"""pounds"""
236.4,29.55,2023-08-05,"""pounds"""
…,…,…,…
220.4,27.55,2024-09-02,"""pounds"""
225.2,28.15,2024-09-03,"""pounds"""
224.6,28.07,2024-09-04,"""pounds"""
222.2,27.77,2024-09-05,"""pounds"""


In [17]:
def weight_bounds(df: pl.DataFrame) -> map:
    min_wt = df.select(pl.min('weight')).item()
    max_wt = df.select(pl.max('weight')).item()
    mean_wt = df.select(pl.mean('weight')).item()
    return {
        'starting_date': df.select(pl.min('date').dt.to_string("iso")).item(),
        'ending_date': df.select(pl.max('date').dt.to_string("iso")).item(),
        'min_wt': min_wt,
        'min_wt_date': df.filter(pl.col('weight') == min_wt).select(pl.max('date').dt.to_string("iso")).item(),
        'max_wt': max_wt,
        'max_wt_date': df.filter(pl.col('weight') == max_wt).select(pl.max('date').dt.to_string("iso")).item(),
        'mean_wt': mean_wt,
        'weight_units': df.select('weight_units').item(0, 0)
    }

In [18]:
bounds = weight_bounds(df)
bounds

{'starting_date': '2023-08-01',
 'ending_date': '2024-09-06',
 'min_wt': 211.6,
 'min_wt_date': '2024-07-31',
 'max_wt': 237.4,
 'max_wt_date': '2023-08-10',
 'mean_wt': 222.7343373493976,
 'weight_units': 'pounds'}

In [44]:
df.filter(pl.col('date') == pl.max('date')).select('weight').item()

220.6

In [20]:
latest_weight = df.filter(pl.col('date') == pl.max('date')).select('weight').item()
print(f"latest weight: {latest_weight} {bounds['weight_units']} on {bounds['ending_date']}")

latest weight: 220.6 pounds on 2024-09-06


In [21]:
ax = df.plot(x='date', y='pounds', kind='line')
bounds = weight_bounds(df)

# Labels
plt.suptitle("Daily Weight")
plt.title(f"{bounds['starting_date']} - {bounds['ending_date']}", fontsize=10)
plt.ylabel('pounds')

# Upper/Lower Bounds
ax.axhline(y=bounds['max_wt'], color='red')
plt.text(bounds['ending_date'], bounds['max_wt'], f"max weight: {bounds['max_wt']:.0f}", ha='right', va='bottom')
ax.axhline(y=bounds['mean_wt'], color='blue')
plt.text(bounds['starting_date'], bounds['mean_wt'], f"mean weight: {bounds['mean_wt']:.0f}", ha='left', va='bottom')
ax.axhline(y=bounds['min_wt'], color='green')
plt.text(bounds['ending_date'], bounds['min_wt'], f"min weight: {bounds['min_wt']:.0f}", ha='right', va='bottom')

# Shows Latest Weight on plot
plt.text(bounds['ending_date'], bounds['mean_wt'] - ((bounds['mean_wt'] - bounds['min_wt']) / 2), f"latest weight: {latest_weight:.0f}", ha='left', va='bottom')
plt.grid()
plt.show()

ModuleUpgradeRequiredError: altair>=5.4.0 is required for `.plot`