## Activities and Weight Change

### Setup

In [None]:
import pathlib

In [None]:
import pandas as pd

In [None]:
# Extract date (YYYYMMDD) used as the partition folder name in the paths below.
date_partition = "20200925"

In [None]:
# Resolve the current user's home directory; all data paths hang off it.
home = pathlib.Path("~").expanduser()

In [None]:
# Directory (as a string) holding the CSV extracts for this partition.
data_input_path = "/".join(
    [str(home), "small-data", "apple-health-csv", "full-extract", date_partition]
)

In [None]:
# Directory (as a string) reserved for this study's outputs for the partition.
study_path = "/".join(
    [str(home), "small-data", "study", "health-stories", date_partition]
)

In [None]:
# Create the study directory (and any missing parents); no error if it exists.
pathlib.Path(study_path).mkdir(
    parents=True,
    exist_ok=True,
)

In [None]:
# Load the date -> interval lookup table, parsing all three date columns.
activity_summary_date_interval_map = pd.read_csv(
    f"{data_input_path}/activity-summary-dates-intervals.csv",
    parse_dates=["date", "interval_start", "interval_end"],
)

In [None]:
# Build a string key of the form "<start YYYYMMDD>T<end YYYYMMDD>" for each row.
# The vectorised `.dt.strftime` accessor replaces a per-element Python lambda
# and yields a missing key (instead of raising) when a date is NaT.
activity_summary_date_interval_map['interval_key'] = (
    activity_summary_date_interval_map['interval_start'].dt.strftime("%Y%m%d")
    + "T"
    + activity_summary_date_interval_map['interval_end'].dt.strftime("%Y%m%d")
)

In [None]:
# Load the activity summary extract; `dateComponents` is parsed as a datetime.
activity_summary = pd.read_csv(
    f"{data_input_path}/activity-summary.csv",
    parse_dates=["dateComponents"],
)

In [None]:
# Keep only the date, active-energy and exercise-time columns.
activity_summary = activity_summary[
    ["dateComponents", "activeEnergyBurned", "appleExerciseTime"]
]

In [None]:
# Switch the camelCase column names to snake_case.
activity_summary = activity_summary.rename(
    columns=dict(
        dateComponents="date",
        activeEnergyBurned="active_energy_burned",
        appleExerciseTime="apple_exercise_time",
    )
)

In [None]:
# Preview the date -> interval lookup table, including the new `interval_key`.
activity_summary_date_interval_map

In [None]:
# Check column dtypes before merging on `date`.
activity_summary.dtypes

In [None]:
# Check column dtypes of the lookup table before merging on `date`.
activity_summary_date_interval_map.dtypes

In [None]:
# Attach interval information to each activity row (inner join on `date`).
activity_summary_intervals = activity_summary.merge(
    activity_summary_date_interval_map,
    on="date",
)

In [None]:
# Preview the activity rows joined with their intervals.
activity_summary_intervals

In [None]:
# Drop rows whose active energy burned is zero, negative or missing.
activity_summary_intervals = activity_summary_intervals.loc[
    lambda frame: frame["active_energy_burned"] > 0
]

In [None]:
# Preview the rows that remain after the active-energy filter.
activity_summary_intervals 

In [None]:
# Load the walking/running distance summary extract.
movement = pd.read_csv(
    f"{data_input_path}/distance-walking-running-summary.csv",
    parse_dates=["date"],
)

In [None]:
# Preview the walking/running distance table.
movement

In [None]:
# Start the combined metrics table: activity rows plus movement distance
# (inner join on `date`, so dates missing from either side are dropped).
health_metrics = activity_summary_intervals.merge(
    movement[["date", "movement_distance"]],
    on="date",
)

In [None]:
# Load the run workout summary extract.
runs = pd.read_csv(
    f"{data_input_path}/workout-summary-run.csv",
    parse_dates=["date"],
)

In [None]:
# Prefix the workout columns with `run_` so they stay distinguishable
# once merged into the combined metrics table.
runs = runs.rename(
    columns=dict(
        duration="run_duration",
        distance="run_distance",
        energy_burned="run_energy_burned",
    )
)

In [None]:
# Add run metrics; left join keeps every existing row even without a run.
health_metrics = health_metrics.merge(
    runs[["date", "run_duration", "run_distance", "run_energy_burned"]],
    how="left",
    on="date",
)

In [None]:
# Load the VO2 max summary extract.
vo2max = pd.read_csv(
    f"{data_input_path}/vo2max-summary.csv",
    parse_dates=["date"],
)

In [None]:
# Add VO2 max; left join keeps every existing row even without a reading.
health_metrics = health_metrics.merge(
    vo2max[["date", "vo2max"]],
    how="left",
    on="date",
)

In [None]:
# Load the resting heart rate summary extract.
resting_heart_rate = pd.read_csv(
    f"{data_input_path}/resting-heart-rate-summary.csv",
    parse_dates=["date"],
)

In [None]:
# Preview the resting heart rate table.
resting_heart_rate

In [None]:
# Add resting heart rate; left join keeps rows that have no reading.
health_metrics = health_metrics.merge(
    resting_heart_rate[["date", "resting_heart_rate"]],
    how="left",
    on="date",
)

### Calculate interval averages

In [None]:
# Group the combined metrics by interval; `as_index=False` keeps
# `interval_key` as a regular column in the aggregated results.
health_metrics_groups = health_metrics.groupby(by="interval_key", as_index=False)

In [None]:
# Persist per-interval descriptive statistics.
# NOTE(review): this writes into the input directory (`data_input_path`) while
# `study_path` is created above and never used here - confirm the intended target.
health_metrics_groups.describe().to_csv(
    f"{data_input_path}/health-metrics-statistics.csv",
    index=False,
)

In [None]:
# Persist per-interval non-null counts for every column.
# NOTE(review): this writes into the input directory (`data_input_path`) while
# `study_path` is created above and never used here - confirm the intended target.
health_metrics_groups.count().to_csv(
    f"{data_input_path}/health-metrics-counts.csv",
    index=False,
)

In [None]:
# Average every numeric metric per interval.
# `numeric_only=True` is passed explicitly because the default changed in
# pandas 2.0: without it the datetime columns (`date`, `interval_start`,
# `interval_end`) are averaged too, and the later merges that re-attach
# `interval_start`/`interval_end` would get suffixed duplicate columns.
health_metrics_interval_averages = health_metrics_groups.mean(numeric_only=True)

In [None]:
# Preview the per-interval averages.
health_metrics_interval_averages

In [None]:
# Show the per-interval descriptive statistics inline.
health_metrics_groups.describe()

In [None]:
# One row per interval: take the lookup row whose date is the interval's
# start date and keep only the key and boundary columns.
health_metrics_grouped_intervals = activity_summary_date_interval_map.loc[
    lambda frame: frame["date"] == frame["interval_start"],
    ["interval_key", "interval_start", "interval_end"],
]

In [None]:
# Preview the one-row-per-interval boundary table.
health_metrics_grouped_intervals

In [None]:
# Re-attach each interval's start and end dates to its averages
# (inner join on `interval_key`).
health_metrics_interval_averages_with_interval_dates = (
    health_metrics_interval_averages.merge(
        health_metrics_grouped_intervals,
        on="interval_key",
    )
)

In [None]:
# Preview the averages together with their interval boundaries.
health_metrics_interval_averages_with_interval_dates

In [None]:
# Restrict the date lookup to intervals that actually have averages
# (inner join on `interval_key`).
weight_first_date_map = activity_summary_date_interval_map.merge(
    health_metrics_interval_averages[["interval_key"]],
    on="interval_key",
)

In [None]:
# Keep only the row for each interval's first day.
weight_first_date_map = weight_first_date_map.loc[
    lambda frame: frame["date"] == frame["interval_start"]
]

In [None]:
# Preview the interval first-day lookup used to pick weights.
weight_first_date_map

In [None]:
# Load the body mass summary extract.
weights = pd.read_csv(
    f"{data_input_path}/bodymass-summary.csv",
    parse_dates=["date"],
)

In [None]:
# Preview the body mass table.
weights

In [None]:
# Body mass recorded on each interval's first day (inner join on `date`,
# so intervals without a reading on that day are dropped).
month_first_date_weights = weights[["date", "bodymass"]].merge(
    weight_first_date_map,
    on="date",
)

In [None]:
# Preview the first-day weights per interval.
month_first_date_weights

In [None]:
# Check column dtypes before computing weight differences.
month_first_date_weights.dtypes

In [None]:
# Change in body mass relative to the previous row (first row is NaN).
# NOTE(review): assumes rows are in chronological order - confirm.
month_first_date_weights["weight_change"] = (
    month_first_date_weights["bodymass"].diff()
)

In [None]:
# Preview the table with the new `weight_change` column.
month_first_date_weights

In [None]:
# Running total of the row-to-row weight changes.
month_first_date_weights["cumul_weight_change"] = (
    month_first_date_weights.loc[:, "weight_change"].cumsum()
)

In [None]:
# Preview the table with the new `cumul_weight_change` column.
month_first_date_weights

In [None]:
# Pair each interval's averages with the weight row whose `interval_start`
# equals that interval's `interval_end`. The right join keeps every interval
# from the averages table, even when no matching weight row exists.
interval_activities_weight_changes = month_first_date_weights.merge(
    health_metrics_interval_averages_with_interval_dates,
    how="right",
    left_on="interval_start",
    right_on="interval_end",
)

In [None]:
# Body mass recorded on the extract date. The date is derived from
# `date_partition` (YYYYMMDD) rather than hard-coded, so this lookup follows
# the partition when it changes; for '20200925' it compares against
# '2020-09-25' exactly as before.
weight = weights.loc[
    weights['date']
    == pd.to_datetime(date_partition, format="%Y%m%d").strftime("%Y-%m-%d"),
    'bodymass',
]

In [None]:
# Show the body mass value(s) found for the selected date.
weight

In [None]:
# Final table: per-interval activity averages alongside weight changes.
interval_activities_weight_changes