## Activities and Weight Change

### Setup

In [None]:
import pathlib

In [None]:
import pandas as pd

In [None]:
# Partition label interpolated into both the input and study paths;
# the value reads as a YYYYMMDD date stamp.
date_partition = '20200927'

In [None]:
# Current user's home directory; the data paths are built relative to it.
home = pathlib.Path.home()

In [None]:
# Directory the CSV files for this partition are read from
# (the derived CSVs produced by this notebook are written here as well).
data_input_path = f"{home}/small-data/apple-health-csv/full-extract/{date_partition}"

In [None]:
# Study directory for this partition (created on disk by the mkdir call that follows).
study_path = f"{home}/small-data/study/health-stories/{date_partition}"

In [None]:
# Make sure the study directory exists: missing parents are created and an
# already-existing directory is not an error, so the cell can be re-run.
pathlib.Path(study_path).mkdir(exist_ok=True, parents=True)

In [None]:
# Lookup table relating dates to intervals; the three date columns are parsed
# as datetimes so they can be formatted and joined on later.
# NOTE(review): presumably one row per date with the interval it falls in —
# confirm against the CSV.
activity_summary_date_interval_map = pd.read_csv(f"{data_input_path}/activity-summary-dates-intervals.csv",
                                                 parse_dates=['date', 'interval_start', 'interval_end'])

In [None]:
# Build a string key "<start>T<end>" (both dates formatted as YYYYMMDD) that
# identifies each interval. The vectorised .dt accessor formats a whole column
# at once instead of calling strftime row by row through apply().
activity_summary_date_interval_map['interval_key'] = (
    activity_summary_date_interval_map['interval_start'].dt.strftime("%Y%m%d")
    + "T"
    + activity_summary_date_interval_map['interval_end'].dt.strftime("%Y%m%d")
)

### Load activity summary data and combine with the interval map

In [None]:
# Load the activity summary; 'dateComponents' is parsed as a datetime because
# it becomes the 'date' join column further down.
activity_summary = pd.read_csv(f"{data_input_path}/activity-summary.csv", parse_dates=['dateComponents'])

In [None]:
# Keep only the date plus the two activity measures used in this study.
activity_summary = activity_summary[
    ["dateComponents", "activeEnergyBurned", "appleExerciseTime"]
]

In [None]:
# Switch to snake_case column names; 'date' matches the join column of the
# other tables loaded in this notebook.
activity_summary = activity_summary.rename(
    {
        'dateComponents': 'date',
        'activeEnergyBurned': 'active_energy_burned',
        'appleExerciseTime': 'apple_exercise_time',
    },
    axis='columns',
)

In [None]:
# Inner join on the shared 'date' column: every activity row gains the
# interval columns of the matching date.
activity_summary_with_intervals = activity_summary.merge(
    activity_summary_date_interval_map,
    on='date',
)

In [None]:
# Bare expression: the notebook renders the joined frame for a visual check.
activity_summary_with_intervals

### Remove 0-calorie activity summaries

Apple Watch created a line item with 0 calories burned in the activity summary for the day before the Watch activation date.

In [None]:
# Keep only the rows whose active-energy value is strictly positive.
activity_summary_with_intervals = activity_summary_with_intervals.loc[
    activity_summary_with_intervals['active_energy_burned'].gt(0)
]

### Combine movement summary data (walking, running, doing chores at home, etc)

In [None]:
# Load the walking/running distance summary; 'date' is parsed as a datetime
# so it can be joined to the activity rows.
movement = pd.read_csv(f"{data_input_path}/distance-walking-running-summary.csv", parse_dates=['date'])

In [None]:
# Add the movement distance per date. This is an inner join, so dates without
# a movement row are dropped from the extended summary.
ext_activity_summary = activity_summary_with_intervals.merge(
    movement[['date', 'movement_distance']],
    on='date',
)

### Combine run workout summary

In [None]:
# Load the run workout summary; 'date' is parsed as a datetime for joining.
runs = pd.read_csv(f"{data_input_path}/workout-summary-run.csv", parse_dates=['date'])

In [None]:
# Prefix the workout measures with 'run_' so they stay distinguishable once
# merged into the extended activity summary.
runs = runs.rename(
    {
        'duration': 'run_duration',
        'distance': 'run_distance',
        'energy_burned': 'run_energy_burned',
    },
    axis='columns',
)

In [None]:
# Left join: every existing row is kept, and dates without a run get missing
# values in the run_* columns.
ext_activity_summary = ext_activity_summary.merge(
    runs[['date', 'run_duration', 'run_distance', 'run_energy_burned']],
    on='date',
    how='left',
)

### Combine vo2max summary

In [None]:
# Load the vo2max summary; 'date' is parsed as a datetime for joining.
vo2max = pd.read_csv(f"{data_input_path}/vo2max-summary.csv", parse_dates=['date'])

In [None]:
# Left join: dates without a vo2max reading keep their row and get a missing value.
ext_activity_summary = ext_activity_summary.merge(
    vo2max[['date', 'vo2max']],
    on='date',
    how='left',
)

### Combine resting heart rate summary

In [None]:
# Load the resting heart rate summary; 'date' is parsed as a datetime for joining.
resting_heart_rate = pd.read_csv(f"{data_input_path}/resting-heart-rate-summary.csv", parse_dates=['date'])

In [None]:
# Left join: dates without a resting heart rate keep their row and get a missing value.
ext_activity_summary = ext_activity_summary.merge(
    resting_heart_rate[['date', 'resting_heart_rate']],
    on='date',
    how='left',
)

### Calculate interval averages

In [None]:
# Group the daily rows by interval. as_index=False keeps 'interval_key' as a
# regular column in the aggregated results instead of moving it to the index.
ext_activity_summary_groups = ext_activity_summary.groupby('interval_key', as_index=False)

In [None]:
# Write the per-interval descriptive statistics (the output of describe()) to CSV.
# NOTE(review): this derived file is written into the input directory, while
# study_path is created earlier and not used in the visible notebook — confirm
# the intended destination.
ext_activity_summary_groups.describe().to_csv(f"{data_input_path}/health-metrics-statistics.csv", index=False)

In [None]:
# Write the per-interval count of non-missing values in each column to CSV.
# NOTE(review): written into the input directory rather than study_path —
# confirm the intended destination.
ext_activity_summary_groups.count().to_csv(f"{data_input_path}/health-metrics-counts.csv", index=False)

In [None]:
# Per-interval averages of the numeric health metrics.
# numeric_only=True is spelled out so the datetime columns (date,
# interval_start, interval_end) are left out of the result on every pandas
# version. Newer pandas no longer drops non-numeric columns by default, and
# keeping them here would make the later merge emit suffixed
# interval_start_x/_y columns and break the lookups that follow.
ext_activity_summary_interval_averages = ext_activity_summary_groups.mean(numeric_only=True)

In [None]:
# ext_activity_summary_groups.describe()

### Add interval information to ext_activity_summary_interval_averages

#### `drop_duplicates()` is the pandas counterpart of SQL `SELECT DISTINCT`

In [None]:
# Attach each interval's start/end dates to its averages. The lookup may
# repeat an interval on several rows, so it is reduced to distinct
# (key, start, end) rows *before* the join. That avoids building duplicated
# rows only to discard them and leaves a contiguous index on the result.
ext_activity_summary_interval_averages = pd.merge(
    ext_activity_summary_interval_averages,
    activity_summary_date_interval_map[
        ['interval_key', 'interval_start', 'interval_end']
    ].drop_duplicates(),
    on='interval_key',
)

### Load weight history and create interval weight changes

In [None]:
# Load the body-mass measurements; 'date' is parsed as a datetime so it can be
# matched against the interval start and end dates.
weights = pd.read_csv(f"{data_input_path}/bodymass-summary.csv", parse_dates=['date'])

In [None]:
### Create the starting and ending weight of each interval

In [None]:
# One row per interval with its start date, used to look up the starting weight.
starting_weight = ext_activity_summary_interval_averages[
    ['interval_key', 'interval_start']
]

In [None]:
# Look up the weight recorded on each interval's start date. This is an inner
# join, so intervals without a measurement on that exact date are dropped.
starting_weight = starting_weight.merge(
    weights[['date', 'bodymass']],
    left_on='interval_start',
    right_on='date',
)

In [None]:
# One row per interval with its end date, used to look up the ending weight.
ending_weight = ext_activity_summary_interval_averages[
    ['interval_key', 'interval_end']
]

In [None]:
# Look up the weight recorded on each interval's end date. This is an inner
# join, so intervals without a measurement on that exact date are dropped.
ending_weight = ending_weight.merge(
    weights[['date', 'bodymass']],
    left_on='interval_end',
    right_on='date',
)

In [None]:
# Pair the start and end weights of each interval. The overlapping 'date' and
# 'bodymass' columns receive the default suffixes: _x (start) and _y (end).
interval_weight = starting_weight.merge(ending_weight, on='interval_key')

In [None]:
# Replace the merge suffixes with descriptive names.
interval_weight = interval_weight.rename(
    {
        'bodymass_x': 'start_weight',
        'bodymass_y': 'end_weight',
    },
    axis='columns',
)

In [None]:
# The join dates repeat the interval start/end dates, so remove them in place.
interval_weight.drop(columns=['date_x', 'date_y'], inplace=True)

In [None]:
# Change over the interval: end weight minus start weight (negative = loss).
interval_weight['weight_change'] = interval_weight['end_weight'].sub(
    interval_weight['start_weight']
)

In [None]:
# Running total of the per-interval changes, accumulated in the frame's current row order.
# NOTE(review): presumably the rows are in chronological interval order — confirm.
interval_weight['cumul_weight_change'] = interval_weight['weight_change'].cumsum()

### Combine weight interval data with health metrics 

In [None]:
# Join the weight changes with the interval averages. Both inputs carry
# interval_start/interval_end, so those columns come out with _x/_y suffixes.
weight_and_activity = interval_weight.merge(
    ext_activity_summary_interval_averages,
    on='interval_key',
)

In [None]:
# Drop the _x copies of the interval bounds in place; the _y copies are kept.
weight_and_activity.drop(
    columns=['interval_start_x', 'interval_end_x'],
    inplace=True,
)

In [None]:
# Give the remaining interval bounds their final names; the end of an interval
# is labelled as the start date of the next one.
weight_and_activity = weight_and_activity.rename(
    {
        "interval_start_y": "interval_start_date",
        "interval_end_y": "next_interval_start_date",
    },
    axis='columns',
)

In [None]:
# Bare expression: the notebook renders the final frame for a visual check.
weight_and_activity

In [None]:
# Persist the combined per-interval weight and activity table without the index.
# NOTE(review): written into the input directory rather than study_path —
# confirm the intended destination.
weight_and_activity.to_csv(f"{data_input_path}/weight_after_activities_interval.csv", index=False)