## Health Metrics - Data Cleaning

### Setup Packages and Config

In [57]:
import pandas as pd
import numpy as np

### Import Data

In [61]:
# Read the export and keep only its top-level "data" entry.
data = pd.read_json('data.json')["data"]

# Build one DataFrame per section of the export: workouts and metrics.
wdf, mdf = (pd.DataFrame(data[section]) for section in ("workouts", "metrics"))

# Preview the first rows of each frame to see what the columns look like.
for frame in (wdf, mdf):
    print(frame.head())

                                     id                              name  \
0  1A92CCA1-80B0-4C44-9FA5-DD5A2756478C                     Core Training   
1  BAD3330A-E7F0-4C54-9257-43075838357D  High Intensity Interval Training   
2  00DA3796-BA60-42EB-B28A-74AD67C5C1B5                   Outdoor Cycling   
3  AC234210-6900-45E3-A39F-7EF6505914FE  High Intensity Interval Training   
4  2FE239D9-3311-4C24-8A4B-C876EEAC64FB  High Intensity Interval Training   

                       start                        end    duration  \
0  2021-03-11 07:03:11 -1000  2021-03-11 07:14:18 -1000  636.608887   
1  2021-03-24 14:44:10 -1000  2021-03-24 14:51:11 -1000  421.000305   
2  2021-03-26 03:31:24 -1000  2021-03-26 03:40:00 -1000  515.888359   
3  2021-04-01 11:30:19 -1000  2021-04-01 11:54:35 -1000  948.742759   
4  2021-04-06 10:28:53 -1000  2021-04-06 10:34:36 -1000    5.955130   

                              activeEnergyBurned  \
0   {'units': 'kcal', 'qty': 24.111050124606475}   
1   {'

### Clean the Data

In [62]:
# Helper for flattening the nested columns of the 'workouts' DataFrame
def extract_qty_column(df, column_name):
    """Add a ``<column_name>_qty`` column holding the 'qty' entry of each cell.

    Args:
        df: DataFrame to extend. It is modified in place.
        column_name: Name of the column whose cells may be dicts with a
            'qty' key.

    Returns:
        The same ``df`` object, with the new ``<column_name>_qty`` column.
        Dict cells contribute their 'qty' value; any other cell is copied
        unchanged. If ``column_name`` is not a column of ``df``, the new
        column is filled with None.
    """
    target = f'{column_name}_qty'

    # Source column absent: still create the target so callers get a
    # uniform set of columns.
    if column_name not in df.columns:
        df[target] = None
        return df

    def _qty_of(cell):
        # Only dict cells are unwrapped; everything else passes through.
        if isinstance(cell, dict):
            return cell['qty']
        return cell

    df[target] = df[column_name].apply(_qty_of)
    return df

# Extract the qty from all relevant columns
columnns_to_extract = ['activeEnergyBurned', 'distance', 'lapLength', 'intensity', 'humidity', 'temperature']
for column_name in columnns_to_extract:
    # Skip columns that were already flattened (source column gone, '_qty'
    # column present). Without this guard, re-running the cell would make
    # extract_qty_column overwrite the extracted values with None.
    already_flattened = (
        column_name not in wdf.columns
        and f'{column_name}_qty' in wdf.columns
    )
    if not already_flattened:
        wdf = extract_qty_column(wdf, column_name)
# Drop the original nested columns. extract_qty_column tolerates a missing
# source column, so the drop must as well: errors='ignore' skips absent
# labels instead of raising KeyError.
wdf = wdf.drop(columns=columnns_to_extract, errors='ignore')
