In [None]:
import xml.etree.ElementTree as ET
import matplotlib.pyplot as plt
import seaborn as sns
import datetime as dt
import pandas as pd

In [None]:
# Path to the XML export to analyse. It is left blank here, so set it before
# running: ET.parse cannot open an empty path.
filepath = ''
# Parse the file once; `root` is the element the later cells search for
# `Workout` children.
tree = ET.parse(filepath)
root = tree.getroot()

In [None]:
def print_elements(element, indent=""):
    """Print the tag and attributes of `element` and all of its descendants.

    Output is in document (pre-order) order, one line per element. Each
    nesting level is prefixed with two more spaces than its parent, starting
    from `indent` for `element` itself.
    """
    # Explicit stack instead of recursion; children are pushed in reverse so
    # that they are popped (and printed) in their original order.
    pending = [(element, indent)]
    while pending:
        node, prefix = pending.pop()
        print(f"{prefix}Tag: {node.tag}, Attributes: {node.attrib}")
        child_prefix = prefix + "  "
        pending.extend((child, child_prefix) for child in reversed(list(node)))

In [None]:
# Keep only the direct `Workout` children of the root whose activity type is
# swimming, then dump each one's element tree for inspection.
workout_list = list(filter(
    lambda candidate: candidate.get('workoutActivityType')
    == 'HKWorkoutActivityTypeSwimming',
    root.findall('Workout'),
))
for workout in workout_list:
    print_elements(workout)

In [None]:
# Stroke-style codes (as they appear in XML attribute text) mapped to the
# column names used for lap counts. The code is the position in this tuple.
STROKE_STYLE_MAP = {
    str(code): label
    for code, label in enumerate((
        'UnknownStrokeStyle',
        'MixedStrokeStyle',
        'FreestyleStrokeStyle',
        'BackstrokeStrokeStyle',
        'BreaststrokeStrokeStyle',
        'ButterflyStrokeStyle',
        'KickboardStrokeStyle',
    ))
}


def parse_stroke_style(value):
    """Return the stroke-style name for a code string, or the unknown style.

    Any value that is not a key of STROKE_STYLE_MAP (including non-string
    values such as an int or None) yields 'UnknownStrokeStyle'.
    """
    if value in STROKE_STYLE_MAP:
        return STROKE_STYLE_MAP[value]
    return 'UnknownStrokeStyle'

In [None]:
def xml_to_dict(element):
    """Flatten one `Workout` element into a dict (one future DataFrame row).

    Always present (as '' when the attribute is missing on `element`):
        'Duration', 'CreationDate', 'StartDate', 'EndDate'.

    Present only when the matching child element occurs:
        'AverageMETs', 'WeatherTemperature'
            from `MetadataEntry` children keyed 'HKAverageMETs' /
            'HKWeatherTemperature'; the first space-separated token of
            `value` is kept, so a trailing unit suffix is dropped.
        'DistanceSwimming', 'EnergyBurned'
            the `sum` attribute of the matching `WorkoutStatistics` child.
        '<Style>StrokeStyle'
            lap counters (int), one per stroke style seen on
            'HKWorkoutEventTypeLap' events.

    All non-counter values are returned as strings, exactly as found in the
    XML. Entries that lack the attribute being read (`value` / `sum`) are
    skipped rather than raising KeyError.
    """
    result_dict = {
        'Duration': element.attrib.get('duration', ''),
        'CreationDate': element.attrib.get('creationDate', ''),
        'StartDate': element.attrib.get('startDate', ''),
        'EndDate': element.attrib.get('endDate', '')
    }

    # identifier in the XML -> key in the result
    metadata_fields = {
        'HKAverageMETs': 'AverageMETs',
        'HKWeatherTemperature': 'WeatherTemperature',
    }
    statistics_fields = {
        'HKQuantityTypeIdentifierDistanceSwimming': 'DistanceSwimming',
        'HKQuantityTypeIdentifierActiveEnergyBurned': 'EnergyBurned',
    }

    for parent in element:
        tag = parent.tag

        if tag == 'MetadataEntry':
            # Match on the `key` attribute specifically, not on "any
            # attribute value", so an unrelated attribute can never match.
            field = metadata_fields.get(parent.get('key'))
            raw_value = parent.get('value')
            if field is not None and raw_value is not None:
                result_dict[field] = raw_value.split(' ')[0]

        elif tag == 'WorkoutStatistics':
            field = statistics_fields.get(parent.get('type'))
            total = parent.get('sum')
            if field is not None and total is not None:
                result_dict[field] = total

        elif (tag == 'WorkoutEvent'
              and parent.get('type') == 'HKWorkoutEventTypeLap'):
            for child in parent:
                # A lap can carry metadata other than the stroke style.
                # Counting those would inflate 'UnknownStrokeStyle', so only
                # the stroke-style entry is considered.
                if child.get('key') != 'HKSwimmingStrokeStyle':
                    continue
                stroke_style = parse_stroke_style(child.get('value'))
                result_dict[stroke_style] = result_dict.get(
                    stroke_style, 0) + 1

    return result_dict

In [None]:
# One flat dict per swimming workout, in the same order as workout_list.
workouts = list(map(xml_to_dict, workout_list))

In [None]:
# One row per workout dict. Keys that a given workout lacks (e.g. a stroke
# style it had no laps of) become NaN in that row.
df = pd.DataFrame(workouts)

In [None]:
# Preview the last rows of the freshly built frame.
df.tail()

In [None]:
# Inspect the column dtypes before conversion; values read from XML
# attributes are still strings at this point.
df.dtypes

In [None]:
# Turn the timestamp strings into pandas datetimes so the `.dt` accessor
# can be used on them later.
# NOTE(review): if the export mixes UTC offsets (e.g. across a DST change),
# to_datetime may not produce a datetime64 column — confirm with df.dtypes.
for col in ('CreationDate', 'StartDate', 'EndDate'):
    df[col] = df[col].pipe(pd.to_datetime)

In [None]:
# Convert the measurement columns from strings to numbers so they can be
# summed, averaged and plotted.
for col in ('Duration', 'AverageMETs', 'WeatherTemperature',
            'DistanceSwimming', 'EnergyBurned'):
    df[col] = df[col].pipe(pd.to_numeric)

In [None]:
# Confirm the date and numeric conversions took effect.
df.dtypes

In [None]:
# Summary statistics as a quick sanity check of the parsed values.
df.describe()

In [None]:
# Count missing values per column to see what needs filling.
df.isna().sum()

In [None]:
# Fill missing stroke-style lap counts with 0.
# A stroke-style column only exists if at least one parsed workout had a lap
# of that style, so indexing a fixed list of columns raises KeyError on data
# where e.g. butterfly was never swum. Cover every style in STROKE_STYLE_MAP:
# fill NaN where the column exists, and create it as all zeros where it
# does not.
for col in STROKE_STYLE_MAP.values():
    if col in df.columns:
        df[col] = df[col].fillna(0)
    else:
        df[col] = 0

In [None]:
# Impute missing WeatherTemperature readings with the mean of the observed
# ones (the mean ignores NaN, so it is computed from the known values only).
df['WeatherTemperature'] = df['WeatherTemperature'].pipe(
    lambda temps: temps.fillna(temps.mean()))

In [None]:
# Re-check missing values now that the fills have been applied.
df.isna().sum()

In [None]:
# Final look at the last rows after cleaning.
df.tail()

In [None]:
# DistanceSwimming against the day of the month (1-31) on which each workout
# started. This is not a per-month total: the x value is `StartDate.dt.day`,
# so workouts from different months that share a day number land on the same
# x position and are aggregated by seaborn's lineplot.
sns.lineplot(x=df['StartDate'].dt.day, y=df['DistanceSwimming'])
plt.show()

In [None]:
# Distribution of EnergyBurned per workout (each row of df is one workout,
# not one day), drawn as a 20-bin histogram.
sns.histplot(x=df['EnergyBurned'], bins=20)
plt.show()

In [None]:
# Totals for distance, duration and energy burned, plus the mean weather
# temperature across all workouts.
total_distance = df['DistanceSwimming'].sum()
total_time = df['Duration'].sum()
total_calories = df['EnergyBurned'].sum()
weather_mean = df['WeatherTemperature'].mean()
# Convert the mean temperature from Fahrenheit to Celsius.
# NOTE(review): assumes WeatherTemperature is in degF — the unit suffix is
# dropped during parsing, so confirm against the export.
weather_mean_degrees = (weather_mean - 32) * 5/9

# NOTE(review): the unit labels printed below (meters, minutes, calories) are
# not checked against the unit attributes in the export — confirm.
print(f"Total distance: {total_distance} meters")
print(f"Total time: {total_time} minutes")
print(f"Total Calories: {total_calories} calories")
print(f"Weather mean: {weather_mean_degrees} degrees")