In [2]:
import xml.etree.ElementTree as ET
import pandas as pd
from tqdm import tqdm
import os

In [10]:
import os
import pandas as pd
import xml.etree.ElementTree as ET

# Collect every .tcx file below ./activities/tcx, including nested folders.
tcx_dir = os.path.join('.', 'activities', 'tcx')
activity_files = [
    os.path.join(folder, name)
    for folder, _subdirs, names in os.walk(tcx_dir)
    for name in names
    if name.endswith('.tcx')
]

print(f'Found {len(activity_files)} activity files')

# XML namespace prefixes used in the ElementTree queries on the TCX documents.
ns = {
    'default': 'http://www.garmin.com/xmlschemas/TrainingCenterDatabase/v2',
    'ext': 'http://www.garmin.com/xmlschemas/ActivityExtension/v2',
}

# Convert every TCX activity into a per-trackpoint CSV under ./activities/csv,
# mirroring the folder layout found under ./activities/tcx.
#
# NOTE(review): the original cell appended heart_rate_1min / heart_rate_5min /
# heart_rate_10min without ever assigning them, so it raised NameError on a fresh
# kernel. They are implemented here as the heart rate recorded at least 1 / 5 / 10
# minutes before the current sample, falling back to the current reading while the
# activity is younger than the lag. That reading is an assumption - confirm it is
# the intended feature before relying on these columns.
hr_lag_seconds = {'HeartRate1min': 60, 'HeartRate5min': 300, 'HeartRate10min': 600}


def _tp_text(tp, path):
    """Return the text of the element at `path` below `tp`, or None if it is absent or blank."""
    text = tp.findtext(path, namespaces=ns)
    if text is None or not text.strip():
        return None
    return text


def _tp_float(tp, path):
    """Return the element at `path` below `tp` parsed as float, or None if it is absent or blank."""
    text = _tp_text(tp, path)
    return float(text) if text is not None else None


for act in tqdm(activity_files):
    tree = ET.parse(act)
    root = tree.getroot()
    trackpoints = root.findall('.//default:Trackpoint', ns)

    data = []

    last_altitude = None
    last_distance = None

    # History used to look up the lagged heart rate. One cursor per lag column
    # walks forward through `stamps`, which assumes the trackpoints are stored
    # in chronological order.
    stamps = []
    heart_rates = []
    lag_cursor = dict.fromkeys(hr_lag_seconds, 0)

    for tp in trackpoints:
        # Time is read strictly: a trackpoint without it still fails loudly, as before.
        timestamp = tp.find('default:Time', ns).text
        stamp = pd.Timestamp(timestamp)

        # The remaining fields may be missing from a trackpoint; they become
        # None (an empty CSV cell) instead of raising AttributeError.
        latitude = _tp_text(tp, 'default:Position/default:LatitudeDegrees')
        longitude = _tp_text(tp, 'default:Position/default:LongitudeDegrees')

        # Differences are taken against the last *known* value, so a gap in the
        # data does not reset them. The first known value has a difference of 0.
        altitude = _tp_float(tp, 'default:AltitudeMeters')
        altitude_diff = None
        if altitude is not None:
            altitude_diff = altitude - last_altitude if last_altitude is not None else 0
            last_altitude = altitude

        distance = _tp_float(tp, 'default:DistanceMeters')
        distance_diff = None
        if distance is not None:
            distance_diff = distance - last_distance if last_distance is not None else 0
            last_distance = distance

        heart_rate = _tp_text(tp, 'default:HeartRateBpm/default:Value')

        # The source value is multiplied by 3.6 exactly as in the original cell.
        speed = _tp_float(tp, 'default:Extensions/ext:TPX/ext:Speed')
        speed_kmh = speed * 3.6 if speed is not None else None

        if altitude_diff is None or distance_diff is None:
            grade = None
        else:
            grade = altitude_diff / distance_diff if distance_diff != 0 else 0

        stamps.append(stamp)
        heart_rates.append(heart_rate)
        lagged_heart_rates = []
        for column, lag in hr_lag_seconds.items():
            pos = lag_cursor[column]
            # Advance to the newest sample that is still at least `lag` seconds old.
            while pos + 1 < len(stamps) and (stamp - stamps[pos + 1]).total_seconds() >= lag:
                pos += 1
            lag_cursor[column] = pos
            old_enough = (stamp - stamps[pos]).total_seconds() >= lag
            lagged_heart_rates.append(heart_rates[pos] if old_enough else heart_rate)
        heart_rate_1min, heart_rate_5min, heart_rate_10min = lagged_heart_rates

        data.append([timestamp, latitude, longitude, altitude, altitude_diff, distance, speed_kmh, grade, heart_rate, heart_rate_1min, heart_rate_5min, heart_rate_10min])

    df = pd.DataFrame(data, columns=['Time', 'Latitude', 'Longitude', 'Altitude', 'AltitudeDiff', 'Distance', 'Speed', 'Grade', 'HeartRate', 'HeartRate1min', 'HeartRate5min', 'HeartRate10min'])

    # Swap only the file extension; a '.tcx' elsewhere in the path is left alone.
    rel_path = os.path.relpath(act, os.path.join('.', 'activities', 'tcx'))
    csv_path = os.path.join('.', 'activities', 'csv', os.path.splitext(rel_path)[0] + '.csv')
    os.makedirs(os.path.dirname(csv_path), exist_ok=True)
    df.to_csv(csv_path, index=False)

# Gather the exported CSVs below ./activities/csv/kaggle, including nested folders.
kaggle_dir = os.path.join('.', 'activities', 'csv', 'kaggle')
kaggle_files = [
    os.path.join(folder, name)
    for folder, _subdirs, names in os.walk(kaggle_dir)
    for name in names
    if name.endswith('.csv')
]

print(f'Found {len(kaggle_files)} kaggle files')

# Read each CSV and tag its rows with an Activity label taken from the file name
# (with '.csv' removed), then stack all frames into one.
kaggle_frames = []
for csv_file in kaggle_files:
    activity_name = os.path.basename(csv_file).replace('.csv', '')
    kaggle_frames.append(pd.read_csv(csv_file).assign(Activity=activity_name))
df = pd.concat(kaggle_frames)

print(df.head())

Found 37 activity files


  0%|          | 0/37 [00:00<?, ?it/s]

100%|██████████| 37/37 [00:23<00:00,  1.59it/s]


Found 33 kaggle files
                   Time   Latitude  Longitude  Altitude  AltitudeDiff  \
0  2016-07-09T12:50:25Z  30.312931 -97.732638     209.6           0.0   
1  2016-07-09T12:50:26Z  30.312892 -97.732639     209.6           0.0   
2  2016-07-09T12:50:27Z  30.312861 -97.732631     209.6           0.0   
3  2016-07-09T12:50:28Z  30.312823 -97.732617     210.0           0.0   
4  2016-07-09T12:50:29Z  30.312788 -97.732592     210.2           0.0   

   Distance  Speed  Grade  HeartRate  Acceleration  HeartRate1min  \
0       3.9   0.00      0         88             0           88.0   
1       8.0   0.00      0         88             0           88.0   
2      12.0   0.00      0         87             0           87.0   
3      16.2  14.76      0         88             0           88.0   
4      20.3  14.76      0         88             0           88.0   

   HeartRate5min  HeartRate10min  Activity  
0           88.0            88.0  360_Loop  
1           88.0            88.0  