This notebook reads every FIT file in a directory and assembles the data into a single pandas DataFrame.

In [None]:
import numpy as np
import pandas as pd

import time, sys

#import xml.etree.ElementTree as ET
#import lxml.etree
import fitparse

#import datetime
#from datetime import datetime, timedelta

#import dateutil.parser as dp

import os



#debug = True

# File formats the loading loop further down dispatches on.
input_types = ['FIT', 'TCX', 'GPX']
# Format used for this run: the first entry, i.e. 'FIT'.
input_type = input_types[0]


FIT file input (it has more data than the TCX export)

In [None]:
def get_data_from_record(record):
    """Extract the fields of interest from one FIT "record" message.

    Field names are matched by substring and each recognised value is stored
    under a short key:

        accumulated_power -> 'acc_power'     power       -> 'm_power'
        enhanced_speed    -> 'e_gs'          speed       -> 'gs'
        fractional_cadence-> 'frac_cadence'  cadence     -> 'cadence'
        latitude          -> 'lat'           longitude   -> 'lon'
        elevation         -> 'elev'          timestamp   -> 'ts'
        heart_rate        -> 'heart_rate'    resistance  -> 'resist'
        temperature       -> 'amb_temp'      distance    -> 'dist'

    The generic 'power' and 'speed' matches deliberately skip names that
    contain 'accumulated_power' / 'enhanced_speed'. With plain substring
    tests those specific fields also matched the generic rule, so 'm_power'
    and 'gs' ended up holding whichever field happened to be iterated last.

    Parameters
    ----------
    record : iterable
        Iterable of field objects exposing ``name`` (str) and ``value``
        attributes.

    Returns
    -------
    dict
        Mapping of the short keys above to field values. Only keys whose
        field was present in ``record`` are included; if several fields map
        to the same key, the last one wins.
    """
    # Substring -> output key for fields whose names do not overlap with
    # any other rule.
    # NOTE(review): the standard FIT profile appears to name the position and
    # height fields 'position_lat', 'position_long' and 'altitude' - confirm
    # that 'latitude', 'longitude' and 'elevation' match the files in use.
    simple_rules = (
        ('latitude', 'lat'),
        ('longitude', 'lon'),
        ('elevation', 'elev'),
        ('timestamp', 'ts'),
        ('heart_rate', 'heart_rate'),
        ('fractional_cadence', 'frac_cadence'),
        ('resistance', 'resist'),
        ('temperature', 'amb_temp'),
        ('distance', 'dist'),
    )

    new_record = {}
    for data in record:
        name = data.name
        value = data.value

        # Specific name first so it cannot clobber the generic key.
        if 'accumulated_power' in name:
            new_record['acc_power'] = value
        elif 'power' in name:
            new_record['m_power'] = value

        if 'enhanced_speed' in name:
            new_record['e_gs'] = value
        elif 'speed' in name:
            new_record['gs'] = value

        # 'cadence' has to start within the first 8 characters of the name;
        # this keeps 'fractional_cadence' (offset 11) out of the plain
        # 'cadence' key.
        if 0 <= name.find('cadence') < 8:
            new_record['cadence'] = value

        for needle, key in simple_rules:
            if needle in name:
                new_record[key] = value

    return new_record

In [None]:
def read_FIT_file(fitfilename):
    """Read one FIT file into a DataFrame with one row per "record" message.

    Every message of type "record" is reduced to a dict by
    ``get_data_from_record``; the dicts become the rows of the frame. Two
    derived columns are added:

    * ``delta_t``   - seconds elapsed since the previous row's ``ts``
    * ``delta_cad`` - change in ``cadence`` since the previous row

    Rows containing any missing value are then dropped (this always includes
    the first row, whose differences are undefined) and the index is
    renumbered from 0.

    Parameters
    ----------
    fitfilename : str
        Path of the FIT file, passed to ``fitparse.FitFile``.

    Returns
    -------
    pandas.DataFrame
        The cleaned track. An empty DataFrame is returned when the file
        yields no record data.

    Raises
    ------
    KeyError
        If records exist but none of them provides ``ts`` or ``cadence``.
    """
    fitfile = fitparse.FitFile(fitfilename)

    # Gather plain dicts and build the frame once. Appending row by row
    # copies the whole frame on every iteration, and DataFrame.append no
    # longer exists in pandas 2.x.
    # (Other message types include "device_info", "file_creator", "event".)
    records = [
        get_data_from_record(record)
        for record in fitfile.get_messages("record")
    ]
    track = pd.DataFrame(records)
    if track.empty:
        # Nothing to difference; avoid a KeyError on the missing columns.
        return pd.DataFrame()

    track['delta_t'] = track['ts'].diff().dt.total_seconds()
    track['delta_cad'] = track['cadence'].diff()

    # dropna removes the first row (NaN diffs) and any row with a missing
    # field; drop=True renumbers the index without keeping the old one.
    return track.dropna().reset_index(drop=True)

In [None]:
# Directories searched (non-recursively) for input files.
directories = ['./sample_data']

# Full paths of every file whose name ends in ".fit" (case-sensitive).
# os.listdir returns names in arbitrary order, so they are sorted to make the
# order of the files - and therefore of the rows in the combined table -
# reproducible from run to run.
data_files = []
for directory in directories:
    data_files.extend(
        os.path.join(directory, filename)
        for filename in sorted(os.listdir(directory))
        if filename.endswith(".fit")
    )

In [None]:
# Read every input file and stack the per-file tracks into one DataFrame.
# The frames are collected in a list and concatenated once at the end:
# DataFrame.append was removed in pandas 2.0 and re-copied the accumulated
# frame for every file.
per_file_tracks = []
for input_file in data_files:

    print(f'Building track database for file {input_file}')
    start_time = time.time()

    if input_type == 'FIT':
        track = read_FIT_file(input_file)
    elif input_type == 'TCX':
        # NOTE(review): get_dataframes is not defined in the cells shown here.
        laps_df, track = get_dataframes(input_file)
    elif input_type == 'GPX':
        # NOTE(review): read_GPX_file is not defined in the cells shown here.
        track = read_GPX_file(input_file)
    else:
        # Fail loudly instead of reusing a stale or undefined `track`.
        raise ValueError(f'Unsupported input_type: {input_type!r}')
    end_time = time.time()
    print()
    print('Track build time was {:1.1f} seconds'.format(end_time - start_time))
    per_file_tracks.append(track)

# pd.concat raises on an empty list, so fall back to an empty frame when no
# input files were found. Each file keeps its own 0-based index.
tracks = pd.concat(per_file_tracks) if per_file_tracks else pd.DataFrame()

In [None]:
# Show the first rows of the combined frame as a quick sanity check.
tracks.head()