In [1]:
import sys, os, time, re
import pandas as pd
import pyarrow

# Extend the module search path before importing FPTransformer from
# pyfitparquet_so. The path is relative, so it is resolved against the
# kernel's current working directory.
# NOTE(review): presumably '../cpp/fit-build' holds the locally built
# pyfitparquet_so extension module - confirm against the build setup.
sys.path.append('../cpp/fit-build')
from pyfitparquet_so import FPTransformer

In [2]:
# USER: must set FIT_DATA_DIR to the location of the .fit files.
# PARQUET_DIR defaults to a "parquet" subdirectory within FIT_DATA_DIR.

FIT_DATA_DIR = '../../data'
PARQUET_DIR = FIT_DATA_DIR + '/parquet'

# Fail early, with a message that names the setting to fix, when the input
# directory is missing (instead of a generic mkdir error on the derived path).
if not os.path.isdir(FIT_DATA_DIR):
    raise FileNotFoundError(
        f"FIT_DATA_DIR does not exist or is not a directory: {FIT_DATA_DIR}")

# Create the output directory. exist_ok=True makes re-running this cell a
# no-op when the directory is already there, while still raising
# FileExistsError if a regular file occupies the path.
os.makedirs(PARQUET_DIR, exist_ok=True)

In [3]:
# Serializes every .fit file found in FIT_DATA_DIR into PARQUET_DIR.
# The paths of successfully written parquet files are collected in
# parquetfiles for use by later cells.

parquetfiles = []
transformer = FPTransformer()
for file in os.listdir(FIT_DATA_DIR):
    # Raw string, escaped dot and fullmatch: accept only names of the form
    # "<word characters>.fit". The previous pattern also accepted names that
    # merely started with "<word chars><any char>fit" (e.g. "benefit" or
    # "ride.fit.bak").
    matchobj = re.fullmatch(r'(\w+)\.fit', file)
    if not matchobj:
        continue

    fitfile = f"{FIT_DATA_DIR}/{file}"
    parquetfile = f"{PARQUET_DIR}/{matchobj.group(1)}.parquet"
    print(f"Serializing {fitfile} => {parquetfile}")

    initial = time.time()
    # A return value of 0 is treated as success; anything else is reported
    # instead of being silently skipped.
    status = transformer.fit_to_parquet(fitfile, parquetfile)
    if status == 0:
        print(f"Serialization completed in {time.time()-initial:.3f} sec\n")
        parquetfiles.append(parquetfile)
    else:
        print(f"Serialization FAILED with status {status}\n")

Serializing ../../data/Bolt_GPS.fit => ../../data/parquet/Bolt_GPS.parquet
Serialization completed in 13.264 sec

Serializing ../../data/Who_Dares_Whoop.fit => ../../data/parquet/Who_Dares_Whoop.parquet
Serialization completed in 0.161 sec

Serializing ../../data/Who_Dares_Bolt.fit => ../../data/parquet/Who_Dares_Bolt.parquet
Serialization completed in 0.878 sec

Serializing ../../data/Who_Dares_Sufferfest.fit => ../../data/parquet/Who_Dares_Sufferfest.parquet
Serialization completed in 0.587 sec



In [4]:
# Loads each serialized parquet file into a pandas DataFrame, stores it in
# dframes keyed by its file path, and displays a summary of every frame.

dframes = {}
for pfile in parquetfiles:
    # Keep the frame both in the dict and under pdframe for display below.
    dframes[pfile] = pdframe = pd.read_parquet(pfile, engine='pyarrow')
    print(f"{pfile}:")
    display(pdframe)
    print()

../../data/parquet/Bolt_GPS.parquet:


Unnamed: 0,source_filetype,source_filename,manufacturer_name,product_index,timestamp,mesg_name,field_index,field_name,field_type,value_string,value_float,units
0,FIT,Bolt_GPS.fit,WAHOO_FITNESS,31,2020-08-30 16:48:11,file_id,4,time_created,float,967740491.0,9.677405e+08,
1,FIT,Bolt_GPS.fit,WAHOO_FITNESS,31,2020-08-30 16:48:11,file_id,0,type,float,4.0,4.000000e+00,
2,FIT,Bolt_GPS.fit,WAHOO_FITNESS,31,2020-08-30 16:48:11,file_id,1,manufacturer,float,32.0,3.200000e+01,
3,FIT,Bolt_GPS.fit,WAHOO_FITNESS,31,2020-08-30 16:48:11,file_id,2,product,float,31.0,3.100000e+01,
4,FIT,Bolt_GPS.fit,WAHOO_FITNESS,31,2020-08-30 16:48:11,file_id,3,serial_number,float,3338297344.0,3.338297e+09,
...,...,...,...,...,...,...,...,...,...,...,...,...
360351,FIT,Bolt_GPS.fit,WAHOO_FITNESS,31,2020-08-30 22:49:26,activity,1,num_sessions,float,1.0,1.000000e+00,
360352,FIT,Bolt_GPS.fit,WAHOO_FITNESS,31,2020-08-30 22:49:26,activity,2,type,float,0.0,0.000000e+00,
360353,FIT,Bolt_GPS.fit,WAHOO_FITNESS,31,2020-08-30 22:49:26,activity,3,event,float,26.0,2.600000e+01,
360354,FIT,Bolt_GPS.fit,WAHOO_FITNESS,31,2020-08-30 22:49:26,activity,4,event_type,float,1.0,1.000000e+00,



../../data/parquet/Who_Dares_Whoop.parquet:


Unnamed: 0,source_filetype,source_filename,manufacturer_name,product_index,timestamp,mesg_name,field_index,field_name,field_type,value_string,value_float,units
0,FIT,Who_Dares_Whoop.fit,WHOOP,3,NaT,file_id,1,manufacturer,float,305.0,305.0,
1,FIT,Who_Dares_Whoop.fit,WHOOP,3,NaT,file_id,0,type,float,4.0,4.0,
2,FIT,Who_Dares_Whoop.fit,WHOOP,3,NaT,file_id,2,product,float,3.0,3.0,
3,FIT,Who_Dares_Whoop.fit,WHOOP,3,NaT,file_id,3,serial_number,float,12345.0,12345.0,
4,FIT,Who_Dares_Whoop.fit,WHOOP,3,2020-11-25 19:00:06,session,253,timestamp,float,975265206.0,975265206.0,s
...,...,...,...,...,...,...,...,...,...,...,...,...
5407,FIT,Who_Dares_Whoop.fit,WHOOP,3,2020-11-25 19:43:21,record,3,heart_rate,float,135.0,135.0,bpm
5408,FIT,Who_Dares_Whoop.fit,WHOOP,3,2020-11-25 19:43:22,record,253,timestamp,float,975267802.0,975267802.0,s
5409,FIT,Who_Dares_Whoop.fit,WHOOP,3,2020-11-25 19:43:22,record,3,heart_rate,float,133.0,133.0,bpm
5410,FIT,Who_Dares_Whoop.fit,WHOOP,3,2020-11-25 19:43:23,record,253,timestamp,float,975267803.0,975267803.0,s



../../data/parquet/Who_Dares_Bolt.parquet:


Unnamed: 0,source_filetype,source_filename,manufacturer_name,product_index,timestamp,mesg_name,field_index,field_name,field_type,value_string,value_float,units
0,FIT,Who_Dares_Bolt.fit,WAHOO_FITNESS,31,2020-11-25 18:48:44,file_id,4,time_created,float,975264524.0,9.752645e+08,
1,FIT,Who_Dares_Bolt.fit,WAHOO_FITNESS,31,2020-11-25 18:48:44,file_id,0,type,float,4.0,4.000000e+00,
2,FIT,Who_Dares_Bolt.fit,WAHOO_FITNESS,31,2020-11-25 18:48:44,file_id,1,manufacturer,float,32.0,3.200000e+01,
3,FIT,Who_Dares_Bolt.fit,WAHOO_FITNESS,31,2020-11-25 18:48:44,file_id,2,product,float,31.0,3.100000e+01,
4,FIT,Who_Dares_Bolt.fit,WAHOO_FITNESS,31,2020-11-25 18:48:44,file_id,3,serial_number,float,3338297344.0,3.338297e+09,
...,...,...,...,...,...,...,...,...,...,...,...,...
36394,FIT,Who_Dares_Bolt.fit,WAHOO_FITNESS,31,2020-11-25 19:43:38,activity,1,num_sessions,float,1.0,1.000000e+00,
36395,FIT,Who_Dares_Bolt.fit,WAHOO_FITNESS,31,2020-11-25 19:43:38,activity,2,type,float,0.0,0.000000e+00,
36396,FIT,Who_Dares_Bolt.fit,WAHOO_FITNESS,31,2020-11-25 19:43:38,activity,3,event,float,26.0,2.600000e+01,
36397,FIT,Who_Dares_Bolt.fit,WAHOO_FITNESS,31,2020-11-25 19:43:38,activity,4,event_type,float,1.0,1.000000e+00,



../../data/parquet/Who_Dares_Sufferfest.parquet:


Unnamed: 0,source_filetype,source_filename,manufacturer_name,product_index,timestamp,mesg_name,field_index,field_name,field_type,value_string,value_float,units
0,FIT,Who_Dares_Sufferfest.fit,THE_SUFFERFEST,1231,2020-11-25 18:48:03,file_id,0,type,float,4.0,4.0,
1,FIT,Who_Dares_Sufferfest.fit,THE_SUFFERFEST,1231,2020-11-25 18:48:03,file_id,1,manufacturer,float,282.0,282.0,
2,FIT,Who_Dares_Sufferfest.fit,THE_SUFFERFEST,1231,2020-11-25 18:48:03,file_id,2,product,float,1231.0,1231.0,
3,FIT,Who_Dares_Sufferfest.fit,THE_SUFFERFEST,1231,2020-11-25 18:48:03,file_id,3,serial_number,float,12345.0,12345.0,
4,FIT,Who_Dares_Sufferfest.fit,THE_SUFFERFEST,1231,2020-11-25 18:48:03,file_id,4,time_created,float,975264483.0,975264483.0,
...,...,...,...,...,...,...,...,...,...,...,...,...
22904,FIT,Who_Dares_Sufferfest.fit,THE_SUFFERFEST,1231,2020-11-25 19:42:13,activity,5,local_timestamp,float,0.0,0.0,
22905,FIT,Who_Dares_Sufferfest.fit,THE_SUFFERFEST,1231,2020-11-25 19:42:13,activity,1,num_sessions,float,1.0,1.0,
22906,FIT,Who_Dares_Sufferfest.fit,THE_SUFFERFEST,1231,2020-11-25 19:42:13,activity,2,type,float,0.0,0.0,
22907,FIT,Who_Dares_Sufferfest.fit,THE_SUFFERFEST,1231,2020-11-25 19:42:13,activity,3,event,float,26.0,26.0,





In [7]:
# Test re-initialization from the configuration file.
# USER: make some change to parquet_config.yml before running this cell.
#
# NOTE: fitfile and parquetfile are not set here; they hold whatever the
# serialization loop assigned last, i.e. the last .fit file it processed.
# This cell raises NameError if that loop found no matching files.
# NOTE(review): reset_from_config() presumably reloads the transformer
# settings from parquet_config.yml - confirm against FPTransformer.

initial = time.time()  # the reported time also covers reset_from_config()
transformer.reset_from_config()
print(f"Re-serializing {fitfile} => {parquetfile}")
# A return value of 0 is treated as success; report anything else rather
# than finishing the cell without any feedback.
status = transformer.fit_to_parquet(fitfile, parquetfile)
if status == 0:
    print(f"Serialization completed in {time.time()-initial:.3f} sec\n")
    dframes[parquetfile] = pd.read_parquet(parquetfile, engine='pyarrow')
    display(dframes[parquetfile])
else:
    print(f"Re-serialization FAILED with status {status}\n")

Re-serializing ../../data/Who_Dares_Sufferfest.fit => ../../data/parquet/Who_Dares_Sufferfest.parquet
Serialization completed in 0.695 sec



Unnamed: 0,source_filename,manufacturer_name,product_index,timestamp,mesg_name,field_index,field_name,field_type,value_string,value_float,units
0,Who_Dares_Sufferfest.fit,THE_SUFFERFEST,1231,2020-11-25 18:48:03,file_id,0,type,float,4.0,4.0,
1,Who_Dares_Sufferfest.fit,THE_SUFFERFEST,1231,2020-11-25 18:48:03,file_id,1,manufacturer,float,282.0,282.0,
2,Who_Dares_Sufferfest.fit,THE_SUFFERFEST,1231,2020-11-25 18:48:03,file_id,2,product,float,1231.0,1231.0,
3,Who_Dares_Sufferfest.fit,THE_SUFFERFEST,1231,2020-11-25 18:48:03,file_id,3,serial_number,float,12345.0,12345.0,
4,Who_Dares_Sufferfest.fit,THE_SUFFERFEST,1231,2020-11-25 18:48:03,file_id,4,time_created,float,975264483.0,975264483.0,
...,...,...,...,...,...,...,...,...,...,...,...
22904,Who_Dares_Sufferfest.fit,THE_SUFFERFEST,1231,2020-11-25 19:42:13,activity,5,local_timestamp,float,0.0,0.0,
22905,Who_Dares_Sufferfest.fit,THE_SUFFERFEST,1231,2020-11-25 19:42:13,activity,1,num_sessions,float,1.0,1.0,
22906,Who_Dares_Sufferfest.fit,THE_SUFFERFEST,1231,2020-11-25 19:42:13,activity,2,type,float,0.0,0.0,
22907,Who_Dares_Sufferfest.fit,THE_SUFFERFEST,1231,2020-11-25 19:42:13,activity,3,event,float,26.0,26.0,


In [8]:
# Print the column dtypes of every loaded DataFrame, labelled by the path
# of the parquet file it came from, with a blank line after each listing.
for pfile, frame in dframes.items():
    print(f"{pfile}:", frame.dtypes, "", sep="\n")

../../data/parquet/Bolt_GPS.parquet:
source_filetype              object
source_filename              object
manufacturer_name            object
product_index                 int32
timestamp            datetime64[ns]
mesg_name                    object
field_index                   int32
field_name                   object
field_type                   object
value_string                 object
value_float                 float64
units                        object
dtype: object

../../data/parquet/Who_Dares_Whoop.parquet:
source_filetype              object
source_filename              object
manufacturer_name            object
product_index                 int32
timestamp            datetime64[ns]
mesg_name                    object
field_index                   int32
field_name                   object
field_type                   object
value_string                 object
value_float                 float64
units                        object
dtype: object

../../data/parquet/Who_Da