In [1]:
# Notebook tooling: reload edited modules automatically and format cells.
# autoreload mode 2 re-imports modules before each cell runs, so edits to
# local packages are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2
# lab_black is loaded as a cell-formatting extension (presumably Black-based).
%load_ext lab_black

In [2]:
import os
from pyspark.sql import Column, DataFrame, SparkSession, functions as F, Window


# Build (or reuse) a local Spark session with Delta Lake and the S3A
# filesystem connector on the classpath.
_spark_builder = (
    SparkSession.builder.master("local[*]")
    .appName("fitviz")
    .config("spark.driver.memory", "8g")
    .config(
        "spark.jars.packages",
        "io.delta:delta-core_2.12:0.7.0,org.apache.hadoop:hadoop-aws:2.7.7",
    )
    .config(
        "spark.delta.logStore.class",
        "org.apache.spark.sql.delta.storage.S3SingleDriverLogStore",
    )
)

# Forward AWS credentials only when they are actually present in the
# environment. Passing a missing value straight to `.config()` would hand
# Spark `None` (stringified to the literal "None" on the Spark 3.0.x line this
# Delta release targets), which installs a bogus credential instead of letting
# S3A use its own credential lookup.
for _conf_key, _env_var in (
    ("spark.hadoop.fs.s3a.access.key", "AWS_ACCESS_KEY_ID"),
    ("spark.hadoop.fs.s3a.secret.key", "AWS_SECRET_ACCESS_KEY"),
):
    _env_value = os.getenv(_env_var)
    if _env_value:
        _spark_builder = _spark_builder.config(_conf_key, _env_value)

spark = _spark_builder.getOrCreate()

from delta.tables import DeltaTable

In [3]:
import os
import datetime
import altair as alt
import numpy as np
import pandas as pd

from pathlib import Path


from fitviz import FitFile

# Display settings for inspecting frames in the notebook:
# never elide columns, and show up to 500 rows before truncating.
pd.options.display.max_columns = None
pd.options.display.max_rows = 500

In [27]:
%%time
# Load the recordings of the "Who Dares" ride from three devices, plus a
# separate Bolt file that includes GPS. `reprocess=True` is passed for every
# file; the log output below shows the previously decoded CSV being removed
# and the FIT file being decoded again.
bolt, sufferfest, whoop, bolt_w_gps = (
    FitFile(fit_path, reprocess=True)
    for fit_path in (
        "fit_files/Who_Dares_Bolt.fit",
        "fit_files/Who_Dares_Sufferfest.fit",
        "fit_files/Who_Dares_WHOOP.fit",
        "fit_files/Bolt_GPS.fit",
    )
)

2020-12-01 18:54:36.123 | INFO     | fitviz.FitFile:_process_fit2csv:83 - Removing /Users/johnmdonich/fitvis-tmp/Who_Dares_Bolt-18677721.csv for reprocessing
2020-12-01 18:54:36.635 | INFO     | fitviz.FitFile:_process_fit2csv:98 - FIT CSV Tool - Protocol 2.0 Profile 21.40 Release
FIT binary file fit_files/Who_Dares_Bolt.fit decoded to /Users/johnmdonich/fitvis-tmp/Who_Dares_Bolt-18677721*.csv files.

2020-12-01 18:54:37.064 | INFO     | fitviz.FitFile:_process_fit2csv:83 - Removing /Users/johnmdonich/fitvis-tmp/Who_Dares_Sufferfest-18674217.csv for reprocessing
2020-12-01 18:54:37.480 | INFO     | fitviz.FitFile:_process_fit2csv:98 - FIT CSV Tool - Protocol 2.0 Profile 21.40 Release
FIT binary file fit_files/Who_Dares_Sufferfest.fit decoded to /Users/johnmdonich/fitvis-tmp/Who_Dares_Sufferfest-18674217*.csv files.

2020-12-01 18:54:37.858 | INFO     | fitviz.FitFile:_process_fit2csv:83 - Removing /Users/johnmdonich/fitvis-tmp/Who_Dares_WHOOP-18677504.csv for reprocessing
2020-12-01 18

CPU times: user 222 ms, sys: 80.5 ms, total: 302 ms
Wall time: 4.03 s


In [30]:
# Compare the rows of one message type across the three recordings.
attr = "file"

# Bolt: order by timestamp and message id, and print up to 32 rows.
(
    bolt.long_df.filter(F.col("message") == attr)
    .sort("timestamp", "message_id")
    .show(n=32)
)

# Sufferfest and WHOOP: order by timestamp only, default row count.
sufferfest.long_df.filter(F.col("message") == attr).sort("timestamp").show()
whoop.long_df.filter(F.col("message") == attr).sort("timestamp").show()

+-----------+------------+-------+-----------+-------------------+-------------+-------------+-----+
|record_type|local number|message| message_id|          timestamp|        field|        value|units|
+-----------+------------+-------+-----------+-------------------+-------------+-------------+-----+
|       Data|           0|   file|-1665041827|2020-11-25 13:48:44| time_created|    975264524| null|
|       Data|           0|   file|-1665041827|2020-11-25 13:48:44| manufacturer|wahoo_fitness| null|
|       Data|           0|   file|-1665041827|2020-11-25 13:48:44|serial_number|   3338297344| null|
|       Data|           0|   file|-1665041827|2020-11-25 13:48:44|         file|     activity| null|
|       Data|           0|   file|-1665041827|2020-11-25 13:48:44|      product|           31| null|
+-----------+------------+-------+-----------+-------------------+-------------+-------------+-----+

+-----------+------------+-------+----------+-------------------+-------------+-----------

In [42]:
# List the Sufferfest "record" rows in time order, with a constant "N/A"
# placeholder in a `device` column.
(
    sufferfest.long_df.filter(F.col("message") == "record")
    .select("*", F.lit("N/A").alias("device"))
    .select("timestamp", "device", "field", "value", "units")
    .sort("timestamp")
    .show()
)

+-------------------+------+--------------+---------+-----+
|          timestamp|device|         field|    value|units|
+-------------------+------+--------------+---------+-----+
|2020-11-25 13:48:03|   N/A|enhanced_speed|      0.0|  m/s|
|2020-11-25 13:48:03|   N/A|      distance|      0.0|    m|
|2020-11-25 13:48:03|   N/A|         power|        0|watts|
|2020-11-25 13:48:03|   N/A|    heart_rate|       64|  bpm|
|2020-11-25 13:48:03|   N/A|         speed|      0.0|  m/s|
|2020-11-25 13:48:03|   N/A|       cadence|        0|  rpm|
|2020-11-25 13:48:03|   N/A|     timestamp|975264483|    s|
|2020-11-25 13:48:04|   N/A|    heart_rate|       64|  bpm|
|2020-11-25 13:48:04|   N/A|         power|        0|watts|
|2020-11-25 13:48:04|   N/A|enhanced_speed|      0.0|  m/s|
|2020-11-25 13:48:04|   N/A|     timestamp|975264484|    s|
|2020-11-25 13:48:04|   N/A|       cadence|        0|  rpm|
|2020-11-25 13:48:04|   N/A|      distance|      0.0|    m|
|2020-11-25 13:48:04|   N/A|         spe

In [40]:
# bolt_w_gps.long_df.groupBy("field").agg(F.count("*")).show(100)

In [12]:
%%time
# Collect the "record" rows of the GPS-enabled Bolt file into pandas.
bolt_pdf = (
    bolt_w_gps.long_df
    .filter(F.col("message") == "record")
    .toPandas()
)

CPU times: user 27.5 s, sys: 373 ms, total: 27.9 s
Wall time: 30.8 s


In [24]:
# sufferfest.csv_df.show(50)

In [23]:
# Print the first 20 rows of the Sufferfest `csv_df` frame without
# truncating the wide `fields` column.
sufferfest.csv_df.show(n=20, truncate=False)

+-----------+------------+-----------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------+
|record_type|local number|message    |fields                                                                                                                                                              |
+-----------+------------+-----------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------+
|Data       |0           |file_id    |[[type, 4,], [manufacturer, 282,], [product, 1231,], [serial_number, 12345,], [time_created, 975264483,]]                                                           |
|Data       |0           |device_info|[[timestamp, 975267819, s], [battery_status, 2,]]                                                                                                 

In [20]:
# Show the first 25 rows ordered by timestamp, then message id.
# NOTE(review): `sort_values`/`head` are pandas DataFrame methods, but the
# cell that loads the FIT files binds `bolt` to a FitFile. The execution
# counts are out of order, so this cell probably ran against an earlier
# pandas frame named `bolt` - confirm and rebind before relying on
# Restart & Run All.
bolt.sort_values(["timestamp", "message_id"]).head(25)

Unnamed: 0,record_type,local number,message,message_id,timestamp,field,value,units
39720,Data,0,record,1016102544,2020-08-30 12:48:10,timestamp,967740490.0,s
51226,Data,0,record,1016102544,2020-08-30 12:48:10,distance,0.0,m
170364,Data,0,record,1016102544,2020-08-30 12:48:10,speed,0.759,m/s
192291,Data,0,record,1016102544,2020-08-30 12:48:10,enhanced_speed,0.759,m/s
338892,Data,0,record,1016102544,2020-08-30 12:48:10,heart_rate,96.0,bpm
8761,Data,0,record,765652872,2020-08-30 12:48:11,distance,0.55,m
13571,Data,0,record,765652872,2020-08-30 12:48:11,enhanced_speed,0.794,m/s
78985,Data,0,record,765652872,2020-08-30 12:48:11,timestamp,967740491.0,s
170366,Data,0,record,765652872,2020-08-30 12:48:11,speed,0.794,m/s
218006,Data,0,record,765652872,2020-08-30 12:48:11,power,0.0,watts


In [13]:
# Number of rows in the `csv_df` frame of the GPS-enabled Bolt file.
bolt_w_gps.csv_df.count()

22028

In [34]:
%%time
# Collect the Sufferfest "record" rows into pandas for plotting.
sufferfest_records = (
    sufferfest.long_df
    .filter(F.col("message") == "record")
    .toPandas()
)

CPU times: user 1.65 s, sys: 20.7 ms, total: 1.67 s
Wall time: 2.12 s


In [36]:
# (rows, columns) of the collected Sufferfest record frame.
sufferfest_records.shape

(22750, 8)

In [157]:
# Distinct field names present in the Sufferfest record rows.
sufferfest_records["field"].unique()

array(['cadence', 'distance', 'enhanced_speed', 'heart_rate', 'power',
       'speed', 'timestamp'], dtype=object)

In [35]:
import altair as alt

# Bar chart of the Sufferfest power readings over time: keep only the rows
# whose field is "power", then plot value against timestamp.
alt.Chart(
    sufferfest_records.loc[sufferfest_records["field"] == "power"]
).mark_bar().encode(
    x="timestamp:T",
    y="value:Q",
)

In [173]:
# Number of partitions backing the Sufferfest long-format Spark frame.
sufferfest.long_df.rdd.getNumPartitions()

8

In [117]:
# bolt.long_df.where(F.col("message") == "device_info").toPandas()

In [14]:
# Count the WHOOP rows per message type (non-null `message` values only,
# since the aggregate counts the `message` column).
(
    whoop.long_df
    .groupby("message")
    .agg(F.count("message").alias("records"))
    .show()
)

+-------+-------+
|message|records|
+-------+-------+
|   file|      4|
|session|     11|
| record|   5397|
+-------+-------+



In [9]:
# Peek at the Bolt long-format frame (Spark prints the first 20 rows by default).
bolt.long_df.show()

+-----------+------------+-----------+-----------+-------------------+-------------------+----------+-------+
|record_type|local number|    message| message_id|          timestamp|              field|     value|  units|
+-----------+------------+-----------+-----------+-------------------+-------------------+----------+-------+
|       Data|           0|device_info|  462092124|2020-11-25 14:01:49|antplus_device_type|bike_power|   null|
|       Data|           0|device_info| 2098285500|2020-11-25 14:41:49|antplus_device_type|bike_power|   null|
|       Data|           0|device_info| -835262131|2020-11-25 14:31:49|antplus_device_type|bike_power|   null|
|       Data|           0|device_info| -257609311|2020-11-25 13:48:44|antplus_device_type|bike_power|   null|
|       Data|           0|device_info|  682523891|2020-11-25 14:11:49|antplus_device_type|bike_power|   null|
|       Data|           0|device_info|  708168908|2020-11-25 14:21:49|antplus_device_type|bike_power|   null|
|       Da

In [21]:
from fitSDK import PROFILE