Skip to content

Commit 45915f2

Browse files
committed
Refactor tcx extraction
1 parent 8769453 commit 45915f2

File tree

5 files changed

+50
-53
lines changed

5 files changed

+50
-53
lines changed

Dockerfile

Lines changed: 7 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -32,11 +32,16 @@ RUN --mount=type=cache,target=/root/.cache/pip \
3232
--mount=type=bind,source=requirements.txt,target=requirements.txt \
3333
python -m pip install -r requirements.txt
3434

35+
# Copy the source code into the container.
36+
COPY . .
37+
38+
# Setup zig executable
39+
RUN python build_zig.py
40+
3541
# Switch to the non-privileged user to run the application.
3642
USER dockeruser
3743

38-
# Copy the source code into the container.
39-
COPY . .
44+
# Give dockeruser a few rights
4045
ADD --chown=dockeruser --chmod=700 ./activity_files ./activity_files
4146
ADD --chown=dockeruser --chmod=700 ./.token ./.token
4247

build_zig.py

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,4 @@
1+
# The poor man's solution to getting zig executable for machine in question
2+
import tcx_extract as tcx
3+
4+
tcx.build_zig()

lib/file_manager/file_manager.py

Lines changed: 36 additions & 50 deletions
Original file line number | Diff line number | Diff line change
@@ -5,6 +5,7 @@
55
import pandas as pd
66
from bs4 import BeautifulSoup
77
from dotenv import load_dotenv
8+
import tcx_extract as tcx
89

910
load_dotenv()
1011

@@ -30,60 +31,45 @@ def save_activity_raw_file(activity_id, activity_raw_data):
3031
def to_unix(timestamp):
3132
unix_time_start = pd.Timestamp("1970-01-01").tz_localize("UTC")
3233
increment = pd.Timedelta("1s")
33-
timestamp = pd.Timestamp(timestamp)
34+
timestamp = pd.to_datetime(timestamp, errors='coerce')
3435
return (timestamp - unix_time_start) // increment
3536

3637
def convert_activity_file(
3738
activity_id, original_file_path, format="csv"
3839
):
39-
selectors = {
40-
"heart_rate": "HeartRateBpm > Value",
41-
"watts": "ns3:Watts",
42-
"time": "Time",
43-
}
44-
with open(f"{original_file_path}", "rb") as file:
45-
soup = BeautifulSoup(file, features="lxml-xml")
46-
track_points = soup.find_all('Trackpoint')
47-
if len(track_points) == 0:
48-
return False
49-
data = []
50-
for track_point in track_points:
51-
time_point = track_point.find(selectors['time']).text
52-
try:
53-
heart_rate_point = track_point.select(selectors['heart_rate'])[0].text
54-
except:
55-
heart_rate_point = 0
56-
try:
57-
watts_point = track_point.find(selectors['watts']).text
58-
except:
59-
watts_point = 0
60-
61-
data.append({
62-
'time': to_unix(time_point),
63-
'heart_rate': int(heart_rate_point),
64-
'watts': int(watts_point)
65-
})
66-
67-
data_df = pd.DataFrame(data)
68-
data_dict = data_df.to_dict(orient='list')
40+
time_points = tcx.extract(original_file_path, "Time")
41+
watts = tcx.extract(original_file_path, "ns3:Watts")
42+
heart_rate = tcx.extract(original_file_path, "Value")
43+
44+
data_df = pd.DataFrame({
45+
'time': time_points,
46+
'heart_rate': heart_rate,
47+
'watts': watts
48+
})
49+
50+
data_df['time'] = to_unix(data_df['time']).fillna(0).astype('int64')
51+
data_df['heart_rate'] = pd.to_numeric(data_df['heart_rate'], errors='coerce').fillna(0).astype('uint16')
52+
data_df['watts'] = pd.to_numeric(data_df['watts'], errors='coerce').fillna(0).astype('uint16')
53+
54+
data_dict = data_df.to_dict(orient='list')
6955

70-
activity_data = {
71-
"activity_id": activity_id,
72-
"start_time": data_df.iloc[0]['time'],
73-
"data": data_dict
56+
activity_data = {
57+
"activity_id": activity_id,
58+
"start_time": data_df.iloc[0]['time'],
59+
"data": data_dict
60+
}
61+
df = pd.DataFrame([activity_data])
62+
df = df.astype(
63+
{
64+
"activity_id": "int64",
65+
"start_time": "int64",
66+
"data": "string"
7467
}
75-
df = pd.DataFrame([activity_data])
76-
df = df.astype(
77-
{
78-
"activity_id": "int64",
79-
"start_time": "int64",
80-
"data": "string"
81-
}
82-
)
83-
if format == "csv":
84-
converted_file_path = f"{CONVERTED_FILES_PATH}/{activity_id}.csv"
85-
df.to_csv(converted_file_path, index=False)
86-
if format == "parquet":
87-
converted_file_path = f"{CONVERTED_FILES_PATH}/{activity_id}.parquet"
88-
df.to_parquet(converted_file_path, index=False)
89-
return converted_file_path
68+
)
69+
if format == "csv":
70+
converted_file_path = f"{CONVERTED_FILES_PATH}/{activity_id}.csv"
71+
df.to_csv(converted_file_path, index=False)
72+
if format == "parquet":
73+
converted_file_path = f"{CONVERTED_FILES_PATH}/{activity_id}.parquet"
74+
df.to_parquet(converted_file_path, index=False)
75+
return converted_file_path

main.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,6 @@
11
import os
22
import logging
3+
import time
34
from lib.api.garmin import garmin
45
from lib.file_manager import file_manager
56
from dotenv import load_dotenv

requirements.txt

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -8,4 +8,5 @@ numpy==1.26.3
88
pandas==2.2.0
99
pyarrow==15.0.0
1010
python-dotenv==1.0.0
11-
google-cloud-storage==2.14.0
11+
google-cloud-storage==2.14.0
12+
tcx_extract==0.1.0.1

0 commit comments

Comments (0)