5
5
import pandas as pd
6
6
from bs4 import BeautifulSoup
7
7
from dotenv import load_dotenv
8
+ import tcx_extract as tcx
8
9
9
10
load_dotenv ()
10
11
@@ -30,60 +31,45 @@ def save_activity_raw_file(activity_id, activity_raw_data):
30
31
def to_unix(timestamp):
    """Convert a timestamp, or a collection of timestamps, to Unix seconds.

    Args:
        timestamp: Anything ``pd.to_datetime`` accepts: a single value
            (string, ``datetime``, ``pd.Timestamp``) or a list-like /
            ``pd.Series`` of such values.

    Returns:
        Whole seconds elapsed since 1970-01-01T00:00:00 UTC. A scalar input
        gives a scalar; a list-like or Series input gives an element-wise
        result of the matching pandas container type. Values that cannot be
        parsed are coerced to NaT and surface as missing values (NaN) in the
        result, so callers should ``fillna`` before casting to an integer
        dtype.
    """
    unix_time_start = pd.Timestamp("1970-01-01").tz_localize("UTC")
    increment = pd.Timedelta("1s")
    # utc=True makes the parsed values timezone-aware: offset-less input is
    # interpreted as UTC and offset-carrying input is converted to UTC.
    # Without it, naive input cannot be subtracted from the tz-aware epoch
    # above and raises TypeError.
    timestamp = pd.to_datetime(timestamp, errors="coerce", utc=True)
    return (timestamp - unix_time_start) // increment
35
36
36
37
def convert_activity_file(
    activity_id, original_file_path, format="csv"
):
    """Convert a raw TCX activity file into a one-row CSV or Parquet file.

    The time, power and heart-rate series are pulled out of the TCX file
    with ``tcx.extract``, normalised to integer columns, and packed into a
    single record of the form ``{activity_id, start_time, data}``, where
    ``data`` is the string form of the column-oriented dict of the series.

    Args:
        activity_id: Identifier of the activity; must be castable to int64.
            It is also used as the output file's base name.
        original_file_path: Path of the raw TCX file to read.
        format: Output format, either ``"csv"`` (default) or ``"parquet"``.

    Returns:
        The path of the written file, or ``False`` when the file yields no
        data points.

    Raises:
        ValueError: If ``format`` is neither ``"csv"`` nor ``"parquet"``.
    """
    if format not in ("csv", "parquet"):
        # Fail early with a clear message; previously this fell through both
        # branches and died with UnboundLocalError at the return statement.
        raise ValueError(
            f"Unsupported format {format!r}; expected 'csv' or 'parquet'"
        )

    time_points = tcx.extract(original_file_path, "Time")
    watts = tcx.extract(original_file_path, "ns3:Watts")
    heart_rate = tcx.extract(original_file_path, "Value")

    # NOTE(review): this assumes the three extractions are equally long and
    # positionally aligned (one value per trackpoint). pandas raises
    # ValueError here if the lengths differ -- confirm against tcx.extract.
    data_df = pd.DataFrame({
        'time': time_points,
        'heart_rate': heart_rate,
        'watts': watts,
    })

    # Nothing was extracted: report failure to the caller rather than
    # raising IndexError on the start_time lookup below.
    if data_df.empty:
        return False

    # Unparseable or missing values become 0 so the integer casts are safe.
    data_df['time'] = to_unix(data_df['time']).fillna(0).astype('int64')
    data_df['heart_rate'] = (
        pd.to_numeric(data_df['heart_rate'], errors='coerce')
        .fillna(0)
        .astype('uint16')
    )
    data_df['watts'] = (
        pd.to_numeric(data_df['watts'], errors='coerce')
        .fillna(0)
        .astype('uint16')
    )

    data_dict = data_df.to_dict(orient='list')

    activity_data = {
        "activity_id": activity_id,
        "start_time": data_df['time'].iloc[0],
        "data": data_dict,
    }
    df = pd.DataFrame([activity_data])
    df = df.astype(
        {
            "activity_id": "int64",
            "start_time": "int64",
            "data": "string",
        }
    )

    # format has been validated above, so it doubles as the file extension.
    converted_file_path = f"{CONVERTED_FILES_PATH}/{activity_id}.{format}"
    if format == "csv":
        df.to_csv(converted_file_path, index=False)
    else:
        df.to_parquet(converted_file_path, index=False)
    return converted_file_path
0 commit comments