In [1]:
import pandas as pd
from st_hsdatalog.HSD.HSDatalog import HSDatalog
# import itertools as it

from acquisitioninfo import AcquisitionInfo
from deviceconfig import Device
from data import ComponentData

# Line-feed character kept as a named constant.
# NOTE(review): not referenced in the visible cells - confirm it is still needed.
LF = "\n"

Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd


In [2]:
# Instantiate the HSDatalog entry point; the next cell calls its create_hsd()
# to open one acquisition folder.
hsdatalog_factory = HSDatalog()


In [3]:
# Open the acquisition folder through the factory created in the previous cell.
hsd = hsdatalog_factory.create_hsd(
    "logs/20240212_12_44_00",
)

# Validate the acquisition description returned by the reader into the
# AcquisitionInfo model, then display it as the cell result.
acq = AcquisitionInfo.model_validate(
    hsd.get_acquisition_info(),
)
acq

2024-02-19 11:10:38,256 - HSDatalogApp.st_pnpl.DTDL.device_template_manager - INFO - dtmi found in locally in base supported models
2024-02-19 11:10:38,257 - HSDatalogApp.st_pnpl.DTDL.device_template_manager - INFO - dtmi: dtmi/appconfig/steval_mkboxpro/FP_SNS_DATALOG2_Datalog2-2.json


AcquisitionInfo(name='20240212_12_44_00', description='20240212_12_44_00', uuid='27675a99-3ffe-4cbc-9017-9c333f240a5a', start_time=datetime.datetime(2000, 1, 1, 1, 9, 17, tzinfo=TzInfo(UTC)), end_time=datetime.datetime(2000, 1, 1, 1, 9, 36, tzinfo=TzInfo(UTC)), tag_events=[TagEvent(label='stationary_not_upright', is_set=True, timestamp=datetime.datetime(2000, 1, 1, 1, 9, 18, 295000, tzinfo=TzInfo(UTC))), TagEvent(label='stationary_not_upright', is_set=False, timestamp=datetime.datetime(2000, 1, 1, 1, 9, 21, 917000, tzinfo=TzInfo(UTC))), TagEvent(label='stationary_upright', is_set=True, timestamp=datetime.datetime(2000, 1, 1, 1, 9, 23, 194000, tzinfo=TzInfo(UTC))), TagEvent(label='stationary_upright', is_set=False, timestamp=datetime.datetime(2000, 1, 1, 1, 9, 27, 321000, tzinfo=TzInfo(UTC))), TagEvent(label='motion', is_set=True, timestamp=datetime.datetime(2000, 1, 1, 1, 9, 28, 271000, tzinfo=TzInfo(UTC))), TagEvent(label='motion', is_set=False, timestamp=datetime.datetime(2000, 1, 1,

In [4]:
# Validate the device description returned by the reader into the Device model.
device = Device.model_validate(
    hsd.get_device(),
)

# Ask the validated device for its components and display them.
components = device.get_components()
components

(Component(name='lsm6dsv16x_acc', odr=480, fs=4.0, enable=True, samples_per_ts=480, dim=3, ioffset=0.1812172532081604, measodr=487.7249450683594, usb_dps=144, sd_dps=15360, sensitivity=0.0001219999976456165, data_type='int16', sensor_category=0, c_type=0, stream_id=1, ep_id=1, unit='gForce'),
 Component(name='lsm6dsv16x_gyro', odr=3840, fs=4000.0, enable=True, samples_per_ts=1000, dim=3, ioffset=0.1812172532081604, measodr=3901.799560546875, usb_dps=1152, sd_dps=15360, sensitivity=140.0, data_type='int16', sensor_category=0, c_type=0, stream_id=0, ep_id=0, unit='mdps'))

In [5]:
# Pair every component with the labeled data frame the reader returns for it.
with_data_frames = tuple(
    {
        "name": component.name,
        "sensor": component,
        "data_frame": hsd.get_dataframe(component.name, labeled=True),
    }
    for component in components
)

# Wrap each (name, sensor, data frame) triple in a ComponentData record.
component_data = tuple(
    ComponentData(entry["name"], entry["sensor"], entry["data_frame"])
    for entry in with_data_frames
)

component_data

(ComponentData(name='lsm6dsv16x_acc', component=Component(name='lsm6dsv16x_acc', odr=480, fs=4.0, enable=True, samples_per_ts=480, dim=3, ioffset=0.1812172532081604, measodr=487.7249450683594, usb_dps=144, sd_dps=15360, sensitivity=0.0001219999976456165, data_type='int16', sensor_category=0, c_type=0, stream_id=1, ep_id=1, unit='gForce'), data=           Time    A_x [g]    A_y [g]   A_z [g]  motion  shaken  \
 0      0.181217  -0.979782   0.023058  0.193126   False   False   
 1      0.183268  -0.977830   0.020862  0.191784   False   False   
 2      0.185318  -0.980026   0.023302  0.195566   False   False   
 3      0.187368  -0.978806   0.022936  0.191784   False   False   
 4      0.189419  -0.979660   0.021838  0.192882   False   False   
 ...         ...        ...        ...       ...     ...     ...   
 9115  18.869910  -0.144326  -0.541070  1.514874   False    True   
 9116  18.871960  -0.099552  -0.509716  1.437404   False    True   
 9117  18.874010  -0.075396  -0.474580  1.3

In [6]:
# Collect the tag labels reported by the acquisition info; the displayed result
# is a set of label strings.
tag_set = acq.tag_set()
tag_set

{'motion', 'shaken', 'stationary_not_upright', 'stationary_upright'}

In [7]:
# One independent, initially empty list per tag label; filled by the loops below.
grouped_by_tag = {tag: [] for tag in tag_set}

def group_by_key_fn(data_frame: pd.DataFrame, tags=None):
    """Build a ``DataFrame.groupby`` key function that maps a row to its tag.

    Args:
        data_frame: Frame containing one truthy/falsy column per tag label.
        tags: Optional iterable of tag labels to consider. Defaults to the
            notebook-level ``tag_set`` when omitted.

    Returns:
        A callable taking an index label of ``data_frame`` and returning the
        first tag (in sorted order) whose column is truthy on that row, or
        ``None`` when no tag is set on the row.

    Raises:
        KeyError: If a tag has no matching column in ``data_frame``.
    """
    # Sets of strings have no stable iteration order between interpreter
    # sessions, so sort the labels: rows where several tags are set at once are
    # then always assigned to the same tag.
    ordered_tags = sorted(tag_set if tags is None else tags)

    # Look every tag column up once instead of once per row and tag.
    tag_columns = [(tag, data_frame[tag]) for tag in ordered_tags]

    def group_by_key(index: int) -> "str | None":
        for tag, column in tag_columns:
            # Label-based scalar access; groupby passes index labels here.
            if column.at[index]:
                return tag
        # No tag active on this row.
        return None

    return group_by_key

# Split every component's labeled frame into one slice per tag and file each
# slice (with the tag columns removed) under its tag in grouped_by_tag.
# Loop variables are named so that they do not rebind `data_frame` mid-loop or
# overwrite the notebook-level `component_data` tuple built in an earlier cell;
# this keeps the cell re-runnable.
for item in component_data:
    labeled_frame = item.data
    slices_by_tag = labeled_frame.groupby(by=group_by_key_fn(labeled_frame))

    for tag_name, tag_frame in slices_by_tag:
        grouped_by_tag[tag_name].append(
            ComponentData(item.name, item.component, tag_frame.drop(columns=list(tag_set)))
        )

# Print summary statistics of every per-tag slice, grouped under its tag label.
for tag_name, tagged_components in grouped_by_tag.items():
    print(tag_name)
    for tagged_component in tagged_components:
        print(tagged_component.data.describe())

stationary_not_upright
            Time    A_x [g]   A_y [g]   A_z [g]
count       1765       1765      1765      1765
unique      1765         61        68        64
top     4.913352  -0.979660  0.022936  0.193492
freq           1         95        78        75
            Time   G_x [mdps]  G_y [mdps] G_z [mdps]
count      14132        14132       14132      14132
unique     14132           13          15         12
top     4.916684  -700.000000  280.000000   0.000000
freq           1         3354        3051       4467
stationary_upright
             Time    A_x [g]   A_y [g]   A_z [g]
count        2012       2012      2012      2012
unique       2012         77        68        62
top     10.317994  -0.001708  0.012810  1.005158
freq            1         97        94       105
             Time   G_x [mdps]  G_y [mdps]   G_z [mdps]
count       16102        16102       16102        16102
unique      16102           14          15           13
top     10.320556  -560.000000  140.0000

In [8]:
# Collate the per-component slices of each tag via ComponentData.collate,
# keeping the result keyed by tag label, then display it.
grouped_by_tag_collated = {
    tag_label: ComponentData.collate(tag_slices)
    for tag_label, tag_slices in grouped_by_tag.items()
}

grouped_by_tag_collated

{'stationary_not_upright':          A_x [g]   A_y [g]   A_z [g]    G_x [mdps]  G_y [mdps]   G_z [mdps]
 544    -0.980270  0.022570  0.191296           NaN         NaN          NaN
 545    -0.978684  0.020862  0.194224           NaN         NaN          NaN
 546    -0.980026  0.022692  0.192272           NaN         NaN          NaN
 547    -0.980270  0.023180  0.192760           NaN         NaN          NaN
 548    -0.980392  0.023180  0.194590           NaN         NaN          NaN
 ...          ...       ...       ...           ...         ...          ...
 18450        NaN       NaN       NaN   -980.000000  560.000000  -840.000000
 18464        NaN       NaN       NaN   -980.000000  700.000000  -560.000000
 18474        NaN       NaN       NaN   -840.000000  420.000000  -700.000000
 18475        NaN       NaN       NaN  -1120.000000  280.000000  -700.000000
 18476        NaN       NaN       NaN  -1260.000000  140.000000  -560.000000
 
 [2389 rows x 6 columns],
 'stationary_upright':