Handle void arrays when converting to Pandas dataframe (#189)

adamreeve · Apr 29, 2020 · 28b268c · 28b268c
1 parent 5fac269
commit 28b268c
Show file tree

Hide file tree

Showing 2 changed files with 41 additions and 2 deletions.
diff --git a/nptdms/export/pandas_export.py b/nptdms/export/pandas_export.py
@@ -1,3 +1,4 @@
+import numpy as np
 from nptdms.utils import OrderedDict
 
 
@@ -65,13 +66,24 @@ def _channels_to_dataframe(channels_to_export, time_index=False, absolute_time=F
     for column_name, channel in channels_to_export.items():
         index = channel.time_track(absolute_time) if time_index else None
         if scaled_data:
-            dataframe_dict[column_name] = pd.Series(data=channel.data, index=index)
+            dataframe_dict[column_name] = pd.Series(data=_array_for_pd(channel.data), index=index)
         elif channel.scaler_data_types:
             # Channel has DAQmx raw data
             for scale_id, raw_data in channel.raw_scaler_data.items():
                 scaler_column_name = column_name + "[{0:d}]".format(scale_id)
                 dataframe_dict[scaler_column_name] = pd.Series(data=raw_data, index=index)
         else:
             # Raw data for normal TDMS file
-            dataframe_dict[column_name] = pd.Series(data=channel.raw_data, index=index)
+            dataframe_dict[column_name] = pd.Series(data=_array_for_pd(channel.raw_data), index=index)
     return pd.DataFrame.from_dict(dataframe_dict)
+
+
+def _array_for_pd(array):
+    """ Return the data array in a form suitable for a Pandas dataframe: void-dtype arrays
+    are replaced by an empty float64 array, any other array is returned unchanged. """
+    if np.issubdtype(array.dtype, np.dtype('void')):
+        # Assumes a void dtype only occurs when the channel has no data, so the array is empty.
+        # Pandas doesn't like void data types, so an empty float64 array is returned instead
+        # and Pandas will fill the values with NaN when the dataframe is built.
+        return np.empty(0, dtype='float64')
+    return array
diff --git a/nptdms/test/test_pandas.py b/nptdms/test/test_pandas.py
@@ -13,6 +13,8 @@
 from nptdms.test.util import (
     GeneratedFile,
     basic_segment,
+    channel_metadata,
+    channel_metadata_with_no_data,
     string_hexlify,
     segment_objects_metadata,
     hexlify_value
@@ -312,3 +314,28 @@ def test_raw_daqmx_channel_export():
     assert dataframe["/'Group'/'Channel1'[1]"].dtype == np.int16
     np.testing.assert_equal(dataframe["/'Group'/'Channel1'[0]"], expected_data[0])
     np.testing.assert_equal(dataframe["/'Group'/'Channel1'[1]"], expected_data[1])
+
+
+def test_export_with_empty_channels():
+    """Convert a group to a dataframe when one channel has no data and a void data type"""
+
+    test_file = GeneratedFile()
+    test_file.add_segment(
+        ("kTocMetaData", "kTocRawData", "kTocNewObjList"),
+        segment_objects_metadata(
+            channel_metadata("/'group'/'channel1'", 3, 2),
+            channel_metadata_with_no_data("/'group'/'channel2'"),
+        ),
+        "01 00 00 00 02 00 00 00"  # raw bytes for channel1, expected to read back as [1, 2]
+    )
+
+    tdms_data = test_file.load()
+
+    df = tdms_data["group"].as_dataframe()
+    assert len(df) == 2
+    assert len(df.keys()) == 2
+    assert "channel1" in df.keys()
+    assert "channel2" in df.keys()
+    assert (df["channel1"] == [1, 2]).all()
+    assert len(df["channel2"]) == 2  # the empty channel is padded to the dataframe length
+    assert np.isnan(df["channel2"]).all()  # and the padding values are NaN