Add more unit tests (#177)
* Add test for appending to TDMS file under new group
* Test for segment without data
* Test interleaving data with different types
* Remove some unused code
* Support len of TdmsFile and TdmsGroup
* Test KeyErrors for TdmsFile and TdmsGroup
* Ignore not-implemented methods in base classes
* Tidy up some duplication getting TdmsReader in TdmsFile
* Tests and fixes for channels without data
adamreeve committed Apr 2, 2020
1 parent 6ac4a7c commit d5e2a0c
Showing 9 changed files with 273 additions and 49 deletions.
2 changes: 2 additions & 0 deletions .coveragerc
@@ -5,3 +5,5 @@ exclude_lines =
     def __repr__
     # Ignore handling failing imports
     except ImportError
+    # Ignore not-implemented methods in base classes
+    raise NotImplementedError
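
For context, this exclusion stops stub methods in abstract base classes from counting as uncovered lines. A minimal sketch of the kind of code the new rule matches (the class name here is hypothetical):

    class BaseSegmentStub:
        def read_raw_data_index(self, file, raw_data_index_header):
            # This line is now ignored by coverage measurement because it
            # matches the "raise NotImplementedError" exclude pattern.
            raise NotImplementedError(
                "Must be implemented in derived classes")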
4 changes: 0 additions & 4 deletions nptdms/base_segment.py
@@ -311,10 +311,6 @@ def read_raw_data_index(self, file, raw_data_index_header):
         """
         raise NotImplementedError("Segment metadata reading must be implemented in base classes")
 
-    @property
-    def total_raw_data_width(self):
-        raise NotImplementedError("Raw data width must be implemented in base classes")
-
     @property
     def scaler_data_types(self):
         return None
10 changes: 4 additions & 6 deletions nptdms/common.py
@@ -46,6 +46,10 @@ def is_root(self):
     def is_group(self):
         return self.group is not None and self.channel is None
 
+    @property
+    def is_channel(self):
+        return self.channel is not None
+
     @staticmethod
     def from_string(path_string):
         components = list(_path_components(path_string))
@@ -56,12 +60,6 @@ def __str__(self):
         """
         return self._path
 
-    def __hash__(self):
-        return hash(self._path)
-
-    def __eq__(self, other):
-        return self._path == str(other)
-
 
 def _path_components(path):
     """ Generator that yields components within an object path
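
A quick sketch of how the new is_channel property behaves, using the TDMS path convention of quoted group and channel names (the specific paths are made up for illustration):

    from nptdms.common import ObjectPath

    root = ObjectPath.from_string("/")
    group = ObjectPath.from_string("/'Measurements'")
    channel = ObjectPath.from_string("/'Measurements'/'Voltage'")

    print(root.is_channel)     # False
    print(group.is_group)      # True
    print(channel.is_channel)  # True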
8 changes: 3 additions & 5 deletions nptdms/reader.py
@@ -2,6 +2,7 @@
 """
 
 import numpy as np
+from nptdms.common import ObjectPath
 from nptdms.utils import Timer, OrderedDict
 from nptdms.tdms_segment import read_segment_metadata
 from nptdms.base_segment import RawChannelDataChunk
@@ -67,10 +68,7 @@ def read_metadata(self):
             self._segments.append(segment)
             previous_segment = segment
 
-            if segment.next_segment_pos is None:
-                break
-            else:
-                self._file.seek(segment.next_segment_pos)
+            self._file.seek(segment.next_segment_pos)
 
     def read_raw_data(self):
         """ Read raw data from all segments, chunk by chunk
@@ -219,7 +217,7 @@ def _build_index(self):
         data_objects = [
             path
             for (path, obj) in self.object_metadata.items()
-            if obj.num_values > 0]
+            if ObjectPath.from_string(path).is_channel]
         num_segments = len(self._segments)
 
         segment_num_values = {
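
With _build_index now keyed on whether a path refers to a channel rather than on num_values > 0, channels that contain no data still get index entries and can be looked up and read. A usage sketch (the file, group, and channel names are hypothetical):

    from nptdms import TdmsFile

    with TdmsFile.open("example.tdms") as tdms_file:
        channel = tdms_file["Group"]["EmptyChannel"]
        data = channel.read_data()  # empty array rather than an error
        assert len(data) == 0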
62 changes: 38 additions & 24 deletions nptdms/tdms.py
@@ -175,11 +175,9 @@ def data_chunks(self):
         :rtype: Generator that yields :class:`DataChunk` objects
         """
-        if self._reader is None:
-            raise RuntimeError(
-                "Cannot read data chunks after the underlying TDMS reader is closed")
+        reader = self._get_reader()
         channel_offsets = defaultdict(int)
-        for chunk in self._reader.read_raw_data():
+        for chunk in reader.read_raw_data():
             yield DataChunk(self, chunk, channel_offsets)
             for path, data in chunk.channel_data.items():
                 channel_offsets[path] += len(data)
 
@@ -194,6 +192,11 @@ def close(self):
             self._reader.close()
             self._reader = None
 
+    def __len__(self):
+        """ Returns the number of groups in this file
+        """
+        return len(self._groups)
+
     def __iter__(self):
         """ Returns an iterator over the names of groups in this file
         """
@@ -213,6 +216,12 @@ def __enter__(self):
     def __exit__(self, exc_type, exc_value, traceback):
         self.close()
 
+    def _get_reader(self):
+        if self._reader is None:
+            raise RuntimeError(
+                "Cannot read data after the underlying TDMS reader is closed")
+        return self._reader
+
     def _read_file(self, tdms_reader, read_metadata_only):
         tdms_reader.read_metadata()
 
@@ -281,26 +290,19 @@ def _read_data(self, tdms_reader):
         self.data_read = True
 
     def _read_channel_data_chunks(self, channel):
-        if self._reader is None:
-            raise RuntimeError(
-                "Cannot read channel data after the underlying TDMS reader is closed")
-        for chunk in self._reader.read_raw_data_for_channel(channel.path):
+        reader = self._get_reader()
+        for chunk in reader.read_raw_data_for_channel(channel.path):
             yield chunk
 
     def _read_channel_data_chunk_for_index(self, channel, index):
-        if self._reader is None:
-            raise RuntimeError(
-                "Cannot read channel data after the underlying TDMS reader is closed")
-        return self._reader.read_channel_chunk_for_index(channel.path, index)
+        return self._get_reader().read_channel_chunk_for_index(channel.path, index)
 
     def _read_channel_data(self, channel, offset=0, length=None):
         if offset < 0:
             raise ValueError("offset must be non-negative")
         if length is not None and length < 0:
             raise ValueError("length must be non-negative")
-        if self._reader is None:
-            raise RuntimeError(
-                "Cannot read channel data after the underlying TDMS reader is closed")
+        reader = self._get_reader()
 
         with Timer(log, "Allocate space for channel"):
             # Allocate space for data
@@ -313,7 +315,7 @@
 
         with Timer(log, "Read data for channel"):
             # Now actually read all the data
-            for chunk in self._reader.read_raw_data_for_channel(channel.path, offset, length):
+            for chunk in reader.read_raw_data_for_channel(channel.path, offset, length):
                 if chunk.data is not None:
                     channel_data.append_data(chunk.data)
                 if chunk.scaler_data is not None:
@@ -468,6 +470,11 @@ def as_dataframe(self, time_index=False, absolute_time=False, scaled_data=True):
 
         return pandas_export.from_group(self, time_index, absolute_time, scaled_data)
 
+    def __len__(self):
+        """ Returns the number of channels in this group
+        """
+        return len(self._channels)
+
     def __iter__(self):
         """ Returns an iterator over the names of channels in this group
         """
@@ -611,15 +618,19 @@ def dtype(self):
 
         :rtype: numpy.dtype
         """
+        channel_scaling = self._get_scaling()
+        if channel_scaling is not None:
+            return channel_scaling.get_dtype(self.data_type, self.scaler_data_types)
+        return self._raw_data_dtype()
+
+    def _raw_data_dtype(self):
         if self.data_type is types.String:
             return np.dtype('O')
         elif self.data_type is types.TimeStamp:
             return np.dtype('<M8[us]')
-
-        channel_scaling = self._get_scaling()
-        if channel_scaling is not None:
-            return channel_scaling.get_dtype(self.data_type, self.scaler_data_types)
-        return self.data_type.nptype
+        if self.data_type is not None and self.data_type.nptype is not None:
+            return self.data_type.nptype
+        return np.dtype('V8')
 
     @_property_builtin
     def data(self):
@@ -634,7 +645,7 @@ def data(self):
             raise RuntimeError("Channel data has not been read")
 
         if self._raw_data is None:
-            return np.empty((0, ))
+            return np.empty((0, ), dtype=self.dtype)
         if self._data_scaled is None:
             self._data_scaled = self._scale_data(self._raw_data)
         return self._data_scaled
@@ -649,7 +660,7 @@ def raw_data(self):
             raise RuntimeError("Channel data has not been read")
 
         if self._raw_data is None:
-            return np.empty((0, ))
+            return np.empty((0, ), dtype=self._raw_data_dtype())
         if self._raw_data.scaler_data:
             if len(self._raw_data.scaler_data) == 1:
                 return next(v for v in self._raw_data.scaler_data.values())
@@ -696,6 +707,9 @@ def read_data(self, offset=0, length=None, scaled=True):
         For DAQmx data a dictionary of scaler id to raw scaler data will be returned.
         """
         raw_data = self._tdms_file._read_channel_data(self, offset, length)
+        if raw_data is None:
+            dtype = self.dtype if scaled else self._raw_data_dtype()
+            return np.empty((0,), dtype=dtype)
         if scaled:
             return self._scale_data(raw_data)
         else:
@@ -1007,7 +1021,7 @@ def _data(self):
         if self._scaled_data is not None:
             return self._scaled_data
         if self._raw_data.data is None and self._raw_data.scaler_data is None:
-            return np.empty((0, ))
+            return np.empty((0, ), dtype=self._channel.dtype)
 
         scale = self._get_scaling()
         if scale is not None:
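
Taken together, the TdmsFile and TdmsGroup changes make both containers behave like standard sized collections, and empty channels keep a meaningful dtype. A brief sketch (the file, group, and channel names are hypothetical):

    from nptdms import TdmsFile

    tdms_file = TdmsFile.read("example.tdms")
    print(len(tdms_file))      # number of groups, via the new __len__

    group = tdms_file["Group"]
    print(len(group))          # number of channels, via the new __len__

    channel = group["Channel"]
    print(channel.data.dtype)  # empty channels now keep their dtype too

    try:
        tdms_file["NoSuchGroup"]
    except KeyError:
        print("missing groups raise KeyError")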
6 changes: 1 addition & 5 deletions nptdms/tdms_segment.py
@@ -260,16 +260,12 @@ def read_raw_data_index(self, f, raw_data_index_header):
         log.debug(
             "Object number of values in segment: %d", self.number_values)
 
-    @property
-    def total_raw_data_width(self):
-        return self.data_type.size
-
     def read_value(self, file):
         """Read a single value from the given file"""
 
         if self.data_type.nptype is not None:
             dtype = self.data_type.nptype.newbyteorder(self.endianness)
-            return fromfile(file, dtype=dtype, count=1)
+            return fromfile(file, dtype=dtype, count=1)[0]
         return self.data_type.read(file, self.endianness)
 
     def read_values(self, file, number_values):
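
The read_value change fixes the single-value read path: reading with count=1 produces a length-1 array, so indexing with [0] is needed to return a scalar. An equivalent illustration with np.frombuffer, which behaves the same way as a count=1 file read:

    import numpy as np

    raw = np.int32(42).tobytes()
    values = np.frombuffer(raw, dtype=np.int32, count=1)
    print(values)     # array([42], dtype=int32)
    print(values[0])  # 42, the scalar that read_value now returns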
