Allow reading timestamps as raw TDMS timestamps (#200)
adamreeve committed May 15, 2020
1 parent c1d0689 commit c4ced51
Showing 12 changed files with 523 additions and 99 deletions.
3 changes: 0 additions & 3 deletions README.rst
@@ -93,9 +93,6 @@ Limitations
This module doesn't support TDMS files with XML headers or with
extended precision floating point data.

TDMS files support timestamps with a resolution of 2^-64 seconds but these
are read as numpy datetime64 values with microsecond resolution.

Contributors/Thanks
-------------------

11 changes: 11 additions & 0 deletions docs/apireference.rst
@@ -81,6 +81,17 @@ Data Types for Property Values

.. autoclass:: TimeStamp

Timestamps
----------

.. module:: nptdms.timestamp

.. autoclass:: TdmsTimestamp
    :members:

.. autoclass:: TimestampArray()
    :members:

Indices and Tables
------------------

25 changes: 1 addition & 24 deletions docs/conf.py
@@ -224,27 +224,4 @@
]


# Mock numpy module so that docs will build on readthedocs
# Based on docs at http://read-the-docs.readthedocs.org/en/latest/faq.html
class Mock(object):
    def __init__(self, *args, **kwargs):
        pass

    def __call__(self, *args, **kwargs):
        return Mock()

    @classmethod
    def __getattr__(cls, name):
        if name in ('__file__', '__path__'):
            return '/dev/null'
        elif name[0] == name[0].upper():
            mockType = type(name, (), {})
            mockType.__module__ = __name__
            return mockType
        else:
            return Mock()


MOCK_MODULES = ['numpy', 'np']
for mod_name in MOCK_MODULES:
    sys.modules[mod_name] = Mock()
autodoc_mock_imports = ['numpy', 'np', 'numpy.polynomial.polynomial']
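
The single replacement line hands the mocking over to Sphinx itself; for context, a minimal sketch of how this
option sits in a conf.py next to the autodoc extension (the extension list here is assumed, not taken from the
repository)::

# conf.py
extensions = ['sphinx.ext.autodoc']

# Modules autodoc should mock out instead of importing when building the docs
autodoc_mock_imports = ['numpy', 'np', 'numpy.polynomial.polynomial']
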
49 changes: 46 additions & 3 deletions docs/reading.rst
@@ -132,9 +132,52 @@ and your operating system will then page data in and out of memory as required::
Timestamps
----------

Timestamps are represented by numpy datetime64 objects with microsecond precision.
Note that TDMS files are capable of storing times with a precision of 2 :sup:`-64` seconds,
so some precision is lost when reading them in npTDMS.
By default, timestamps are read as numpy datetime64 objects with microsecond precision.
However, TDMS files are capable of storing times with a precision of 2\ :sup:`-64` seconds.
If you need access to this higher precision timestamp data, all methods for constructing a :py:class:`~nptdms.TdmsFile`
accept a ``raw_timestamps`` parameter.
When this is true, any timestamp properties will be returned as a :py:class:`~nptdms.timestamp.TdmsTimestamp`
object. This has ``seconds`` and ``second_fractions`` attributes, which give the number of seconds
since the epoch 1904-01-01 00:00:00 UTC and a positive number of 2\ :sup:`-64` fractions of a second respectively.
This class has methods for converting to a numpy datetime64 object or a ``datetime.datetime``. For example::

>>> timestamp = channel.properties['wf_start_time']
>>> timestamp
TdmsTimestamp(3670436596, 11242258187010646344)
>>> timestamp.seconds
3670436596
>>> timestamp.second_fractions
11242258187010646344
>>> print(timestamp)
2020-04-22T21:43:16.609444
>>> timestamp.as_datetime64('ns')
numpy.datetime64('2020-04-22T21:43:16.609444037')
>>> timestamp.as_datetime()
datetime.datetime(2020, 4, 22, 21, 43, 16, 609444)
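
The property shown above comes from a file read with the flag enabled; a minimal sketch of that call
(the file path and the choice of the first group and channel are placeholders, not part of the original documentation)::

>>> from nptdms import TdmsFile
>>> tdms_file = TdmsFile.read("my_file.tdms", raw_timestamps=True)
>>> channel = tdms_file.groups()[0].channels()[0]
>>> channel.properties['wf_start_time']
TdmsTimestamp(3670436596, 11242258187010646344)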

When ``raw_timestamps`` is set to true, channels with timestamp data will return their data as a
:py:class:`~nptdms.timestamp.TimestampArray` rather than as a ``datetime64`` array.
This is a subclass of ``numpy.ndarray`` with additional ``seconds`` and ``second_fractions`` properties
and an :py:meth:`~nptdms.timestamp.TimestampArray.as_datetime64` method for converting to a datetime64 array.
Indexing into the array returns :py:class:`~nptdms.timestamp.TdmsTimestamp` instances::

>>> timestamp_data = channel[:]
>>> timestamp_data
TimestampArray([(8942011409353408512, 3670436596), (9643130391967563776, 3670436596),
(9661619779500244992, 3670436596), ..., (1366710545511612416, 3670502040),
(1476995959824056320, 3670502040), (1587685994415521792, 3670502040)],
dtype=[('second_fractions', '<u8'), ('seconds', '<i8')])
>>> timestamp_data[0]
TdmsTimestamp(3670436596, 8942011409353408512)
>>> timestamp_data.seconds
array([3670436596, 3670436596, 3670436596, ..., 3670502040, 3670502040, 3670502040], dtype=int64)
>>> timestamp_data.second_fractions
array([8942011409353408512, 9643130391967563776, 9661619779500244992, ..., 1366710545511612416,
1476995959824056320, 1587685994415521792], dtype=uint64)
>>> timestamp_data.as_datetime64('us')
array(['2020-04-22T21:43:16.484747', '2020-04-22T21:43:16.522755', '2020-04-22T21:43:16.523757', ...,
'2020-04-23T15:54:00.074089', '2020-04-23T15:54:00.080068', '2020-04-23T15:54:00.086068'],
dtype='datetime64[us]')
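
For reference, the two fields combine into an absolute time as 1904-01-01 00:00:00 UTC plus ``seconds`` seconds
plus ``second_fractions`` times 2\ :sup:`-64` seconds. A rough sketch of that arithmetic done by hand with plain
numpy, reusing the values from the property example above (illustration only, not part of the npTDMS API)::

>>> import numpy as np
>>> seconds, second_fractions = 3670436596, 11242258187010646344
>>> epoch = np.datetime64('1904-01-01T00:00:00', 'us')
>>> microseconds = second_fractions * 10**6 // 2**64  # truncated to microsecond resolution
>>> epoch + np.timedelta64(seconds, 's') + np.timedelta64(microseconds, 'us')
numpy.datetime64('2020-04-22T21:43:16.609444')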

Timestamps in TDMS files are stored in UTC time and npTDMS does not do any timezone conversions.
If timestamps need to be converted to the local timezone,
50 changes: 49 additions & 1 deletion nptdms/channel_data.py
@@ -5,16 +5,18 @@
import numpy as np

from nptdms import types
from nptdms.timestamp import TimestampArray
from nptdms.log import log_manager

log = log_manager.get_logger(__name__)


def get_data_receiver(obj, num_values, memmap_dir=None):
def get_data_receiver(obj, num_values, raw_timestamps, memmap_dir=None):
"""Return a new channel data receiver to use for the given TDMS object
:param obj: TDMS channel object to receive data for
:param num_values: Number of values to be stored
:param raw_timestamps: Whether to store timestamp data as raw TDMS timestamps or a numpy datetime64 array
:param memmap_dir: Optional directory to store memory map files,
or None to not use memory map files
"""
@@ -24,6 +26,9 @@ def get_data_receiver(obj, num_values, memmap_dir=None):
    if obj.data_type == types.DaqMxRawData:
        return DaqmxDataReceiver(obj, num_values, memmap_dir)

    if obj.data_type == types.TimeStamp:
        return TimestampDataReceiver(obj, num_values, raw_timestamps, memmap_dir)

    if obj.data_type.nptype is None:
        return ListDataReceiver(obj)

@@ -124,6 +129,49 @@ def append_scaler_data(self, scale_id, new_data):
        self._scaler_insert_positions[scale_id] += len(new_data)


class TimestampDataReceiver(object):
    """Receives timestamp data for a TDMS object and stores it in a numpy array
    :ivar data: Data that has been read for the object as either a TimestampArray
        or datetime64 array depending on whether raw_timestamps is True or False
    """

    def __init__(self, obj, num_values, raw_timestamps=False, memmap_dir=None):
        """Initialise timestamp data receiver backed by a numpy array
        :param obj: Object to store data for
        :param num_values: Number of values to be stored
        :param raw_timestamps: Whether to store data as raw TDMS timestamps or a numpy datetime64 array
        :param memmap_dir: Optional directory to store memory map files in.
        """

        self.path = obj.path
        self._raw_timestamps = raw_timestamps
        if raw_timestamps:
            byte_array = _new_numpy_array(np.dtype('uint8'), num_values * 16, memmap_dir)
            dtype = np.dtype([('second_fractions', 'uint64'), ('seconds', 'int64')])
            self.data = TimestampArray(byte_array.view(dtype))
        else:
            self.data = _new_numpy_array(np.dtype('datetime64[us]'), num_values, memmap_dir)
        self.scaler_data = {}
        self._data_insert_position = 0
        log.debug("Allocated %d sample slots for %s", len(self.data), obj.path)

    def append_data(self, new_data):
        """Update the object data with a new array of data"""

        log.debug("Adding %d data points to data for %s", len(new_data), self.path)
        start_pos = self._data_insert_position
        end_pos = self._data_insert_position + len(new_data)
        if self._raw_timestamps:
            # Need to be careful about potential endianness mismatch, so order of fields can differ
            self.data['seconds'][start_pos:end_pos] = new_data['seconds']
            self.data['second_fractions'][start_pos:end_pos] = new_data['second_fractions']
        else:
            self.data[start_pos:end_pos] = new_data.as_datetime64()
        self._data_insert_position += len(new_data)


def _new_numpy_array(dtype, num_values, memmap_dir=None):
    """Initialise a new numpy array for data
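
The raw branch of ``TimestampDataReceiver`` above works by viewing a flat byte buffer as a structured array of
16-byte records. A small self-contained sketch of the same pattern (the buffer and the values written into it
are made up for illustration, and little-endian field order is assumed)::

import numpy as np
from nptdms.timestamp import TimestampArray

# 16 bytes per timestamp: uint64 second fractions followed by int64 seconds
dtype = np.dtype([('second_fractions', '<u8'), ('seconds', '<i8')])
byte_array = np.zeros(2 * 16, dtype='uint8')          # room for two timestamps
timestamps = TimestampArray(byte_array.view(dtype))   # reinterpret the bytes, no copy

timestamps['seconds'][:] = [3670436596, 3670436597]
timestamps['second_fractions'][:] = [0, 2 ** 63]      # zero and half a second
print(timestamps.as_datetime64('us'))
# expected: ['2020-04-22T21:43:16.000000' '2020-04-22T21:43:17.500000']

Keeping the two fields separate is what lets ``append_data`` above assign ``seconds`` and ``second_fractions``
independently when the on-disk field order differs.
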
62 changes: 49 additions & 13 deletions nptdms/tdms.py
@@ -15,6 +15,7 @@
from nptdms.channel_data import get_data_receiver
from nptdms.export import hdf_export, pandas_export
from nptdms.base_segment import RawChannelDataChunk
from nptdms.timestamp import TdmsTimestamp, TimestampArray


log = log_manager.get_logger(__name__)
@@ -57,48 +58,61 @@ class TdmsFile(object):
"""

@staticmethod
def read(file, memmap_dir=None):
def read(file, raw_timestamps=False, memmap_dir=None):
""" Creates a new TdmsFile object and reads all data in the file
:param file: Either the path to the tdms file to read
as a string or pathlib.Path, or an already opened file.
:param raw_timestamps: By default TDMS timestamps are read as numpy datetime64
but this loses some precision.
Setting this to true will read timestamps as a custom TdmsTimestamp type.
:param memmap_dir: The directory to store memory mapped data files in,
or None to read data into memory. The data files are created
as temporary files and are deleted when the channel data is no
longer used. tempfile.gettempdir() can be used to get the default
temporary file directory.
"""
return TdmsFile(file, memmap_dir=memmap_dir)
return TdmsFile(file, raw_timestamps=raw_timestamps, memmap_dir=memmap_dir)

@staticmethod
def open(file, memmap_dir=None):
def open(file, raw_timestamps=False, memmap_dir=None):
""" Creates a new TdmsFile object and reads metadata, leaving the file open
to allow reading channel data
:param file: Either the path to the tdms file to read
as a string or pathlib.Path, or an already opened file.
:param raw_timestamps: By default TDMS timestamps are read as numpy datetime64
but this loses some precision.
Setting this to true will read timestamps as a custom TdmsTimestamp type.
:param memmap_dir: The directory to store memory mapped data files in,
or None to read data into memory. The data files are created
as temporary files and are deleted when the channel data is no
longer used. tempfile.gettempdir() can be used to get the default
temporary file directory.
"""
return TdmsFile(file, memmap_dir=memmap_dir, read_metadata_only=True, keep_open=True)
return TdmsFile(
file, raw_timestamps=raw_timestamps, memmap_dir=memmap_dir, read_metadata_only=True, keep_open=True)

@staticmethod
def read_metadata(file):
def read_metadata(file, raw_timestamps=False):
""" Creates a new TdmsFile object and only reads the metadata
:param file: Either the path to the tdms file to read
as a string or pathlib.Path, or an already opened file.
:param raw_timestamps: By default TDMS timestamps are read as numpy datetime64
but this loses some precision.
Setting this to true will read timestamps as a custom TdmsTimestamp type.
"""
return TdmsFile(file, read_metadata_only=True)
return TdmsFile(file, raw_timestamps=raw_timestamps, read_metadata_only=True)

def __init__(self, file, memmap_dir=None, read_metadata_only=False, keep_open=False):
def __init__(self, file, raw_timestamps=False, memmap_dir=None, read_metadata_only=False, keep_open=False):
"""Initialise a new TdmsFile object
:param file: Either the path to the tdms file to read
as a string or pathlib.Path, or an already opened file.
:param raw_timestamps: By default TDMS timestamps are read as numpy datetime64
but this loses some precision.
Setting this to true will read timestamps as a custom TdmsTimestamp type.
:param memmap_dir: The directory to store memory mapped data files in,
or None to read data into memory. The data files are created
as temporary files and are deleted when the channel data is no
@@ -111,6 +125,7 @@ def __init__(self, file, memmap_dir=None, read_metadata_only=False, keep_open=Fa
"""

self._memmap_dir = memmap_dir
self._raw_timestamps = raw_timestamps
self._groups = OrderedDict()
self._properties = {}
self._channel_data = {}
@@ -178,6 +193,7 @@ def data_chunks(self):
        reader = self._get_reader()
        channel_offsets = defaultdict(int)
        for chunk in reader.read_raw_data():
            self._convert_data_chunk(chunk)
            yield DataChunk(self, chunk, channel_offsets)
            for path, data in chunk.channel_data.items():
                channel_offsets[path] += len(data)
@@ -230,15 +246,16 @@ def _read_file(self, tdms_reader, read_metadata_only):
        group_channels = OrderedDict()
        for (path_string, obj) in tdms_reader.object_metadata.items():
            path = ObjectPath.from_string(path_string)
            obj_properties = self._convert_properties(obj.properties)
            if path.is_root:
                # Root object provides properties for the whole file
                self._properties = obj.properties
                self._properties = obj_properties
            elif path.is_group:
                group_properties[path.group] = obj.properties
                group_properties[path.group] = obj_properties
            else:
                # Object is a channel
                channel = TdmsChannel(
                    self, path, obj.properties, obj.data_type,
                    self, path, obj_properties, obj.data_type,
                    obj.scaler_data_types, obj.num_values)
                if path.group in group_channels:
                    group_channels[path.group].append(channel)
@@ -268,7 +285,7 @@ def _read_data(self, tdms_reader):
            for group in self.groups():
                for channel in group.channels():
                    self._channel_data[channel.path] = get_data_receiver(
                        channel, len(channel), self._memmap_dir)
                        channel, len(channel), self._raw_timestamps, self._memmap_dir)

        with Timer(log, "Read data"):
            # Now actually read all the data
@@ -292,10 +309,13 @@ def _read_channel_data_chunks(self, channel):
    def _read_channel_data_chunks(self, channel):
        reader = self._get_reader()
        for chunk in reader.read_raw_data_for_channel(channel.path):
            self._convert_channel_data_chunk(chunk)
            yield chunk

    def _read_channel_data_chunk_for_index(self, channel, index):
        return self._get_reader().read_channel_chunk_for_index(channel.path, index)
        (chunk, offset) = self._get_reader().read_channel_chunk_for_index(channel.path, index)
        self._convert_channel_data_chunk(chunk)
        return chunk, offset

    def _read_channel_data(self, channel, offset=0, length=None):
        if offset < 0:
@@ -311,7 +331,7 @@ def _read_channel_data(self, channel, offset=0, length=None):
            else:
                num_values = min(length, len(channel) - offset)
            num_values = max(0, num_values)
            channel_data = get_data_receiver(channel, num_values, self._memmap_dir)
            channel_data = get_data_receiver(channel, num_values, self._raw_timestamps, self._memmap_dir)

        with Timer(log, "Read data for channel"):
            # Now actually read all the data


return channel_data

def _convert_properties(self, properties):
def convert_prop(val):
if isinstance(val, TdmsTimestamp) and not self._raw_timestamps:
# Convert timestamps to numpy datetime64 if raw timestamps are not requested
return val.as_datetime64()
return val
return {k: convert_prop(v) for (k, v) in properties.items()}

def _convert_data_chunk(self, chunk):
for channel_chunk in chunk.channel_data.values():
self._convert_channel_data_chunk(channel_chunk)

def _convert_channel_data_chunk(self, channel_chunk):
if not self._raw_timestamps and isinstance(channel_chunk.data, TimestampArray):
channel_chunk.data = channel_chunk.data.as_datetime64()

def object(self, *path):
"""(Deprecated) Get a TDMS object from the file
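
Taken together, the new keyword threads through ``read``, ``open`` and ``read_metadata`` above and into the data
receivers. A sketch of streaming usage, assuming ``TdmsFile.open`` is used as a context manager as described in the
npTDMS documentation, with a placeholder file path::

from nptdms import TdmsFile
from nptdms.timestamp import TimestampArray

with TdmsFile.open("example.tdms", raw_timestamps=True) as tdms_file:
    for group in tdms_file.groups():
        for channel in group.channels():
            data = channel[:]
            # Timestamp channels arrive as a TimestampArray when raw_timestamps=True;
            # convert if a plain datetime64 array is wanted
            if isinstance(data, TimestampArray):
                data = data.as_datetime64('us')

The same flag passed to ``TdmsFile.read`` or ``TdmsFile.read_metadata`` has the equivalent effect on timestamp
properties, as handled by ``_convert_properties`` above.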
