Skip to content

Commit

Permalink
Support writing tdms_index files and directly reading tdms_index files (#278)
Browse files Browse the repository at this point in the history

* Can optionally write a tdms_index file while writing a tdms file
* Can open a tdms_index file directly with a path or file object rather than needing to open a tdms file
* Added TdmsWriter.defragment method to rewrite a tdms file
  • Loading branch information
Eifi1 committed Jul 13, 2022
1 parent c283ecb commit cf88de6
Show file tree
Hide file tree
Showing 6 changed files with 218 additions and 45 deletions.
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,9 @@ coverage.xml
.tox
.hypothesis
.benchmarks
.ipynb_checkpoints
.vscode
*.ipynb

# Wercker directories
_builds
Expand Down
73 changes: 53 additions & 20 deletions nptdms/reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,50 +30,76 @@ def __init__(self, tdms_file):
:param tdms_file: Either the path to the tdms file to read
as a string or pathlib.Path, or an already opened file.
"""
self._file_path = None
self._index_file_path = None
self._file = None
self._index_file = None

self._segments = None
self._prev_segment_objects = {}
self.object_metadata = OrderedDict()
self._file_path = None
self._index_file_path = None
self._segment_channel_offsets = {}
self.tdms_version = None

if hasattr(tdms_file, "read"):
# Is a file
self._file = tdms_file
tag = tdms_file.read(4)
tdms_file.seek(0, os.SEEK_SET)

if tag == b"TDSh":
self._index_file = tdms_file
elif tag == b"TDSm":
self._file = tdms_file
else:
raise ValueError(
f"File should either start with 'b`TDSh`' or 'b`TDSm`', submitted starts with '{tag}'.")

else:
# Is path to a file
self._file_path = str(tdms_file)
self._file = open(self._file_path, 'rb')
index_file_path = self._file_path + '_index'
if os.path.isfile(index_file_path):
self._index_file_path = index_file_path
source_path = str(tdms_file)
if source_path.endswith(".tdms_index"):
self._index_file_path = source_path
self._index_file = open(self._index_file_path, "rb")

else:
self._file_path = source_path
self._file = open(self._file_path, "rb")

filepath = self._file_path + '_index'
if os.path.isfile(filepath):
self._index_file_path = filepath
self._index_file = open(self._index_file_path, "rb")

def close(self):
if self._file is None:
if self._file is None and self._index_file is None:
# Already closed
return

if self._file_path is not None:
# File path was provided so we opened the file and
# should close it.
# File path was provided so we opened the file and should close it.
self._file.close()
# Otherwise always remove reference to the file

if self._index_file_path is not None:
# Index file path was provided so we opened the file and should close it.
self._index_file.close()

# Finally always remove reference to the files
self._file = None
self._index_file = None

def read_metadata(self, require_segment_indexes=False):
""" Read all metadata and structure information from a TdmsFile
:param require_segment_indexes: Whether to create segment object indexes to allow lookup of objects by path.
"""
self._ensure_open()

if self._index_file_path is not None:
if self._index_file is not None: # generally try to read metadata from index file because it is faster
file = self._index_file
reading_index_file = True
file = open(self._index_file_path, 'rb')
else:
reading_index_file = False
elif self._file is not None: # fallback if only a data file is supplied
file = self._file
reading_index_file = False
else:
raise ValueError("Neither tdms_index file nor tdms file is available.")

self._segments = []
segment_position = 0
Expand Down Expand Up @@ -102,7 +128,7 @@ def read_metadata(self, require_segment_indexes=False):
else:
file.seek(segment.next_segment_pos, os.SEEK_SET)
finally:
if reading_index_file:
if reading_index_file and self._index_file_path is not None:
file.close()

def read_raw_data(self):
Expand Down Expand Up @@ -231,6 +257,13 @@ def read_channel_chunk_for_index(self, channel_path, index):
chunk_offset = segment_start_index + chunk_index * chunk_size
return chunk_data, chunk_offset

def is_index_file_only(self):
    """ Check whether only an index file is available to this reader

    :returns: True when no data file is held but an index file is,
        False in every other case.
    :rtype: bool
    """
    has_data_file = self._file is not None
    has_index_file = self._index_file is not None
    return has_index_file and not has_data_file

def _read_segment_metadata(
self, file, segment_position, index_cache, previous_segment, is_index_file):
(position, toc_mask, data_position, next_segment_pos, segment_incomplete) = self._read_lead_in(
Expand Down Expand Up @@ -389,7 +422,7 @@ def _build_index(self, channel_path):
self._segment_channel_offsets[channel_path] = (first_segment, channel_offsets)

def _ensure_open(self):
if self._file is None:
if self._file is None and self._index_file is None:
raise RuntimeError(
"Cannot read data after the underlying TDMS reader is closed")

Expand Down
8 changes: 7 additions & 1 deletion nptdms/tdms.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,7 +128,11 @@ def __init__(self, file, raw_timestamps=False, memmap_dir=None, read_metadata_on

self._reader = TdmsReader(file)
try:
self._read_file(self._reader, read_metadata_only, keep_open)
self._read_file(
self._reader,
read_metadata_only if not self._reader.is_index_file_only() else True,
keep_open
)
finally:
if not keep_open:
self._reader.close()
Expand Down Expand Up @@ -787,6 +791,8 @@ def _read_channel_data(self, offset=0, length=None):
raise ValueError("offset must be non-negative")
if length is not None and length < 0:
raise ValueError("length must be non-negative")
if self._reader.is_index_file_only():
raise RuntimeError("Data cannot be read from index file only")

with Timer(log, "Allocate space for channel"):
# Allocate space for data
Expand Down
22 changes: 8 additions & 14 deletions nptdms/test/test_tdms_file.py
Original file line number Diff line number Diff line change
@@ -1,29 +1,23 @@
"""Test reading of example TDMS files"""

from collections import defaultdict
import logging
import os
import sys
from shutil import copyfile
import tempfile
import weakref
from hypothesis import (assume, given, example, settings, strategies, HealthCheck)
from collections import defaultdict
from shutil import copyfile

import numpy as np
import pytest
from hypothesis import (HealthCheck, assume, example, given, settings,
strategies)
from nptdms import TdmsFile
from nptdms.log import log_manager
from nptdms.test.util import (
BytesIoTestFile,
GeneratedFile,
basic_segment,
channel_metadata,
compare_arrays,
hexlify_value,
segment_objects_metadata,
string_hexlify,
)
from nptdms.test import scenarios

from nptdms.test.util import (BytesIoTestFile, GeneratedFile, basic_segment,
channel_metadata, compare_arrays, hexlify_value,
segment_objects_metadata, string_hexlify)

# When running tests on GitHub actions, the first iteration can be quite
# slow and cause failures, so disable deadlines:
Expand Down
72 changes: 69 additions & 3 deletions nptdms/test/writer/test_tdms_segment.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,17 @@
"""Test TdmsSegment"""

import os
import tempfile
from collections import OrderedDict
from datetime import datetime
import pytest
import numpy as np
from io import BytesIO

from nptdms.writer import ChannelObject, TdmsSegment, read_properties_dict
import numpy as np
import pytest
from nptdms.tdms import TdmsFile
from nptdms.types import *
from nptdms.writer import (ChannelObject, GroupObject, RootObject, TdmsSegment,
TdmsWriter, read_properties_dict)


class StubObject(object):
Expand Down Expand Up @@ -228,3 +233,64 @@ def _assert_sequence_equal(values, expected_values):
(expected, position))
except StopIteration:
pass


def test_defragment_files():
    """Defragmenting an in-memory TDMS stream produces output in the target stream."""
    buf = BytesIO()
    with TdmsWriter(buf) as file:
        file.write_segment([
            RootObject(properties={"file": "file1"}),
            GroupObject("group1"),
            ChannelObject("group1", "channel1", np.linspace(0, 1))
        ])
    # Rewind the freshly written stream before handing it to defragment
    buf.seek(0, os.SEEK_SET)

    target_buf = BytesIO()
    TdmsWriter.defragment(buf, target_buf)

    # Without an assertion this test would only verify that defragment does
    # not raise; also check that something was written to the target.
    # NOTE(review): assumes defragment leaves a caller-supplied stream open,
    # as TdmsWriter does for streams in the sibling tests - confirm.
    assert len(target_buf.getvalue()) > 0


def test_write_data_stream_with_index():
    """Writing with an index stream supplied fills both the data and index streams."""
    data_stream = BytesIO()
    index_stream = BytesIO()

    with TdmsWriter(data_stream, index_file=index_stream) as writer:
        segment_objects = [
            RootObject(properties={"file": "file1"}),
            GroupObject("group1"),
            ChannelObject("group1", "channel1", np.linspace(0, 1)),
        ]
        writer.write_segment(segment_objects)

    # Rewind each stream and read back everything that was written
    data_stream.seek(0, 0)
    index_stream.seek(0, 0)
    written_data = data_stream.read()
    written_index = index_stream.read()

    assert len(written_data) > 0
    assert len(written_index) > 0


def test_write_data_stream_without_index():
    """Writing without an index stream fills only the data stream."""
    data_stream = BytesIO()
    # NOTE(review): this stream is never passed to the writer, so the
    # emptiness assertion below holds regardless of writer behaviour.
    index_stream = BytesIO()

    with TdmsWriter(data_stream) as writer:
        segment_objects = [
            RootObject(properties={"file": "file1"}),
            GroupObject("group1"),
            ChannelObject("group1", "channel1", np.linspace(0, 1)),
        ]
        writer.write_segment(segment_objects)

    # Rewind each stream and read back everything that was written
    data_stream.seek(0, 0)
    index_stream.seek(0, 0)
    written_data = data_stream.read()
    written_index = index_stream.read()

    assert len(written_data) > 0
    assert len(written_index) == 0


def test_write_and_store_index_file():
    """Writing to a path with ``index_file=True`` creates a ``<path>_index`` file on disk."""
    # TemporaryDirectory removes the directory and its contents when the
    # block exits; mkdtemp would leave them behind after every test run.
    with tempfile.TemporaryDirectory() as directory:
        tdms_path = os.path.join(directory, 'test_file.tdms')

        writer = TdmsWriter(tdms_path, index_file=True)
        with writer as file:
            file.write_segment([
                RootObject(properties={"file": "file1"}),
                GroupObject("group1"),
                ChannelObject("group1", "channel1", np.linspace(0, 1))
            ])

        # Check inside the block, before the directory is cleaned up
        assert os.path.isfile(tdms_path + "_index")

0 comments on commit cf88de6

Please sign in to comment.