Skip to content

Commit

Permalink
Add check for TDSm header when reading data (#196)
Browse files Browse the repository at this point in the history
This should hopefully catch most cases where the tdms_index file doesn't match the tdms data file.
  • Loading branch information
adamreeve committed May 4, 2020
1 parent ca18fd1 commit 560f7b7
Show file tree
Hide file tree
Showing 2 changed files with 77 additions and 0 deletions.
17 changes: 17 additions & 0 deletions nptdms/reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,7 @@ def read_raw_data(self):
raise RuntimeError(
"Cannot read data unless metadata has first been read")
for segment in self._segments:
self._verify_segment_start(segment)
for chunk in segment.read_raw_data(self._file):
yield chunk

Expand Down Expand Up @@ -136,6 +137,7 @@ def read_raw_data_for_channel(self, channel_path, offset=0, length=None):

segment_index = start_segment
for segment in self._segments[start_segment:end_segment + 1]:
self._verify_segment_start(segment)
# By default, read all chunks in a segment
chunk_offset = 0
num_chunks = segment.num_chunks
Expand Down Expand Up @@ -197,6 +199,7 @@ def read_channel_chunk_for_index(self, channel_path, index):
index_in_segment = index - segment_start_index
chunk_index = index_in_segment // chunk_size

self._verify_segment_start(segment)
chunk_data = next(segment.read_raw_data_for_channel(self._file, channel_path, chunk_index, 1))
chunk_offset = segment_start_index + chunk_index * chunk_size
return chunk_data, chunk_offset
Expand Down Expand Up @@ -272,6 +275,20 @@ def _read_lead_in(self, file, segment_position, is_index_file=False):
return (segment_position, toc_mask, endianness, data_position, raw_data_offset,
next_segment_offset, next_segment_pos)

def _verify_segment_start(self, segment):
    """ Confirm that the data file has a TDSm tag at the start of a segment.

    Seeks ``self._file`` to ``segment.position`` and reads four bytes from
    there. If those bytes are not ``b'TDSm'`` a ValueError is raised, as the
    segment positions in use (possibly taken from a tdms_index file) do not
    line up with the contents of the tdms data file.

    When the tag matches, the file is left positioned just after the tag.
    """
    segment_start = segment.position
    self._file.seek(segment_start)

    # Guard clause: a matching tag means there is nothing more to check.
    if self._file.read(4) == b'TDSm':
        return

    # A short read at end of file also ends up here, as the bytes
    # returned cannot equal the full four byte tag.
    message = (
        "Attempted to read data segment at position {0} but did not find segment start header. "
        "Check that the tdms_index file matches the tdms data file."
    ).format(segment_start)
    raise ValueError(message)

def _get_data_file_size(self):
current_pos = self._file.tell()
self._file.seek(0, os.SEEK_END)
Expand Down
60 changes: 60 additions & 0 deletions nptdms/test/test_tdms_file.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import logging
import os
import sys
from shutil import copyfile
import tempfile
from hypothesis import (assume, given, example, settings, strategies)
import numpy as np
Expand Down Expand Up @@ -461,6 +462,65 @@ def test_read_file_passed_as_pathlib_path():
compare_arrays(channel_obj.data, expected_channel_data)


def test_read_with_mismatching_index_file():
    """ Reading a data file alongside an index file generated from a
    different data file should raise an error
    """
    toc = ("kTocMetaData", "kTocRawData", "kTocNewObjList")

    # Data file: two identical segments, each declaring two values per channel
    test_file = GeneratedFile()
    for _ in range(2):
        test_file.add_segment(
            toc,
            segment_objects_metadata(
                channel_metadata("/'group'/'channel1'", 3, 2),
                channel_metadata("/'group'/'channel2'", 3, 2),
            ),
            "01 00 00 00" "02 00 00 00"
            "03 00 00 00" "04 00 00 00"
        )

    # Source of the index file: same channels but three values per channel,
    # so its segments have a different layout to the data file above
    test_file_with_index = GeneratedFile()
    for _ in range(2):
        test_file_with_index.add_segment(
            toc,
            segment_objects_metadata(
                channel_metadata("/'group'/'channel1'", 3, 3),
                channel_metadata("/'group'/'channel2'", 3, 3),
            ),
            "01 00 00 00" "02 00 00 00" "03 00 00 00"
            "04 00 00 00" "05 00 00 00" "06 00 00 00"
        )

    with test_file.get_tempfile(delete=False) as tdms_file, \
            test_file_with_index.get_tempfile_with_index() as tdms_file_with_index_path:
        data_path = tdms_file.name
        # Put a copy of the second file's index next to the first data file,
        # named so that it is treated as the index for that data file
        new_index_file = data_path + '_index'
        copyfile(tdms_file_with_index_path + '_index', new_index_file)
        try:
            tdms_file.file.close()
            with pytest.raises(ValueError) as exc_info:
                _ = TdmsFile.read(data_path)
            error_message = str(exc_info.value)
            assert 'Check that the tdms_index file matches the tdms data file' in error_message
        finally:
            # The data file was created with delete=False, so remove it
            # here along with the copied index file
            os.remove(new_index_file)
            os.remove(data_path)


@pytest.mark.filterwarnings('ignore:.* is deprecated')
def test_get_objects():
"""Test reading data"""
Expand Down

0 comments on commit 560f7b7

Please sign in to comment.