Skip to content

Commit

Permalink
Add performance benchmarks (#180)
Browse files Browse the repository at this point in the history
Adds benchmarks using the pytest-benchmark package. Benchmarking is disabled when running pytest normally; benchmarks can be run with `pytest --benchmark-enable --benchmark-only`.
  • Loading branch information
adamreeve committed Apr 9, 2020
1 parent 5ef9394 commit 53d7cbc
Show file tree
Hide file tree
Showing 5 changed files with 347 additions and 18 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
*.swp
.tox
.hypothesis
.benchmark

# Wercker directories
_builds
Expand Down
316 changes: 316 additions & 0 deletions nptdms/test/test_benchmarks.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,316 @@
import os
import numpy as np
import pytest

from nptdms import TdmsFile
from nptdms.test.util import (
GeneratedFile,
hexlify_value,
string_hexlify,
segment_objects_metadata,
channel_metadata,
channel_metadata_with_no_data,
channel_metadata_with_repeated_structure)
from nptdms.test.scenarios import TDS_TYPE_INT32


@pytest.mark.benchmark(group='read-all-data')
def test_read_contiguous_data(benchmark):
    """ Time a full read of a file containing several channels of contiguous data
    """
    tdms_file = benchmark(read_from_start, get_contiguous_file().get_bytes_io_file())

    # Each channel holds 10000 copies of its channel number
    for value, name in enumerate(
            ['channel1', 'channel2', 'channel3', 'channel4'], start=1):
        np.testing.assert_equal(
            tdms_file['group'][name][:], np.repeat([value], 10000))


@pytest.mark.benchmark(group='read-all-data')
def test_read_interleaved_data(benchmark):
    """ Time a full read of a file containing interleaved data
    """
    tdms_file = benchmark(read_from_start, get_interleaved_file().get_bytes_io_file())

    # Each channel holds 10000 copies of its channel number
    for value, name in enumerate(
            ['channel1', 'channel2', 'channel3', 'channel4'], start=1):
        np.testing.assert_equal(
            tdms_file['group'][name][:], np.repeat([value], 10000))


@pytest.mark.benchmark(group='read-all-channel')
def test_read_contiguous_data_channel(benchmark):
    """ Time reading all of one channel's data from a contiguous data file
    """
    source_file = get_contiguous_file().get_bytes_io_file()
    with TdmsFile.open(source_file) as tdms_file:
        channel = tdms_file['group']['channel3']
        channel_data = benchmark(read_channel_data, channel)

    # channel3 contains 10000 copies of the value 3
    np.testing.assert_equal(channel_data, np.repeat([3], 10000))


@pytest.mark.benchmark(group='read-all-channel')
def test_read_interleaved_data_channel(benchmark):
    """ Time reading all of one channel's data from an interleaved data file
    """
    source_file = get_interleaved_file().get_bytes_io_file()
    with TdmsFile.open(source_file) as tdms_file:
        channel = tdms_file['group']['channel3']
        channel_data = benchmark(read_channel_data, channel)

    # channel3 contains 10000 copies of the value 3
    np.testing.assert_equal(channel_data, np.repeat([3], 10000))


@pytest.mark.benchmark(group='read-all-channel')
def test_stream_contiguous_data_channel(benchmark):
    """ Time streaming a channel's data chunk by chunk from a contiguous data file
    """
    source_file = get_contiguous_file().get_bytes_io_file()
    with TdmsFile.open(source_file) as tdms_file:
        chunks = benchmark(stream_chunks, tdms_file['group']['channel3'])

    # Concatenated chunks should equal the full channel data
    np.testing.assert_equal(np.concatenate(chunks), np.repeat([3], 10000))


@pytest.mark.benchmark(group='read-all-channel')
def test_stream_interleaved_data_channel(benchmark):
    """ Time streaming a channel's data chunk by chunk from an interleaved data file
    """
    source_file = get_interleaved_file().get_bytes_io_file()
    with TdmsFile.open(source_file) as tdms_file:
        chunks = benchmark(stream_chunks, tdms_file['group']['channel3'])

    # Concatenated chunks should equal the full channel data
    np.testing.assert_equal(np.concatenate(chunks), np.repeat([3], 10000))


@pytest.mark.benchmark(group='slice-channel')
def test_slice_contiguous_data_channel(benchmark):
    """ Time reading a 1000 element slice of channel data from a contiguous data file
    """
    source_file = get_contiguous_file().get_bytes_io_file()
    with TdmsFile.open(source_file) as tdms_file:
        channel = tdms_file['group']['channel3']
        sliced_data = benchmark(get_slice, channel, 5555, 6555)

    np.testing.assert_equal(sliced_data, np.repeat([3], 1000))


@pytest.mark.benchmark(group='slice-channel')
def test_slice_interleaved_data_channel(benchmark):
    """ Time reading a 1000 element slice of channel data from an interleaved data file
    """
    source_file = get_interleaved_file().get_bytes_io_file()
    with TdmsFile.open(source_file) as tdms_file:
        channel = tdms_file['group']['channel3']
        sliced_data = benchmark(get_slice, channel, 5555, 6555)

    np.testing.assert_equal(sliced_data, np.repeat([3], 1000))


@pytest.mark.benchmark(group='read-all-channel')
def test_index_contiguous_data_channel(benchmark):
    """ Time reading data one value at a time using integer indices
        from a contiguous data file
    """
    source_file = get_contiguous_file().get_bytes_io_file()
    with TdmsFile.open(source_file) as tdms_file:
        channel = tdms_file['group']['channel3']
        # Preallocate the output so the benchmark measures only index reads
        output = np.zeros(10000, dtype=channel.dtype)
        benchmark(index_values, channel, output)

    np.testing.assert_equal(output, np.repeat([3], 10000))


@pytest.mark.benchmark(group='read-all-channel')
def test_index_interleaved_data_channel(benchmark):
    """ Time reading data one value at a time using integer indices
        from an interleaved data file
    """
    source_file = get_interleaved_file().get_bytes_io_file()
    with TdmsFile.open(source_file) as tdms_file:
        channel = tdms_file['group']['channel3']
        # Preallocate the output so the benchmark measures only index reads
        output = np.zeros(10000, dtype=channel.dtype)
        benchmark(index_values, channel, output)

    np.testing.assert_equal(output, np.repeat([3], 10000))


@pytest.mark.benchmark(group='read-scaled-channel')
def test_stream_scaled_data_chunks(benchmark):
    """ Benchmark streaming channel data when the data is scaled
    """
    # Channel properties describing a single linear scale so that
    # scaled = 2.0 * raw + 10.0.  Each value is a (TDMS type id,
    # hex-encoded property bytes) pair consumed by GeneratedFile.
    properties = {
        "NI_Number_Of_Scales":
            (3, "01 00 00 00"),
        "NI_Scale[0]_Scale_Type":
            (0x20, hexlify_value("<I", len("Linear")) + string_hexlify("Linear")),
        "NI_Scale[0]_Linear_Slope":
            (10, hexlify_value("<d", 2.0)),
        "NI_Scale[0]_Linear_Y_Intercept":
            (10, hexlify_value("<d", 10.0))
    }
    test_file = GeneratedFile()
    data_array = np.arange(0, 1000, dtype=np.dtype('int32'))
    data = data_array.tobytes()
    # First segment carries the metadata (including the scaling properties)
    # plus one chunk of raw data
    test_file.add_segment(
        ("kTocMetaData", "kTocRawData", "kTocNewObjList"),
        segment_objects_metadata(
            channel_metadata("/'group'/'channel1'", TDS_TYPE_INT32, 100, properties),
        ),
        data, binary_data=True
    )
    # Nine further raw-data-only segments reuse the same metadata,
    # giving ten copies of data_array in total
    for _ in range(0, 9):
        test_file.add_segment(
            ("kTocRawData", ), "", data, binary_data=True)

    with TdmsFile.open(test_file.get_bytes_io_file()) as tdms_file:
        channel = tdms_file['group']['channel1']
        channel_data = benchmark(stream_chunks, channel)

    # Streamed chunks concatenated should equal the linearly scaled
    # data repeated once per segment
    channel_data = np.concatenate(channel_data)
    expected_data = np.tile(10.0 + 2.0 * data_array, 10)
    np.testing.assert_equal(channel_data, expected_data)


@pytest.mark.benchmark(group='read-metadata')
def test_complex_metadata_reading(benchmark):
    """ Benchmark reading metadata for a file with many channels and segments with alternating sets of objects
    """
    def channel_path(index):
        # Build the TDMS object path for a numbered channel in 'group'
        return "/'group'/'channel{0}'".format(index)

    test_file = GeneratedFile()
    data = np.array([0] * 5, dtype=np.dtype('int32')).tobytes()

    # Two lead segments each introduce five new channels with full metadata:
    # channels 0-4 first, then channels 5-9
    for introduced in (range(0, 5), range(5, 10)):
        test_file.add_segment(
            ("kTocMetaData", "kTocRawData", "kTocNewObjList"),
            segment_objects_metadata(
                *[channel_metadata(channel_path(i), TDS_TYPE_INT32, 1)
                  for i in introduced]),
            data, binary_data=True)

    # Nine pairs of segments then alternate which half of the channels has
    # data: in one segment channels 0-4 have no data while 5-9 repeat their
    # previously declared structure, and in the next the roles are swapped
    for _ in range(9):
        for first_half_empty in (True, False):
            objects = []
            for i in range(10):
                if (i < 5) == first_half_empty:
                    objects.append(channel_metadata_with_no_data(channel_path(i)))
                else:
                    objects.append(
                        channel_metadata_with_repeated_structure(channel_path(i)))
            test_file.add_segment(
                ("kTocMetaData", "kTocRawData", "kTocNewObjList"),
                segment_objects_metadata(*objects),
                data, binary_data=True)

    tdms_file = benchmark(read_metadata_from_start, test_file.get_bytes_io_file())

    # Each channel gets one value from its lead segment and one from each of
    # the nine segments where it repeats its structure
    assert len(tdms_file) == 1
    assert len(tdms_file['group']) == 10
    for channel_num in range(10):
        assert len(tdms_file['group']['channel{0}'.format(channel_num)]) == 10


def get_contiguous_file():
    """ Build a test file with four int32 channels of contiguous data.

    One metadata segment is followed by nine raw-data-only segments; each
    segment holds 100 values per channel repeated 10 times, so every channel
    ends up with 10000 values equal to its channel number.
    """
    test_file = GeneratedFile()
    chunk = np.repeat(np.array([1, 2, 3, 4], dtype=np.dtype('int32')), 100)
    segment_data = np.tile(chunk, 10).tobytes()
    metadata = segment_objects_metadata(
        *[channel_metadata(
            "/'group'/'channel{0}'".format(i), TDS_TYPE_INT32, 100)
          for i in range(1, 5)])
    test_file.add_segment(
        ("kTocMetaData", "kTocRawData", "kTocNewObjList"),
        metadata, segment_data, binary_data=True)
    for _ in range(9):
        test_file.add_segment(
            ("kTocRawData", ), "", segment_data, binary_data=True)
    return test_file


def get_interleaved_file():
    """ Build a test file with four int32 channels of interleaved data.

    One metadata segment is followed by nine raw-data-only segments, all
    flagged with kTocInterleavedData; every channel ends up with 10000
    values equal to its channel number.
    """
    test_file = GeneratedFile()
    segment_data = np.tile(
        np.array([1, 2, 3, 4], dtype=np.dtype('int32')), 1000).tobytes()
    metadata = segment_objects_metadata(
        *[channel_metadata(
            "/'group'/'channel{0}'".format(i), TDS_TYPE_INT32, 100)
          for i in range(1, 5)])
    test_file.add_segment(
        ("kTocMetaData", "kTocRawData", "kTocNewObjList", "kTocInterleavedData"),
        metadata, segment_data, binary_data=True)
    for _ in range(9):
        test_file.add_segment(
            ("kTocRawData", "kTocInterleavedData"), "", segment_data,
            binary_data=True)
    return test_file


def read_from_start(file):
    """ Seek to the beginning of a file object and read it fully as a TDMS file.

    This is the benchmarked callable: rewinding first means repeated
    benchmark iterations re-read the same in-memory file.

    :param file: A seekable binary file object containing TDMS data.
    :return: The TdmsFile read from the file.
    """
    file.seek(0, os.SEEK_SET)
    return TdmsFile.read(file)


def read_metadata_from_start(file):
    """ Seek to the beginning of a file object and read only the TDMS metadata.

    This is the benchmarked callable: rewinding first means repeated
    benchmark iterations re-read the same in-memory file.

    :param file: A seekable binary file object containing TDMS data.
    :return: The TdmsFile with metadata only.
    """
    file.seek(0, os.SEEK_SET)
    return TdmsFile.read_metadata(file)


def read_channel_data(chan):
    """ Read a channel's full data with a single slice.

    Kept as a separate function so the slice read is the only
    operation measured by the benchmark.
    """
    return chan[:]


def stream_chunks(chan):
    """ Stream all chunks of a channel's data and return them as a list.

    Each chunk is materialised with a full slice so the benchmark
    measures reading the streamed values, not just iterating chunk
    objects.
    """
    all_data = []
    for chunk in chan.data_chunks():
        all_data.append(chunk[:])
    return all_data


def get_slice(chan, start, stop):
    """ Read a [start:stop) slice of a channel's data.

    Kept as a separate function so the slice read is the only
    operation measured by the benchmark.
    """
    return chan[start:stop]


def index_values(chan, target):
    """ Read every channel value into a preallocated array via integer indexing.

    Deliberately reads one element at a time (rather than slicing) so the
    benchmark measures per-element index access; target must be at least
    len(chan) long.
    """
    for i in range(len(chan)):
        target[i] = chan[i]
41 changes: 24 additions & 17 deletions nptdms/test/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -205,24 +205,27 @@ class GeneratedFile(object):
def __init__(self):
self._content = []

def add_segment(self, toc, metadata, data, incomplete=False):
def add_segment(self, toc, metadata, data, incomplete=False, binary_data=False):
metadata_bytes = _hex_to_bytes(metadata)
data_bytes = _hex_to_bytes(data)
data_bytes = data if binary_data else _hex_to_bytes(data)
if toc is not None:
lead_in = b'TDSm'
toc_mask = long(0)
if "kTocMetaData" in toc:
toc_mask = toc_mask | long(1) << 1
if "kTocRawData" in toc:
toc_mask = toc_mask | long(1) << 3
if "kTocDAQmxRawData" in toc:
toc_mask = toc_mask | long(1) << 7
if "kTocInterleavedData" in toc:
toc_mask = toc_mask | long(1) << 5
if "kTocBigEndian" in toc:
toc_mask = toc_mask | long(1) << 6
if "kTocNewObjList" in toc:
toc_mask = toc_mask | long(1) << 2
for toc_item in toc:
if toc_item == "kTocMetaData":
toc_mask = toc_mask | long(1) << 1
elif toc_item == "kTocRawData":
toc_mask = toc_mask | long(1) << 3
elif toc_item == "kTocDAQmxRawData":
toc_mask = toc_mask | long(1) << 7
elif toc_item == "kTocInterleavedData":
toc_mask = toc_mask | long(1) << 5
elif toc_item == "kTocBigEndian":
toc_mask = toc_mask | long(1) << 6
elif toc_item == "kTocNewObjList":
toc_mask = toc_mask | long(1) << 2
else:
raise ValueError("Unrecognised TOC value: %s" % toc_item)
lead_in += struct.pack('<i', toc_mask)
lead_in += _hex_to_bytes("69 12 00 00")
next_segment_offset = len(metadata_bytes) + len(data_bytes)
Expand Down Expand Up @@ -266,6 +269,12 @@ def load(self, *args, **kwargs):
file.seek(0)
return tdms.TdmsFile(file, *args, **kwargs)

def get_bytes_io_file(self):
    """ Return the generated file contents as an in-memory BytesIO object,
        rewound to the start ready for reading.
    """
    file = BytesIO()
    file.write(self._get_contents())
    file.seek(0)
    return file

def _get_contents(self):
contents = b''
for segment in self._content:
Expand All @@ -287,9 +296,7 @@ def _get_index_contents(self):

class BytesIoTestFile(GeneratedFile):
def load(self, *args, **kwargs):
file = BytesIO()
file.write(self._get_contents())
file.seek(0)
file = self.get_bytes_io_file()
return tdms.TdmsFile(file, *args, **kwargs)


Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ def read_version():
],
install_requires = ['numpy'],
extras_require = {
'test': ['pytest>=3.1.0', 'hypothesis', 'mock<4.0;python_version<"3.4"'],
'test': ['pytest>=3.1.0', 'hypothesis', 'pytest-benchmark', 'mock<4.0;python_version<"3.4"'],
'pandas': ['pandas'],
'hdf': ['h5py>=2.10.0'],
'thermocouple_scaling': ['thermocouples_reference', 'scipy'],
Expand Down

0 comments on commit 53d7cbc

Please sign in to comment.