From 179af72c43ced2fdf33f08df39ab11d5c828523f Mon Sep 17 00:00:00 2001 From: kensato Date: Fri, 28 Feb 2025 14:16:01 +0900 Subject: [PATCH 1/2] Fix: Use unsigned integer reading methods for PUSHDATA opcode lengths This commit fixes a bug in the Script._build_chunks method where signed integer reading methods were being used for PUSHDATA opcode length values. Since these values should always be treated as unsigned, using signed integer methods could result in negative values when high bits are set, causing incorrect chunk parsing. Changed to use read_uint8, read_uint16_le, and read_uint32_le methods instead of read_int for PUSHDATA1, PUSHDATA2, and PUSHDATA4 opcodes respectively. Added tests to verify correct handling of large data lengths for all PUSHDATA opcodes. --- bsv/keys.py | 2 + bsv/script/script.py | 14 ++- tests/test_script_chunk_oppushdata.py | 164 ++++++++++++++++++++++++++ 3 files changed, 176 insertions(+), 4 deletions(-) create mode 100644 tests/test_script_chunk_oppushdata.py diff --git a/bsv/keys.py b/bsv/keys.py index abef47e..982237d 100644 --- a/bsv/keys.py +++ b/bsv/keys.py @@ -289,6 +289,8 @@ def verify_recoverable( def sign_text(self, text: str) -> Tuple[str, str]: """sign arbitrary text with bitcoin private key :returns: (p2pkh_address, stringified_recoverable_ecdsa_signature) + This function follows Bitcoin Signed Message Format. + For BRC-77, use signed_message.py instead. """ message: bytes = text_digest(text) return self.address(), stringify_ecdsa_recoverable(self.sign_recoverable(message), self.compressed) diff --git a/bsv/script/script.py b/bsv/script/script.py index 6314dff..6709805 100644 --- a/bsv/script/script.py +++ b/bsv/script/script.py @@ -52,12 +52,18 @@ def _build_chunks(self): data = None if b'\x01' <= op <= b'\x4b': data = reader.read_bytes(int.from_bytes(op, 'big')) - elif op == OpCode.OP_PUSHDATA1: - data = reader.read_bytes(reader.read_int8()) + elif op == OpCode.OP_PUSHDATA1: # 0x4c + length = reader.read_uint8() + if length is not None: + data = reader.read_bytes(length) elif op == OpCode.OP_PUSHDATA2: - data = reader.read_bytes(reader.read_int16_le()) + length = reader.read_uint16_le() + if length is not None: + data = reader.read_bytes(length) elif op == OpCode.OP_PUSHDATA4: - data = reader.read_bytes(reader.read_int32_le()) + length = reader.read_uint32_le() + if length is not None: + data = reader.read_bytes(length) chunk.data = data self.chunks.append(chunk) diff --git a/tests/test_script_chunk_oppushdata.py b/tests/test_script_chunk_oppushdata.py new file mode 100644 index 0000000..48a8c94 --- /dev/null +++ b/tests/test_script_chunk_oppushdata.py @@ -0,0 +1,164 @@ +import pytest +from bsv.script.script import Script +from bsv.constants import OpCode + + +def test_script_build_chunks_pushdata_opcodes(): + """ + Test that the Script._build_chunks method correctly handles PUSHDATA opcodes + when changing the reading method from byte-by-int to unit-based reading. + """ + + # Test PUSHDATA1 with a length value that would be negative if incorrectly interpreted as signed + # 0xff = 255 bytes of data + pushdata1_high_length = b'\x4c\xff' + b'\x42' * 255 + script_pushdata1 = Script(pushdata1_high_length) + assert len(script_pushdata1.chunks) == 1 + assert script_pushdata1.chunks[0].op == OpCode.OP_PUSHDATA1 + assert script_pushdata1.chunks[0].data == b'\x42' * 255 + assert len(script_pushdata1.chunks[0].data) == 255 + + # Test with smaller data sizes to ensure consistent behavior + pushdata1_75 = b'\x4c\xff' + b'\x42' * 75 + script_pushdata1_75 = Script(pushdata1_75) + assert len(script_pushdata1_75.chunks) == 1 + assert script_pushdata1_75.chunks[0].op == OpCode.OP_PUSHDATA1 + assert script_pushdata1_75.chunks[0].data == b'\x42' * 75 + + pushdata1_76 = b'\x4c\xff' + b'\x42' * 76 + script_pushdata1_76 = Script(pushdata1_76) + assert len(script_pushdata1_76.chunks) == 1 + assert script_pushdata1_76.chunks[0].op == OpCode.OP_PUSHDATA1 + assert script_pushdata1_76.chunks[0].data == b'\x42' * 76 + + # Test PUSHDATA2 with a length value that would be negative if incorrectly interpreted as signed + # 0xffff = 65535 bytes of data + pushdata2_high_length = b'\x4d\xff\xff' + b'\x42' * 65535 + script_pushdata2 = Script(pushdata2_high_length) + assert len(script_pushdata2.chunks) == 1 + assert script_pushdata2.chunks[0].op == OpCode.OP_PUSHDATA2 + assert script_pushdata2.chunks[0].data == b'\x42' * 65535 + assert len(script_pushdata2.chunks[0].data) == 65535 + + # Test with smaller data sizes for PUSHDATA2 + pushdata2_255 = b'\x4d\xff\xff' + b'\x42' * 255 + script_pushdata2_255 = Script(pushdata2_255) + assert len(script_pushdata2_255.chunks) == 1 + assert script_pushdata2_255.chunks[0].op == OpCode.OP_PUSHDATA2 + assert script_pushdata2_255.chunks[0].data == b'\x42' * 255 + + pushdata2_256 = b'\x4d\xff\xff' + b'\x42' * 256 + script_pushdata2_256 = Script(pushdata2_256) + assert len(script_pushdata2_256.chunks) == 1 + assert script_pushdata2_256.chunks[0].op == OpCode.OP_PUSHDATA2 + assert script_pushdata2_256.chunks[0].data == b'\x42' * 256 + + # Test PUSHDATA4 with values that would be negative if interpreted as signed integers + # Test with very large value - 0x80000001 = 2,147,483,649 (would be -2,147,483,647 as signed int32) + # Note: This test may require significant memory + pushdata4_large_value = b'\x4e\x01\x00\x00\x80' + b'\x42' * 2147483649 + script_pushdata4_large = Script(pushdata4_large_value) + assert len(script_pushdata4_large.chunks) == 1 + assert script_pushdata4_large.chunks[0].op == OpCode.OP_PUSHDATA4 + assert len(script_pushdata4_large.chunks[0].data) == 2147483649 + + # Test with smaller data sizes for PUSHDATA4 + pushdata4_upper_half = b'\x4e\x00\x00\x00\xC0' + b'\x43' * 65535 + script_pushdata4_upper_half = Script(pushdata4_upper_half) + assert len(script_pushdata4_upper_half.chunks) == 1 + assert script_pushdata4_upper_half.chunks[0].op == OpCode.OP_PUSHDATA4 + assert len(script_pushdata4_upper_half.chunks[0].data) == 65535 + + # Test with slightly larger data size + pushdata4_upper_half_2 = b'\x4e\x00\x00\x00\xC0' + b'\x43' * 65536 + script_pushdata4_upper_half_2 = Script(pushdata4_upper_half_2) + assert len(script_pushdata4_upper_half_2.chunks) == 1 + assert script_pushdata4_upper_half_2.chunks[0].op == OpCode.OP_PUSHDATA4 + assert len(script_pushdata4_upper_half_2.chunks[0].data) == 65536 + + # Test boundary cases where the length is exactly at important thresholds + # PUSHDATA1 with length 0 + pushdata1_zero = b'\x4c\x00' + script_pushdata1_zero = Script(pushdata1_zero) + assert len(script_pushdata1_zero.chunks) == 1 + assert script_pushdata1_zero.chunks[0].op == OpCode.OP_PUSHDATA1 + assert script_pushdata1_zero.chunks[0].data == b'' + assert len(script_pushdata1_zero.chunks[0].data) == 0 + + # Edge case: PUSHDATA with incomplete length specification + incomplete_pushdata1 = b'\x4c' # PUSHDATA1 without length byte + script_incomplete1 = Script(incomplete_pushdata1) + assert len(script_incomplete1.chunks) == 1 + assert script_incomplete1.chunks[0].op == OpCode.OP_PUSHDATA1 + assert script_incomplete1.chunks[0].data is None + + incomplete_pushdata2 = b'\x4d\xff' # PUSHDATA2 with incomplete length (only one byte) + script_incomplete2 = Script(incomplete_pushdata2) + assert len(script_incomplete2.chunks) == 1 + assert script_incomplete2.chunks[0].op == OpCode.OP_PUSHDATA2 + assert script_incomplete2.chunks[0].data == b'' + + # Edge case: PUSHDATA with specified length but insufficient data + insufficient_data1 = b'\x4c\x0A\x01\x02\x03' # PUSHDATA1 expecting 10 bytes but only 3 are provided + script_insufficient1 = Script(insufficient_data1) + assert len(script_insufficient1.chunks) == 1 + assert script_insufficient1.chunks[0].op == OpCode.OP_PUSHDATA1 + assert script_insufficient1.chunks[0].data == b'\x01\x02\x03' # Should get the available data + + # Multiple PUSHDATA opcodes in sequence to test parsing continuity + mixed_pushdata = ( + b'\x4c\x03\x01\x02\x03' # PUSHDATA1 with 3 bytes + b'\x4d\x04\x00\x04\x05\x06\x07' # PUSHDATA2 with 4 bytes + b'\x02\x08\x09' # Direct push of 2 bytes + ) + script_mixed = Script(mixed_pushdata) + assert len(script_mixed.chunks) == 3 + assert script_mixed.chunks[0].op == OpCode.OP_PUSHDATA1 + assert script_mixed.chunks[0].data == b'\x01\x02\x03' + assert script_mixed.chunks[1].op == OpCode.OP_PUSHDATA2 + assert script_mixed.chunks[1].data == b'\x04\x05\x06\x07' + assert script_mixed.chunks[2].op == b'\x02' + assert script_mixed.chunks[2].data == b'\x08\x09' + + +def test_script_serialization_with_pushdata(): + """ + Test that serialization and deserialization of scripts with PUSHDATA opcodes work correctly. + + This test verifies that scripts containing PUSHDATA opcodes can be: + 1. Serialized back to their original binary form + 2. Deserialized from binary to produce identical Script objects with properly parsed chunks + + This ensures the round-trip integrity of Script objects with various PUSHDATA operations. + """ + # Create a script with various PUSHDATA opcodes and direct push data + original_script = ( + b'\x4c\x03\x01\x02\x03' # PUSHDATA1 with 3 bytes + b'\x4d\x04\x00\x04\x05\x06\x07' # PUSHDATA2 with 4 bytes + b'\x02\x08\x09' # Direct push of 2 bytes + ) + + script = Script(original_script) + + # Serialize and deserialize the script + serialized = script.serialize() + deserialized = Script(serialized) + + # Verify the scripts are equivalent + assert serialized == original_script + assert deserialized.serialize() == original_script + + # Check that the chunks are correctly parsed + assert len(deserialized.chunks) == 3 + assert deserialized.chunks[0].op == OpCode.OP_PUSHDATA1 + assert deserialized.chunks[0].data == b'\x01\x02\x03' + assert deserialized.chunks[1].op == OpCode.OP_PUSHDATA2 + assert deserialized.chunks[1].data == b'\x04\x05\x06\x07' + assert deserialized.chunks[2].op == b'\x02' + assert deserialized.chunks[2].data == b'\x08\x09' + + +if __name__ == "__main__": + test_script_build_chunks_pushdata_opcodes() + test_script_serialization_with_pushdata() + print("All tests passed!") From 70efbd44d35531f6522ab2ad558d172b1db6e3fd Mon Sep 17 00:00:00 2001 From: kensato Date: Fri, 28 Feb 2025 14:36:04 +0900 Subject: [PATCH 2/2] update CHANGELOG.md --- CHANGELOG.md | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0c74888..33367ee 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -33,6 +33,27 @@ to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). ### Security - (Notify of any improvements related to security vulnerabilities or potential risks.) +--- +## [1.0.2] - 2025-02-28 + +### Added +- BIP32_DERIVATION_PATH environment variable for customizing default derivation paths +- Dedicated BIP32 derivation functions with clearer interface +- BIP44-specific functions that build on the BIP32 foundation +- Comprehensive tests for HD wallet key derivation consistency +- Enhanced error messages for hardened key derivation attempts from xpub keys + +### Fixed +- PUSHDATA opcode length parsing now uses unsigned integer reading methods (read_uint8, read_uint16_le, read_uint32_le) instead of signed integer methods to prevent incorrect chunk parsing with large data lengths +- Proper handling of edge cases in Script parsing including length 0 and incomplete length specifications +- Serialization/deserialization consistency for various PUSHDATA operations + +### Changed +- Refined HD wallet key derivation interface while maintaining backward compatibility +- Improved error messages for invalid derivation attempts +- Marked legacy derivation functions as deprecated while maintaining compatibility + + --- ## [1.0.1] - 2025-01-09