From 0b06e720c0182dee8b560d2e8d3891b036f63ea7 Mon Sep 17 00:00:00 2001 From: Samuel Dobson Date: Fri, 1 Oct 2021 12:14:48 +1300 Subject: [PATCH 1/8] More detailed error checking for base58 addresses --- src/key_io.cpp | 30 ++++++++++++++----- .../functional/rpc_invalid_address_message.py | 8 ++--- 2 files changed, 27 insertions(+), 11 deletions(-) diff --git a/src/key_io.cpp b/src/key_io.cpp index 615f4c9312..9e246c0b81 100644 --- a/src/key_io.cpp +++ b/src/key_io.cpp @@ -81,7 +81,11 @@ CTxDestination DecodeDestination(const std::string& str, const CChainParams& par std::vector data; uint160 hash; error_str = ""; - if (DecodeBase58Check(str, data, 21)) { + + // Note this will be false if it is a valid Bech32 address for a different network + bool is_bech32 = (ToLower(str.substr(0, params.Bech32HRP().size())) == params.Bech32HRP()); + + if (!is_bech32 && DecodeBase58Check(str, data, 21)) { // base58-encoded Bitcoin addresses. // Public-key-hash-addresses have version 0 (or 111 testnet). // The data vector contains RIPEMD160(SHA256(pubkey)), where pubkey is the serialized public key. @@ -98,15 +102,27 @@ CTxDestination DecodeDestination(const std::string& str, const CChainParams& par return ScriptHash(hash); } - // Set potential error message. - // This message may be changed if the address can also be interpreted as a Bech32 address. - error_str = "Invalid prefix for Base58-encoded address"; + if (!std::equal(script_prefix.begin(), script_prefix.end(), data.begin()) && + !std::equal(pubkey_prefix.begin(), pubkey_prefix.end(), data.begin())) { + error_str = "Invalid prefix for Base58-encoded address"; + } else { + error_str = "Invalid length for Base58 address"; + } + return CNoDestination(); + } else if (!is_bech32) { + // Try Base58 decoding without the checksum, using a much larger max length + if (!DecodeBase58(str, data, 100)) { + error_str = "Invalid HRP or Base58 character in address"; + } else { + error_str = "Invalid checksum or length of Base58 address"; + } + return CNoDestination(); } + data.clear(); const auto dec = bech32::Decode(str); if ((dec.encoding == bech32::Encoding::BECH32 || dec.encoding == bech32::Encoding::BECH32M) && dec.data.size() > 0) { // Bech32 decoding - error_str = ""; if (dec.hrp != params.Bech32HRP()) { error_str = "Invalid prefix for Bech32 address"; return CNoDestination(); @@ -168,8 +184,8 @@ CTxDestination DecodeDestination(const std::string& str, const CChainParams& par } } - // Set error message if address can't be interpreted as Base58 or Bech32. - if (error_str.empty()) error_str = "Invalid address format"; + //TODO: locate Bech32 errors + error_str = "Error in Bech32 encoding"; return CNoDestination(); } diff --git a/test/functional/rpc_invalid_address_message.py b/test/functional/rpc_invalid_address_message.py index 7ab5a5e90d..38008cbf42 100755 --- a/test/functional/rpc_invalid_address_message.py +++ b/test/functional/rpc_invalid_address_message.py @@ -39,7 +39,7 @@ def test_validateaddress(self): info = node.validateaddress(BECH32_INVALID_PREFIX) assert not info['isvalid'] - assert_equal(info['error'], 'Invalid prefix for Bech32 address') + assert_equal(info['error'], 'Invalid HRP or Base58 character in address') info = node.validateaddress(BECH32_INVALID_BECH32) assert not info['isvalid'] @@ -73,18 +73,18 @@ def test_validateaddress(self): # Invalid address format info = node.validateaddress(INVALID_ADDRESS) assert not info['isvalid'] - assert_equal(info['error'], 'Invalid address format') + assert_equal(info['error'], 'Invalid HRP or Base58 character in address') def test_getaddressinfo(self): node = self.nodes[0] assert_raises_rpc_error(-5, "Invalid Bech32 address data size", node.getaddressinfo, BECH32_INVALID_SIZE) - assert_raises_rpc_error(-5, "Invalid prefix for Bech32 address", node.getaddressinfo, BECH32_INVALID_PREFIX) + assert_raises_rpc_error(-5, "Invalid HRP or Base58 character in address", node.getaddressinfo, BECH32_INVALID_PREFIX) assert_raises_rpc_error(-5, "Invalid prefix for Base58-encoded address", node.getaddressinfo, BASE58_INVALID_PREFIX) - assert_raises_rpc_error(-5, "Invalid address format", node.getaddressinfo, INVALID_ADDRESS) + assert_raises_rpc_error(-5, "Invalid HRP or Base58 character in address", node.getaddressinfo, INVALID_ADDRESS) def run_test(self): self.test_validateaddress() From b62b67e06cc406fdad68da4c091168fb5f11c1d4 Mon Sep 17 00:00:00 2001 From: Samuel Dobson Date: Fri, 1 Oct 2021 12:16:20 +1300 Subject: [PATCH 2/8] Add Bech32 error location function --- src/bech32.cpp | 319 +++++++++++++++++++++++++++++++++++++++++++++++-- src/bech32.h | 3 + src/key_io.cpp | 17 ++- src/key_io.h | 2 +- 4 files changed, 326 insertions(+), 15 deletions(-) diff --git a/src/bech32.cpp b/src/bech32.cpp index 9da2488ef2..905a2aa2c0 100644 --- a/src/bech32.cpp +++ b/src/bech32.cpp @@ -30,6 +30,155 @@ const int8_t CHARSET_REV[128] = { 1, 0, 3, 16, 11, 28, 12, 14, 6, 4, 2, -1, -1, -1, -1, -1 }; + +const int16_t GF1024_EXP[] = { + 1, 303, 635, 446, 997, 640, 121, 142, 959, 420, 350, 438, 166, 39, 543, + 335, 831, 691, 117, 632, 719, 97, 107, 374, 558, 797, 54, 150, 858, 877, + 724, 1013, 294, 23, 354, 61, 164, 633, 992, 538, 469, 659, 174, 868, 184, + 809, 766, 563, 866, 851, 257, 520, 45, 770, 535, 524, 408, 213, 436, 760, + 472, 330, 933, 799, 616, 361, 15, 391, 756, 814, 58, 608, 554, 680, 993, + 821, 942, 813, 843, 484, 193, 935, 321, 919, 572, 741, 423, 559, 562, + 589, 296, 191, 493, 685, 891, 665, 435, 60, 395, 2, 606, 511, 853, 746, + 32, 219, 284, 631, 840, 661, 837, 332, 78, 311, 670, 887, 111, 195, 505, + 190, 194, 214, 709, 380, 819, 69, 261, 957, 1018, 161, 739, 588, 7, 708, + 83, 328, 507, 736, 317, 899, 47, 348, 1000, 345, 882, 245, 367, 996, 943, + 514, 304, 90, 804, 295, 312, 793, 387, 833, 249, 921, 660, 618, 823, 496, + 722, 30, 782, 225, 892, 93, 480, 372, 112, 738, 867, 636, 890, 950, 968, + 386, 622, 642, 551, 369, 234, 846, 382, 365, 442, 592, 343, 986, 122, + 1023, 59, 847, 81, 790, 4, 437, 983, 931, 244, 64, 415, 529, 487, 944, + 35, 938, 664, 156, 583, 53, 999, 222, 390, 987, 341, 388, 389, 170, 721, + 879, 138, 522, 627, 765, 322, 230, 440, 14, 168, 143, 656, 991, 224, 595, + 550, 94, 657, 752, 667, 1005, 451, 734, 744, 638, 292, 585, 157, 872, + 590, 601, 827, 774, 930, 475, 571, 33, 500, 871, 969, 173, 21, 828, 450, + 1009, 147, 960, 705, 201, 228, 998, 497, 1021, 613, 688, 772, 508, 36, + 366, 715, 468, 956, 725, 730, 861, 425, 647, 701, 221, 759, 95, 958, 139, + 805, 8, 835, 679, 614, 449, 128, 791, 299, 974, 617, 70, 628, 57, 273, + 430, 67, 750, 405, 780, 703, 643, 776, 778, 340, 171, 1022, 276, 308, + 495, 243, 644, 460, 857, 28, 336, 286, 41, 695, 448, 431, 364, 149, 43, + 233, 63, 762, 902, 181, 240, 501, 584, 434, 275, 1008, 444, 443, 895, + 812, 612, 927, 383, 66, 961, 1006, 690, 346, 3, 881, 900, 747, 271, 672, + 162, 402, 456, 748, 971, 755, 490, 105, 808, 977, 72, 732, 182, 897, 625, + 163, 189, 947, 850, 46, 115, 403, 231, 151, 629, 278, 874, 16, 934, 110, + 492, 898, 256, 807, 598, 700, 498, 140, 481, 91, 523, 860, 134, 252, 771, + 824, 119, 38, 816, 820, 641, 342, 757, 513, 577, 990, 463, 40, 920, 955, + 17, 649, 533, 82, 103, 896, 862, 728, 259, 86, 466, 87, 253, 556, 323, + 457, 963, 432, 845, 527, 745, 849, 863, 1015, 888, 488, 567, 727, 132, + 674, 764, 109, 669, 6, 1003, 552, 246, 542, 96, 324, 781, 912, 248, 694, + 239, 980, 210, 880, 683, 144, 177, 325, 546, 491, 326, 339, 623, 941, 92, + 207, 783, 462, 263, 483, 517, 1012, 9, 620, 220, 984, 548, 512, 878, 421, + 113, 973, 280, 962, 159, 310, 945, 268, 465, 806, 889, 199, 76, 873, 865, + 34, 645, 227, 290, 418, 693, 926, 80, 569, 639, 11, 50, 291, 141, 206, + 544, 949, 185, 518, 133, 909, 135, 467, 376, 646, 914, 678, 841, 954, + 318, 242, 939, 951, 743, 1017, 976, 359, 167, 264, 100, 241, 218, 51, 12, + 758, 368, 453, 309, 192, 648, 826, 553, 473, 101, 478, 673, 397, 1001, + 118, 265, 331, 650, 356, 982, 652, 655, 510, 634, 145, 414, 830, 924, + 526, 966, 298, 737, 18, 504, 401, 697, 360, 288, 1020, 842, 203, 698, + 537, 676, 279, 581, 619, 536, 907, 876, 1019, 398, 152, 1010, 994, 68, + 42, 454, 580, 836, 99, 565, 137, 379, 503, 22, 77, 582, 282, 412, 352, + 611, 347, 300, 266, 570, 270, 911, 729, 44, 557, 108, 946, 637, 597, 461, + 630, 615, 238, 763, 681, 718, 334, 528, 200, 459, 413, 79, 24, 229, 713, + 906, 579, 384, 48, 893, 370, 923, 202, 917, 98, 794, 754, 197, 530, 662, + 52, 712, 677, 56, 62, 981, 509, 267, 789, 885, 561, 316, 684, 596, 226, + 13, 985, 779, 123, 720, 576, 753, 948, 406, 125, 315, 104, 519, 426, 502, + 313, 566, 1016, 767, 796, 281, 749, 740, 136, 84, 908, 424, 936, 198, + 355, 274, 735, 967, 5, 154, 428, 541, 785, 704, 486, 671, 600, 532, 381, + 540, 574, 187, 88, 378, 216, 621, 499, 419, 922, 485, 494, 476, 255, 114, + 188, 668, 297, 400, 918, 787, 158, 25, 458, 178, 564, 422, 768, 73, 1011, + 717, 575, 404, 547, 196, 829, 237, 394, 301, 37, 65, 176, 106, 89, 85, + 675, 979, 534, 803, 995, 363, 593, 120, 417, 452, 26, 699, 822, 223, 169, + 416, 235, 609, 773, 211, 607, 208, 302, 852, 965, 603, 357, 761, 247, + 817, 539, 250, 232, 272, 129, 568, 848, 624, 396, 710, 525, 183, 686, 10, + 285, 856, 307, 811, 160, 972, 55, 441, 289, 723, 305, 373, 351, 153, 733, + 409, 506, 975, 838, 573, 970, 988, 913, 471, 205, 337, 49, 594, 777, 549, + 815, 277, 27, 916, 333, 353, 844, 800, 146, 751, 186, 375, 769, 358, 392, + 883, 474, 788, 602, 74, 130, 329, 212, 155, 131, 102, 687, 293, 870, 742, + 726, 427, 217, 834, 904, 29, 127, 869, 407, 338, 832, 470, 482, 810, 399, + 439, 393, 604, 929, 682, 447, 714, 251, 455, 875, 319, 477, 464, 521, + 258, 377, 937, 489, 792, 172, 314, 327, 124, 20, 531, 953, 591, 886, 320, + 696, 71, 859, 578, 175, 587, 707, 663, 283, 179, 795, 989, 702, 940, 371, + 692, 689, 555, 903, 410, 651, 75, 429, 818, 362, 894, 515, 31, 545, 666, + 706, 952, 864, 269, 254, 349, 711, 802, 716, 784, 1007, 925, 801, 445, + 148, 260, 658, 385, 287, 262, 204, 126, 586, 1004, 236, 165, 854, 411, + 932, 560, 19, 215, 1002, 775, 653, 928, 901, 964, 884, 798, 839, 786, + 433, 610, 116, 855, 180, 479, 910, 1014, 599, 915, 905, 306, 516, 731, + 626, 978, 825, 344, 605, 654, 209 +}; +static_assert(std::size(GF1024_EXP) == 1023, "GF1024_EXP length should be 1023"); + +const int16_t GF1024_LOG[] = { + -1, 0, 99, 363, 198, 726, 462, 132, 297, 495, 825, 528, 561, 693, 231, + 66, 396, 429, 594, 990, 924, 264, 627, 33, 660, 759, 792, 858, 330, 891, + 165, 957, 104, 259, 518, 208, 280, 776, 416, 13, 426, 333, 618, 339, 641, + 52, 388, 140, 666, 852, 529, 560, 678, 213, 26, 832, 681, 309, 70, 194, + 97, 35, 682, 341, 203, 777, 358, 312, 617, 125, 307, 931, 379, 765, 875, + 951, 515, 628, 112, 659, 525, 196, 432, 134, 717, 781, 438, 440, 740, + 780, 151, 408, 487, 169, 239, 293, 467, 21, 672, 622, 557, 571, 881, 433, + 704, 376, 779, 22, 643, 460, 398, 116, 172, 503, 751, 389, 1004, 18, 576, + 415, 789, 6, 192, 696, 923, 702, 981, 892, 302, 816, 876, 880, 457, 537, + 411, 539, 716, 624, 224, 295, 406, 531, 7, 233, 478, 586, 864, 268, 974, + 338, 27, 392, 614, 839, 727, 879, 211, 250, 758, 507, 830, 129, 369, 384, + 36, 985, 12, 555, 232, 796, 221, 321, 920, 263, 42, 934, 778, 479, 761, + 939, 1006, 344, 381, 823, 44, 535, 866, 739, 752, 385, 119, 91, 566, 80, + 120, 117, 771, 675, 721, 514, 656, 271, 670, 602, 980, 850, 532, 488, + 803, 1022, 475, 801, 878, 57, 121, 991, 742, 888, 559, 105, 497, 291, + 215, 795, 236, 167, 692, 520, 272, 661, 229, 391, 814, 340, 184, 798, + 984, 773, 650, 473, 345, 558, 548, 326, 202, 145, 465, 810, 471, 158, + 813, 908, 412, 441, 964, 750, 401, 50, 915, 437, 975, 126, 979, 491, 556, + 577, 636, 685, 510, 963, 638, 367, 815, 310, 723, 349, 323, 857, 394, + 606, 505, 713, 630, 938, 106, 826, 332, 978, 599, 834, 521, 530, 248, + 883, 32, 153, 90, 754, 592, 304, 635, 775, 804, 1, 150, 836, 1013, 828, + 324, 565, 508, 113, 154, 708, 921, 703, 689, 138, 547, 911, 929, 82, 228, + 443, 468, 480, 483, 922, 135, 877, 61, 578, 111, 860, 654, 15, 331, 851, + 895, 484, 320, 218, 420, 190, 1019, 143, 362, 634, 141, 965, 10, 838, + 632, 861, 34, 722, 580, 808, 869, 554, 598, 65, 954, 787, 337, 187, 281, + 146, 563, 183, 668, 944, 171, 837, 23, 867, 541, 916, 741, 625, 123, 736, + 186, 357, 665, 977, 179, 156, 219, 220, 216, 67, 870, 902, 774, 98, 820, + 574, 613, 900, 755, 596, 370, 390, 769, 314, 701, 894, 56, 841, 949, 987, + 631, 658, 587, 204, 797, 790, 522, 745, 9, 502, 763, 86, 719, 288, 706, + 887, 728, 952, 311, 336, 446, 1002, 348, 96, 58, 199, 11, 901, 230, 833, + 188, 352, 351, 973, 3, 906, 335, 301, 266, 244, 791, 564, 619, 909, 371, + 444, 760, 657, 328, 647, 490, 425, 913, 511, 439, 540, 283, 40, 897, 849, + 60, 570, 872, 257, 749, 912, 572, 1007, 170, 407, 898, 492, 79, 747, 732, + 206, 454, 918, 375, 482, 399, 92, 748, 325, 163, 274, 405, 744, 260, 346, + 707, 626, 595, 118, 842, 136, 279, 684, 584, 101, 500, 422, 149, 956, + 1014, 493, 536, 705, 51, 914, 225, 409, 55, 822, 590, 448, 655, 205, 676, + 925, 735, 431, 784, 54, 609, 604, 39, 812, 737, 729, 466, 14, 533, 958, + 481, 770, 499, 855, 238, 182, 464, 569, 72, 947, 442, 642, 24, 87, 989, + 688, 88, 47, 762, 623, 709, 455, 817, 526, 637, 258, 84, 845, 738, 768, + 698, 423, 933, 664, 620, 607, 629, 212, 347, 249, 982, 935, 131, 89, 252, + 927, 189, 788, 853, 237, 691, 646, 403, 1010, 734, 253, 874, 807, 903, + 1020, 100, 802, 71, 799, 1003, 633, 355, 276, 300, 649, 64, 306, 161, + 608, 496, 743, 180, 485, 819, 383, 1016, 226, 308, 393, 648, 107, 19, 37, + 585, 2, 175, 645, 247, 527, 5, 419, 181, 317, 327, 519, 542, 289, 567, + 430, 579, 950, 582, 994, 1021, 583, 234, 240, 976, 41, 160, 109, 677, + 937, 210, 95, 959, 242, 753, 461, 114, 733, 368, 573, 458, 782, 605, 680, + 544, 299, 73, 652, 905, 477, 690, 93, 824, 882, 277, 946, 361, 17, 945, + 523, 472, 334, 930, 597, 603, 793, 404, 290, 942, 316, 731, 270, 960, + 936, 133, 122, 821, 966, 679, 662, 907, 282, 968, 767, 653, 20, 697, 222, + 164, 835, 30, 285, 886, 456, 436, 640, 286, 1015, 380, 840, 245, 724, + 137, 593, 173, 130, 715, 85, 885, 551, 246, 449, 103, 366, 372, 714, 313, + 865, 241, 699, 674, 374, 68, 421, 562, 292, 59, 809, 342, 651, 459, 227, + 46, 711, 764, 868, 53, 413, 278, 800, 255, 993, 318, 854, 319, 695, 315, + 469, 166, 489, 969, 730, 1001, 757, 873, 686, 197, 303, 919, 155, 673, + 940, 712, 25, 999, 63, 863, 972, 967, 785, 152, 296, 512, 402, 377, 45, + 899, 829, 354, 77, 69, 856, 417, 811, 953, 124, 418, 75, 794, 162, 414, + 1018, 568, 254, 265, 772, 588, 16, 896, 157, 889, 298, 621, 110, 844, + 1000, 108, 545, 601, 78, 862, 447, 185, 195, 818, 450, 387, 49, 805, 102, + 986, 1005, 827, 329, 28, 932, 410, 287, 435, 451, 962, 517, 48, 174, 43, + 893, 884, 261, 251, 516, 395, 910, 611, 29, 501, 223, 476, 364, 144, 871, + 998, 687, 928, 115, 453, 513, 176, 94, 168, 667, 955, 353, 434, 382, 400, + 139, 365, 996, 343, 948, 890, 1012, 663, 610, 718, 538, 1008, 639, 470, + 848, 543, 1011, 859, 671, 756, 83, 427, 159, 746, 669, 589, 971, 524, + 356, 995, 904, 256, 201, 988, 62, 397, 81, 720, 917, 209, 549, 943, 486, + 76, 148, 207, 509, 644, 386, 700, 534, 177, 550, 961, 926, 546, 428, 284, + 127, 294, 8, 269, 359, 506, 445, 997, 806, 591, 725, 178, 262, 846, 373, + 831, 504, 305, 843, 553, 378, 1017, 783, 474, 683, 581, 200, 498, 694, + 191, 217, 847, 941, 424, 235, 38, 74, 616, 786, 147, 4, 273, 214, 142, + 575, 992, 463, 983, 243, 360, 970, 350, 267, 615, 766, 494, 31, 1009, + 452, 710, 552, 128, 612, 600, 275, 322, 193 +}; +static_assert(std::size(GF1024_LOG) == 1024, "GF1024_EXP length should be 1024"); + /* Determine the final constant to use for the specified encoding. */ uint32_t EncodingConstant(Encoding encoding) { assert(encoding == Encoding::BECH32 || encoding == Encoding::BECH32M); @@ -127,12 +276,73 @@ uint32_t PolyMod(const data& v) return c; } +uint32_t Syndrome(const uint32_t residue) { + uint32_t low = residue & 0x1f; + return low ^ (low << 10) ^ (low << 20) ^ + ((residue >> 5) & 1 ? 0x31edd3c4 : 0) ^ + ((residue >> 6) & 1 ? 0x335f86a8 : 0) ^ + ((residue >> 7) & 1 ? 0x363b8870 : 0) ^ + ((residue >> 8) & 1 ? 0x3e6390c9 : 0) ^ + ((residue >> 9) & 1 ? 0x2ec72192 : 0) ^ + ((residue >> 10) & 1 ? 0x1046f79d : 0) ^ + ((residue >> 11) & 1 ? 0x208d4e33 : 0) ^ + ((residue >> 12) & 1 ? 0x130ebd6f : 0) ^ + ((residue >> 13) & 1 ? 0x2499fade : 0) ^ + ((residue >> 14) & 1 ? 0x1b27d4b5 : 0) ^ + ((residue >> 15) & 1 ? 0x04be1eb4 : 0) ^ + ((residue >> 16) & 1 ? 0x0968b861 : 0) ^ + ((residue >> 17) & 1 ? 0x1055f0c2 : 0) ^ + ((residue >> 18) & 1 ? 0x20ab4584 : 0) ^ + ((residue >> 19) & 1 ? 0x1342af08 : 0) ^ + ((residue >> 20) & 1 ? 0x24f1f318 : 0) ^ + ((residue >> 21) & 1 ? 0x1be34739 : 0) ^ + ((residue >> 22) & 1 ? 0x35562f7b : 0) ^ + ((residue >> 23) & 1 ? 0x3a3c5bff : 0) ^ + ((residue >> 24) & 1 ? 0x266c96f7 : 0) ^ + ((residue >> 25) & 1 ? 0x25c78b65 : 0) ^ + ((residue >> 26) & 1 ? 0x1b1f13ea : 0) ^ + ((residue >> 27) & 1 ? 0x34baa2f4 : 0) ^ + ((residue >> 28) & 1 ? 0x3b61c0e1 : 0) ^ + ((residue >> 29) & 1 ? 0x265325c2 : 0); +} + /** Convert to lower case. */ inline unsigned char LowerCase(unsigned char c) { return (c >= 'A' && c <= 'Z') ? (c - 'A') + 'a' : c; } +void push_range(int from, int to, std::vector& vec) +{ + for (int i = from; i < to; i++) { + vec.push_back(i); + } +} + +/** Return index of first invalid character in a Bech32 string. */ +bool CheckCharacters(const std::string& str, std::vector& errors) { + bool lower = false, upper = false; + for (size_t i = 0; i < str.size(); ++i) { + unsigned char c = str[i]; + if (c >= 'a' && c <= 'z') { + if (upper) { + errors.push_back(i); + } else { + lower = true; + } + } else if (c >= 'A' && c <= 'Z') { + if (lower) { + errors.push_back(i); + } else { + upper = true; + } + } else if (c < 33 || c > 126) { + errors.push_back(i); + } + } + return errors.empty(); +} + /** Expand a HRP for use in checksum computation. */ data ExpandHRP(const std::string& hrp) { @@ -196,14 +406,8 @@ std::string Encode(Encoding encoding, const std::string& hrp, const data& values /** Decode a Bech32 or Bech32m string. */ DecodeResult Decode(const std::string& str) { - bool lower = false, upper = false; - for (size_t i = 0; i < str.size(); ++i) { - unsigned char c = str[i]; - if (c >= 'a' && c <= 'z') lower = true; - else if (c >= 'A' && c <= 'Z') upper = true; - else if (c < 33 || c > 126) return {}; - } - if (lower && upper) return {}; + std::vector errors; + if (!CheckCharacters(str, errors)) return {}; size_t pos = str.rfind('1'); if (str.size() > 90 || pos == str.npos || pos == 0 || pos + 7 > str.size()) { return {}; @@ -227,4 +431,103 @@ DecodeResult Decode(const std::string& str) { return {result, std::move(hrp), data(values.begin(), values.end() - 6)}; } +/** Find index of an incorrect character in a Bech32 string. */ +std::string LocateErrors(const std::string& str, std::vector& error_locations) { + if (str.size() > 90) { + push_range(90, str.size(), error_locations); + return "Bech32 string too long"; + } + if (!CheckCharacters(str, error_locations)){ + return "Invalid character or mixed case"; + } + size_t pos = str.rfind('1'); + if (pos == str.npos) { + return "Missing separator"; + } + if (pos == 0 || pos + 7 > str.size()) { + error_locations.push_back(pos); + return "Invalid separator position"; + } + std::string hrp; + for (size_t i = 0; i < pos; ++i) { + hrp += LowerCase(str[i]); + } + + size_t length = str.size() - 1 - pos; // length of data part + data values(length); + for (size_t i = pos + 1; i < str.size(); ++i) { + unsigned char c = str[i]; + int8_t rev = CHARSET_REV[c]; + if (rev == -1) { + error_locations.push_back(i); + return "Invalid Base 32 character"; + } + values[i - pos - 1] = rev; + } + + // We attempt error detection with both bech32 and bech32m, and choose the one with the fewest errors + // We can't simply use the segwit version, because that may be one of the errors + for (Encoding encoding : {Encoding::BECH32, Encoding::BECH32M}) { + std::vector possible_errors; + uint32_t residue = PolyMod(Cat(ExpandHRP(hrp), values)) ^ EncodingConstant(encoding); + if (residue != 0) { + uint32_t syn = Syndrome(residue); + int s0 = syn & 0x3FF; + int s1 = (syn >> 10) & 0x3FF; + int s2 = syn >> 20; + int l_s0 = GF1024_LOG[s0]; + int l_s1 = GF1024_LOG[s1]; + int l_s2 = GF1024_LOG[s2]; + + if (l_s0 != -1 && l_s1 != -1 && l_s2 != -1 && (2 * l_s1 - l_s2 - l_s0 + 2046) % 1023 == 0) { + size_t p1 = (l_s1 - l_s0 + 1023) % 1023; + int l_e1 = l_s0 + (1023 - 997) * p1; + if (p1 < length && !(l_e1 % 33)) { + possible_errors.push_back(str.size() - p1 - 1); + } + } else { + for (size_t p1 = 0; p1 < length; ++p1) { + int s2_s1p1 = s2 ^ (s1 == 0 ? 0 : GF1024_EXP[(l_s1 + p1) % 1023]); + if (s2_s1p1 == 0) continue; + + int s1_s0p1 = s1 ^ (s0 == 0 ? 0 : GF1024_EXP[(l_s0 + p1) % 1023]); + if (s1_s0p1 == 0) continue; + + int l_s1_s0p1 = GF1024_LOG[s1_s0p1]; + size_t p2 = (GF1024_LOG[s2_s1p1] - l_s1_s0p1 + 1023) % 1023; + if (p2 >= length || p1 == p2) continue; + + int s1_s0p2 = s1 ^ (s0 == 0 ? 0 : GF1024_EXP[(l_s0 + p2) % 1023]); + if (s1_s0p2 == 0) continue; + + int inv_p1_p2 = 1023 - GF1024_LOG[GF1024_EXP[p1] ^ GF1024_EXP[p2]]; + int l_e2 = l_s1_s0p1 + inv_p1_p2 + (1023 - 997) * p2; + if (l_e2 % 33) continue; + + int l_e1 = GF1024_LOG[s1_s0p2] + inv_p1_p2 + (1023 - 997) * p1; + if (l_e1 % 33) continue; + + if (p1 > p2) { + possible_errors.push_back(str.size() - p1 - 1); + possible_errors.push_back(str.size() - p2 - 1); + } else { + possible_errors.push_back(str.size() - p2 - 1); + possible_errors.push_back(str.size() - p1 - 1); + } + break; + } + } + } else { + // No errors + error_locations.clear(); + return ""; + } + + if (error_locations.empty() || (!possible_errors.empty() && possible_errors.size() < error_locations.size())) { + error_locations = std::move(possible_errors); + } + } + return "Invalid checksum"; +} + } // namespace bech32 diff --git a/src/bech32.h b/src/bech32.h index e9450ccc2b..f39d99b641 100644 --- a/src/bech32.h +++ b/src/bech32.h @@ -44,6 +44,9 @@ struct DecodeResult /** Decode a Bech32 or Bech32m string. */ DecodeResult Decode(const std::string& str); +/** Return the positions of errors in a Bech32 string. */ +std::string LocateErrors(const std::string& str, std::vector& error_locations); + } // namespace bech32 #endif // BITCOIN_BECH32_H diff --git a/src/key_io.cpp b/src/key_io.cpp index 9e246c0b81..6908c5ea52 100644 --- a/src/key_io.cpp +++ b/src/key_io.cpp @@ -76,7 +76,7 @@ class DestinationEncoder std::string operator()(const CNoDestination& no) const { return {}; } }; -CTxDestination DecodeDestination(const std::string& str, const CChainParams& params, std::string& error_str) +CTxDestination DecodeDestination(const std::string& str, const CChainParams& params, std::string& error_str, std::vector* error_locations) { std::vector data; uint160 hash; @@ -184,8 +184,13 @@ CTxDestination DecodeDestination(const std::string& str, const CChainParams& par } } - //TODO: locate Bech32 errors - error_str = "Error in Bech32 encoding"; + // Perform Bech32 error location + if (!error_locations) { + std::vector dummy_errors; + error_str = bech32::LocateErrors(str, dummy_errors); + } else { + error_str = bech32::LocateErrors(str, *error_locations); + } return CNoDestination(); } @@ -274,9 +279,9 @@ std::string EncodeDestination(const CTxDestination& dest) return std::visit(DestinationEncoder(Params()), dest); } -CTxDestination DecodeDestination(const std::string& str, std::string& error_msg) +CTxDestination DecodeDestination(const std::string& str, std::string& error_msg, std::vector* error_locations) { - return DecodeDestination(str, Params(), error_msg); + return DecodeDestination(str, Params(), error_msg, error_locations); } CTxDestination DecodeDestination(const std::string& str) @@ -288,7 +293,7 @@ CTxDestination DecodeDestination(const std::string& str) bool IsValidDestinationString(const std::string& str, const CChainParams& params) { std::string error_msg; - return IsValidDestination(DecodeDestination(str, params, error_msg)); + return IsValidDestination(DecodeDestination(str, params, error_msg, nullptr)); } bool IsValidDestinationString(const std::string& str) diff --git a/src/key_io.h b/src/key_io.h index bd81f7847e..2062bb4c44 100644 --- a/src/key_io.h +++ b/src/key_io.h @@ -23,7 +23,7 @@ std::string EncodeExtPubKey(const CExtPubKey& extpubkey); std::string EncodeDestination(const CTxDestination& dest); CTxDestination DecodeDestination(const std::string& str); -CTxDestination DecodeDestination(const std::string& str, std::string& error_msg); +CTxDestination DecodeDestination(const std::string& str, std::string& error_msg, std::vector* error_locations = nullptr); bool IsValidDestinationString(const std::string& str); bool IsValidDestinationString(const std::string& str, const CChainParams& params); From 02a7bdee429ae307a5e57832727fed789e2e04fb Mon Sep 17 00:00:00 2001 From: Samuel Dobson Date: Fri, 1 Oct 2021 12:51:18 +1300 Subject: [PATCH 3/8] Add error_locations to validateaddress RPC --- src/rpc/misc.cpp | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/src/rpc/misc.cpp b/src/rpc/misc.cpp index 39bd9c6091..2bd8a6b050 100644 --- a/src/rpc/misc.cpp +++ b/src/rpc/misc.cpp @@ -52,6 +52,10 @@ static RPCHelpMan validateaddress() {RPCResult::Type::NUM, "witness_version", /* optional */ true, "The version number of the witness program"}, {RPCResult::Type::STR_HEX, "witness_program", /* optional */ true, "The hex value of the witness program"}, {RPCResult::Type::STR, "error", /* optional */ true, "Error message, if any"}, + {RPCResult::Type::ARR, "error_locations", "Indices of likely error locations in address, if known (e.g. Bech32 errors)", + { + {RPCResult::Type::NUM, "index", "index of a potential error"}, + }}, } }, RPCExamples{ @@ -61,7 +65,8 @@ static RPCHelpMan validateaddress() [&](const RPCHelpMan& self, const JSONRPCRequest& request) -> UniValue { std::string error_msg; - CTxDestination dest = DecodeDestination(request.params[0].get_str(), error_msg); + std::vector error_locations; + CTxDestination dest = DecodeDestination(request.params[0].get_str(), error_msg, &error_locations); const bool isValid = IsValidDestination(dest); CHECK_NONFATAL(isValid == error_msg.empty()); @@ -77,6 +82,9 @@ static RPCHelpMan validateaddress() UniValue detail = DescribeAddress(dest); ret.pushKVs(detail); } else { + UniValue error_indices(UniValue::VARR); + for (int i : error_locations) error_indices.push_back(i); + ret.pushKV("error_locations", error_indices); ret.pushKV("error", error_msg); } From c4979f77c1264f0099d1dfa278b1d9c18340b5f9 Mon Sep 17 00:00:00 2001 From: MeshCollider Date: Tue, 20 Apr 2021 17:24:14 +1200 Subject: [PATCH 4/8] Add boost tests for bech32 error detection --- src/test/bech32_tests.cpp | 54 ++++++++++++++++++++++++++++++++++++++- 1 file changed, 53 insertions(+), 1 deletion(-) diff --git a/src/test/bech32_tests.cpp b/src/test/bech32_tests.cpp index c0344b3cbb..16cda6a976 100644 --- a/src/test/bech32_tests.cpp +++ b/src/test/bech32_tests.cpp @@ -68,10 +68,36 @@ BOOST_AUTO_TEST_CASE(bech32_testvectors_invalid) "1qzzfhee", "a12UEL5L", "A12uEL5L", + "abcdef1qpzrz9x8gf2tvdw0s3jn54khce6mua7lmqqqxw", }; + static const std::pair ERRORS[] = { + {"Invalid character or mixed case", 0}, + {"Invalid character or mixed case", 0}, + {"Invalid character or mixed case", 0}, + {"Bech32 string too long", 90}, + {"Missing separator", -1}, + {"Invalid separator position", 0}, + {"Invalid Base 32 character", 2}, + {"Invalid separator position", 2}, + {"Invalid character or mixed case", 8}, + {"Invalid checksum", -1}, // The checksum is calculated using the uppercase form so the entire string is invalid, not just a few characters + {"Invalid separator position", 0}, + {"Invalid separator position", 0}, + {"Invalid character or mixed case", 3}, + {"Invalid character or mixed case", 3}, + {"Invalid checksum", 11} + }; + int i = 0; for (const std::string& str : CASES) { + const auto& err = ERRORS[i]; const auto dec = bech32::Decode(str); BOOST_CHECK(dec.encoding == bech32::Encoding::INVALID); + std::vector error_locations; + std::string error = bech32::LocateErrors(str, error_locations); + BOOST_CHECK_EQUAL(err.first, error); + if (err.second == -1) BOOST_CHECK(error_locations.empty()); + else BOOST_CHECK_EQUAL(err.second, error_locations[0]); + i++; } } @@ -91,11 +117,37 @@ BOOST_AUTO_TEST_CASE(bech32m_testvectors_invalid) "au1s5cgom", "M1VUXWEZ", "16plkw9", - "1p2gdwpf" + "1p2gdwpf", + "abcdef1l7aum6echk45nj2s0wdvt2fg8x9yrzpqzd3ryx", + }; + static const std::pair ERRORS[] = { + {"Invalid character or mixed case", 0}, + {"Invalid character or mixed case", 0}, + {"Invalid character or mixed case", 0}, + {"Bech32 string too long", 90}, + {"Missing separator", -1}, + {"Invalid separator position", 0}, + {"Invalid Base 32 character", 2}, + {"Invalid Base 32 character", 3}, + {"Invalid separator position", 2}, + {"Invalid Base 32 character", 8}, + {"Invalid Base 32 character", 7}, + {"Invalid checksum", -1}, + {"Invalid separator position", 0}, + {"Invalid separator position", 0}, + {"Invalid checksum", 21}, }; + int i = 0; for (const std::string& str : CASES) { + const auto& err = ERRORS[i]; const auto dec = bech32::Decode(str); BOOST_CHECK(dec.encoding == bech32::Encoding::INVALID); + std::vector error_locations; + std::string error = bech32::LocateErrors(str, error_locations); + BOOST_CHECK_EQUAL(err.first, error); + if (err.second == -1) BOOST_CHECK(error_locations.empty()); + else BOOST_CHECK_EQUAL(err.second, error_locations[0]); + i++; } } From 42d6a029e57a32f2d1d829ff7718b6d40d58b9d1 Mon Sep 17 00:00:00 2001 From: Samuel Dobson Date: Fri, 1 Oct 2021 14:17:18 +1300 Subject: [PATCH 5/8] Refactor and add more tests for validateaddress --- .../functional/rpc_invalid_address_message.py | 95 +++++++++++-------- 1 file changed, 56 insertions(+), 39 deletions(-) diff --git a/test/functional/rpc_invalid_address_message.py b/test/functional/rpc_invalid_address_message.py index 38008cbf42..9e0c232ff6 100755 --- a/test/functional/rpc_invalid_address_message.py +++ b/test/functional/rpc_invalid_address_message.py @@ -12,68 +12,85 @@ ) BECH32_VALID = 'bcrt1qtmp74ayg7p24uslctssvjm06q5phz4yrxucgnv' +BECH32_VALID_CAPITALS = 'BCRT1QPLMTZKC2XHARPPZDLNPAQL78RSHJ68U33RAH7R' +BECH32_VALID_MULTISIG = 'bcrt1qdg3myrgvzw7ml9q0ejxhlkyxm7vl9r56yzkfgvzclrf4hkpx9yfqhpsuks' + BECH32_INVALID_BECH32 = 'bcrt1p0xlxvlhemja6c4dqv22uapctqupfhlxm9h8z3k2e72q4k9hcz7vqdmchcc' BECH32_INVALID_BECH32M = 'bcrt1qw508d6qejxtdg4y5r3zarvary0c5xw7k35mrzd' BECH32_INVALID_VERSION = 'bcrt130xlxvlhemja6c4dqv22uapctqupfhlxm9h8z3k2e72q4k9hcz7vqynjegk' BECH32_INVALID_SIZE = 'bcrt1s0xlxvlhemja6c4dqv22uapctqupfhlxm9h8z3k2e72q4k9hcz7v8n0nx0muaewav25430mtr' BECH32_INVALID_V0_SIZE = 'bcrt1qw508d6qejxtdg4y5r3zarvary0c5xw7kqqq5k3my' BECH32_INVALID_PREFIX = 'bc1pw508d6qejxtdg4y5r3zarvary0c5xw7kw508d6qejxtdg4y5r3zarvary0c5xw7k7grplx' +BECH32_TOO_LONG = 'bcrt1q049edschfnwystcqnsvyfpj23mpsg3jcedq9xv049edschfnwystcqnsvyfpj23mpsg3jcedq9xv049edschfnwystcqnsvyfpj23m' +BECH32_ONE_ERROR = 'bcrt1q049edschfnwystcqnsvyfpj23mpsg3jcedq9xv' +BECH32_ONE_ERROR_CAPITALS = 'BCRT1QPLMTZKC2XHARPPZDLNPAQL78RSHJ68U32RAH7R' +BECH32_TWO_ERRORS = 'bcrt1qax9suht3qv95sw33xavx8crpxduefdrsvgsklu' # should be bcrt1qax9suht3qv95sw33wavx8crpxduefdrsvgsklx +BECH32_NO_SEPARATOR = 'bcrtq049ldschfnwystcqnsvyfpj23mpsg3jcedq9xv' +BECH32_INVALID_CHAR = 'bcrt1q04oldschfnwystcqnsvyfpj23mpsg3jcedq9xv' +BECH32_MULTISIG_TWO_ERRORS = 'bcrt1qdg3myrgvzw7ml8q0ejxhlkyxn7vl9r56yzkfgvzclrf4hkpx9yfqhpsuks' +BECH32_WRONG_VERSION = 'bcrt1ptmp74ayg7p24uslctssvjm06q5phz4yrxucgnv' BASE58_VALID = 'mipcBbFg9gMiCh81Kj8tqqdgoZub1ZJRfn' BASE58_INVALID_PREFIX = '17VZNX1SN5NtKa8UQFxwQbFeFc3iqRYhem' +BASE58_INVALID_CHECKSUM = 'mipcBbFg9gMiCh81Kj8tqqdgoZub1ZJJfn' +BASE58_INVALID_LENGTH = '2VKf7XKMrp4bVNVmuRbyCewkP8FhGLP2E54LHDPakr9Sq5mtU2' INVALID_ADDRESS = 'asfah14i8fajz0123f' +INVALID_ADDRESS_2 = '1q049ldschfnwystcqnsvyfpj23mpsg3jcedq9xv' class InvalidAddressErrorMessageTest(BitcoinTestFramework): def set_test_params(self): self.setup_clean_chain = True self.num_nodes = 1 - def test_validateaddress(self): - node = self.nodes[0] - - # Bech32 - info = node.validateaddress(BECH32_INVALID_SIZE) - assert not info['isvalid'] - assert_equal(info['error'], 'Invalid Bech32 address data size') - - info = node.validateaddress(BECH32_INVALID_PREFIX) - assert not info['isvalid'] - assert_equal(info['error'], 'Invalid HRP or Base58 character in address') - - info = node.validateaddress(BECH32_INVALID_BECH32) - assert not info['isvalid'] - assert_equal(info['error'], 'Version 1+ witness address must use Bech32m checksum') - - info = node.validateaddress(BECH32_INVALID_BECH32M) - assert not info['isvalid'] - assert_equal(info['error'], 'Version 0 witness address must use Bech32 checksum') - - info = node.validateaddress(BECH32_INVALID_V0_SIZE) - assert not info['isvalid'] - assert_equal(info['error'], 'Invalid Bech32 v0 address data size') - - info = node.validateaddress(BECH32_VALID) + def check_valid(self, addr): + info = self.nodes[0].validateaddress(addr) assert info['isvalid'] assert 'error' not in info + assert 'error_locations' not in info - info = node.validateaddress(BECH32_INVALID_VERSION) - assert not info['isvalid'] - assert_equal(info['error'], 'Invalid Bech32 address witness version') - - # Base58 - info = node.validateaddress(BASE58_INVALID_PREFIX) - assert not info['isvalid'] - assert_equal(info['error'], 'Invalid prefix for Base58-encoded address') + def check_invalid(self, addr, error_str, error_locations=None): + res = self.nodes[0].validateaddress(addr) + assert not res['isvalid'] + assert_equal(res['error'], error_str) + if error_locations: + assert_equal(res['error_locations'], error_locations) + else: + assert_equal(res['error_locations'], []) - info = node.validateaddress(BASE58_VALID) - assert info['isvalid'] - assert 'error' not in info + def test_validateaddress(self): + # Invalid Bech32 + self.check_invalid(BECH32_INVALID_SIZE, 'Invalid Bech32 address data size') + self.check_invalid(BECH32_INVALID_PREFIX, 'Invalid HRP or Base58 character in address') + self.check_invalid(BECH32_INVALID_BECH32, 'Version 1+ witness address must use Bech32m checksum') + self.check_invalid(BECH32_INVALID_BECH32M, 'Version 0 witness address must use Bech32 checksum') + self.check_invalid(BECH32_INVALID_VERSION, 'Invalid Bech32 address witness version') + self.check_invalid(BECH32_INVALID_V0_SIZE, 'Invalid Bech32 v0 address data size') + self.check_invalid(BECH32_TOO_LONG, 'Bech32 string too long', list(range(90, 108))) + self.check_invalid(BECH32_ONE_ERROR, 'Invalid checksum', [9]) + self.check_invalid(BECH32_TWO_ERRORS, 'Invalid checksum', [22, 43]) + self.check_invalid(BECH32_ONE_ERROR_CAPITALS, 'Invalid checksum', [38]) + self.check_invalid(BECH32_NO_SEPARATOR, 'Missing separator') + self.check_invalid(BECH32_INVALID_CHAR, 'Invalid Base 32 character', [8]) + self.check_invalid(BECH32_MULTISIG_TWO_ERRORS, 'Invalid checksum', [19, 30]) + self.check_invalid(BECH32_WRONG_VERSION, 'Invalid checksum', [5]) + + # Valid Bech32 + self.check_valid(BECH32_VALID) + self.check_valid(BECH32_VALID_CAPITALS) + self.check_valid(BECH32_VALID_MULTISIG) + + # Invalid Base58 + self.check_invalid(BASE58_INVALID_PREFIX, 'Invalid prefix for Base58-encoded address') + self.check_invalid(BASE58_INVALID_CHECKSUM, 'Invalid checksum or length of Base58 address') + self.check_invalid(BASE58_INVALID_LENGTH, 'Invalid checksum or length of Base58 address') + + # Valid Base58 + self.check_valid(BASE58_VALID) # Invalid address format - info = node.validateaddress(INVALID_ADDRESS) - assert not info['isvalid'] - assert_equal(info['error'], 'Invalid HRP or Base58 character in address') + self.check_invalid(INVALID_ADDRESS, 'Invalid HRP or Base58 character in address') + self.check_invalid(INVALID_ADDRESS_2, 'Invalid HRP or Base58 character in address') def test_getaddressinfo(self): node = self.nodes[0] From 2eb5792ec7bbeaf7138420b6c85c5cd0a0404946 Mon Sep 17 00:00:00 2001 From: MeshCollider Date: Tue, 20 Apr 2021 17:19:04 +1200 Subject: [PATCH 6/8] Add release notes for validateaddress Bech32 error detection --- doc/release-notes-16807.md | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100644 doc/release-notes-16807.md diff --git a/doc/release-notes-16807.md b/doc/release-notes-16807.md new file mode 100644 index 0000000000..5027550a99 --- /dev/null +++ b/doc/release-notes-16807.md @@ -0,0 +1,6 @@ +Updated RPCs +------------ + +- The `validateaddress` RPC now optionally returns an `error_locations` array, with the indices of +invalid characters in the address. For example, this will return the locations of up to two Bech32 +errors. \ No newline at end of file From 5599813b80e53a1539c66625b4320ab1b4fb4848 Mon Sep 17 00:00:00 2001 From: Samuel Dobson Date: Thu, 7 Oct 2021 01:16:08 +1300 Subject: [PATCH 7/8] Add lots of comments to Bech32 --- src/bech32.cpp | 139 +++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 135 insertions(+), 4 deletions(-) diff --git a/src/bech32.cpp b/src/bech32.cpp index 905a2aa2c0..1b2d47243b 100644 --- a/src/bech32.cpp +++ b/src/bech32.cpp @@ -30,6 +30,33 @@ const int8_t CHARSET_REV[128] = { 1, 0, 3, 16, 11, 28, 12, 14, 6, 4, 2, -1, -1, -1, -1, -1 }; +// We work with the finite field GF(1024) defined as a degree 2 extension of the base field GF(32) +// The defining polynomial of the extension is x^2 + 9x + 23 +// Let (e) be a primitive element of GF(1024), that is, a generator of the field. +// Every non-zero element of the field can then be represented as (e)^k for some power k. +// The array GF1024_EXP contains all these powers of (e) - GF1024_EXP[k] = (e)^k in GF(1024). +// Conversely, GF1024_LOG contains the discrete logarithms of these powers, so +// GF1024_LOG[GF1024_EXP[k]] == k +// Each element v of GF(1024) is encoded as a 10 bit integer in the following way: +// v = v1 || v0 where v0, v1 are 5-bit integers (elements of GF(32)). +// +// The element (e) is encoded as 9 || 15. Given (v), we compute (e)*(v) by multiplying in the following way: +// v0' = 27*v1 + 15*v0 +// v1' = 6*v1 + 9*v0 +// e*v = v1' || v0' +// +// The following sage code can be used to reproduce both _EXP and _LOG arrays +// GF1024_LOG = [-1] + [0] * 1023 +// GF1024_EXP = [1] * 1024 +// v = 1 +// for i in range(1, 1023): +// v0 = v & 31 +// v1 = v >> 5 +// v0n = F.fetch_int(27)*F.fetch_int(v1) + F.fetch_int(15)*F.fetch_int(v0) +// v1n = F.fetch_int(6)*F.fetch_int(v1) + F.fetch_int(9)*F.fetch_int(v0) +// v = v1n.integer_representation() << 5 | v0n.integer_representation() +// GF1024_EXP[i] = v +// GF1024_LOG[v] = i const int16_t GF1024_EXP[] = { 1, 303, 635, 446, 997, 640, 121, 142, 959, 420, 350, 438, 166, 39, 543, @@ -103,6 +130,7 @@ const int16_t GF1024_EXP[] = { 433, 610, 116, 855, 180, 479, 910, 1014, 599, 915, 905, 306, 516, 731, 626, 978, 825, 344, 605, 654, 209 }; +// As above, GF1024_EXP contains all elements of GF(1024) except 0 static_assert(std::size(GF1024_EXP) == 1023, "GF1024_EXP length should be 1023"); const int16_t GF1024_LOG[] = { @@ -276,8 +304,51 @@ uint32_t PolyMod(const data& v) return c; } +/** Syndrome computes the values s_j = R(e^j) for j in [997, 998, 999]. As described above, the + * generator polynomial G is the LCM of the minimal polynomials of (e)^997, (e)^998, and (e)^999. + * + * Consider a codeword with errors, of the form R(x) = C(x) + E(x). The residue is the bit-packed + * result of computing R(x) mod G(X), where G is the generator of the code. Because C(x) is a valid + * codeword, it is a multiple of G(X), so the residue is in fact just E(x) mod G(x). Note that all + * of the (e)^j are roots of G(x) by definition, so R((e)^j) = E((e)^j). + * + * Syndrome returns the three values packed into a 30-bit integer, where each 10 bits is one value. + */ uint32_t Syndrome(const uint32_t residue) { + // Let R(x) = r1*x^5 + r2*x^4 + r3*x^3 + r4*x^2 + r5*x + r6 + // low is the first 5 bits, corresponding to the r6 in the residue + // (the constant term of the polynomial). + uint32_t low = residue & 0x1f; + + // Recall that XOR corresponds to addition in a characteristic 2 field. + // + // To compute R((e)^j), we are really computing: + // r1*(e)^(j*5) + r2*(e)^(j*4) + r3*(e)^(j*3) + r4*(e)^(j*2) + r5*(e)^j + r6 + // Now note that all of the (e)^(j*i) for i in [5..0] are constants and can be precomputed + // for efficiency. But even more than that, we can consider each coefficient as a bit-string. + // For example, take r5 = (b_5, b_4, b_3, b_2, b_1) written out as 5 bits. Then: + // r5*(e)^j = b_1*(e)^j + b_2*(2*(e)^j) + b_3*(4*(e)^j) + b_4*(8*(e)^j) + b_5*(16*(e)^j) + // where all the (2^i*(e)^j) are constants and can be precomputed. Then we just add each + // of these corresponding constants to our final value based on the bit values b_i. + // This is exactly what is done below. Note that all three values of s_j for j in (997, 998, + // 999) are computed simultaneously. + // + // We begin by setting s_j = low = r6 for all three values of j, because these are unconditional. + // Then for each following bit, we add the corresponding precomputed constant if the bit is 1. + // For example, 0x31edd3c4 is 1100011110 1101110100 1111000100 when unpacked in groups of 10 + // bits, corresponding exactly to a^999 || a^998 || a^997 (matching the corresponding values in + // GF1024_EXP above). + // + // The following sage code reproduces these constants: + // for k in range(1, 6): + // for b in [1,2,4,8,16]: + // c0 = GF1024_EXP[(997*k + GF1024_LOG[b]) % 1023] + // c1 = GF1024_EXP[(998*k + GF1024_LOG[b]) % 1023] + // c2 = GF1024_EXP[(999*k + GF1024_LOG[b]) % 1023] + // c = c2 << 20 | c1 << 10 | c0 + // print("0x%x" % c) + return low ^ (low << 10) ^ (low << 20) ^ ((residue >> 5) & 1 ? 0x31edd3c4 : 0) ^ ((residue >> 6) & 1 ? 0x335f86a8 : 0) ^ @@ -469,44 +540,104 @@ std::string LocateErrors(const std::string& str, std::vector& error_locatio // We can't simply use the segwit version, because that may be one of the errors for (Encoding encoding : {Encoding::BECH32, Encoding::BECH32M}) { std::vector possible_errors; + // Recall that (ExpandHRP(hrp) ++ values) is interpreted as a list of coefficients of a polynomial + // over GF(32). PolyMod computes the "remainder" of this polynomial modulo the generator G(x). uint32_t residue = PolyMod(Cat(ExpandHRP(hrp), values)) ^ EncodingConstant(encoding); + + // All valid codewords should be multiples of G(x), so this remainder (after XORing with the encoding + // constant) should be 0 - hence 0 indicates there are no errors present. if (residue != 0) { + // If errors are present, our polynomial must be of the form C(x) + E(x) where C is the valid + // codeword (a multiple of G(x)), and E encodes the errors. uint32_t syn = Syndrome(residue); + + // Unpack the three 10-bit syndrome values int s0 = syn & 0x3FF; int s1 = (syn >> 10) & 0x3FF; int s2 = syn >> 20; + + // Get the discrete logs of these values in GF1024 for more efficient computation int l_s0 = GF1024_LOG[s0]; int l_s1 = GF1024_LOG[s1]; int l_s2 = GF1024_LOG[s2]; + // First, suppose there is only a single error. Then E(x) = e1*x^p1 for some position p1 + // Then s0 = E((e)^997) = e1*(e)^(997*p1) and s1 = E((e)^998) = e1*(e)^(998*p1) + // Therefore s1/s0 = (e)^p1, and by the same logic, s2/s1 = (e)^p1 too. + // Hence, s1^2 == s0*s2, which is exactly the condition we check first: if (l_s0 != -1 && l_s1 != -1 && l_s2 != -1 && (2 * l_s1 - l_s2 - l_s0 + 2046) % 1023 == 0) { - size_t p1 = (l_s1 - l_s0 + 1023) % 1023; + // Compute the error position p1 as l_s1 - l_s0 = p1 (mod 1023) + size_t p1 = (l_s1 - l_s0 + 1023) % 1023; // the +1023 ensures it is positive + // Now because s0 = e1*(e)^(997*p1), we get e1 = s0/((e)^(997*p1)). Remember that (e)^1023 = 1, + // so 1/((e)^997) = (e)^(1023-997). int l_e1 = l_s0 + (1023 - 997) * p1; + // Finally, some sanity checks on the result: + // - The error position should be within the length of the data + // - e1 should be in GF(32), which implies that e1 = (e)^(33k) for some k (the 31 non-zero elements + // of GF(32) form an index 33 subgroup of the 1023 non-zero elements of GF(1024)). if (p1 < length && !(l_e1 % 33)) { + // Polynomials run from highest power to lowest, so the index p1 is from the right. + // We don't return e1 because it is dangerous to suggest corrections to the user, + // the user should check the address themselves. possible_errors.push_back(str.size() - p1 - 1); } + // Otherwise, suppose there are two errors. Then E(x) = e1*x^p1 + e2*x^p2. } else { + // For all possible first error positions p1 for (size_t p1 = 0; p1 < length; ++p1) { + // We have guessed p1, and want to solve for p2. Recall that E(x) = e1*x^p1 + e2*x^p2, so + // s0 = E((e)^997) = e1*(e)^(997^p1) + e2*(e)^(997*p2), and similar for s1 and s2. + // + // Consider s2 + s1*(e)^p1 + // = 2e1*(e)^(999^p1) + e2*(e)^(999*p2) + e2*(e)^(998*p2)*(e)^p1 + // = e2*(e)^(999*p2) + e2*(e)^(998*p2)*(e)^p1 + // (Because we are working in characteristic 2.) + // = e2*(e)^(998*p2) ((e)^p2 + (e)^p1) + // int s2_s1p1 = s2 ^ (s1 == 0 ? 0 : GF1024_EXP[(l_s1 + p1) % 1023]); if (s2_s1p1 == 0) continue; + int l_s2_s1p1 = GF1024_LOG[s2_s1p1]; + // Similarly, s1 + s0*(e)^p1 + // = e2*(e)^(997*p2) ((e)^p2 + (e)^p1) int s1_s0p1 = s1 ^ (s0 == 0 ? 0 : GF1024_EXP[(l_s0 + p1) % 1023]); if (s1_s0p1 == 0) continue; - int l_s1_s0p1 = GF1024_LOG[s1_s0p1]; - size_t p2 = (GF1024_LOG[s2_s1p1] - l_s1_s0p1 + 1023) % 1023; + + // So, putting these together, we can compute the second error position as + // (e)^p2 = (s2 + s1^p1)/(s1 + s0^p1) + // p2 = log((e)^p2) + size_t p2 = (l_s2_s1p1 - l_s1_s0p1 + 1023) % 1023; + + // Sanity checks that p2 is a valid position and not the same as p1 if (p2 >= length || p1 == p2) continue; + // Now we want to compute the error values e1 and e2. + // Similar to above, we compute s1 + s0*(e)^p2 + // = e1*(e)^(997*p1) ((e)^p1 + (e)^p2) int s1_s0p2 = s1 ^ (s0 == 0 ? 0 : GF1024_EXP[(l_s0 + p2) % 1023]); if (s1_s0p2 == 0) continue; + int l_s1_s0p2 = GF1024_LOG[s1_s0p2]; + // And compute (the log of) 1/((e)^p1 + (e)^p2)) int inv_p1_p2 = 1023 - GF1024_LOG[GF1024_EXP[p1] ^ GF1024_EXP[p2]]; + + // Then (s1 + s0*(e)^p1) * (1/((e)^p1 + (e)^p2))) + // = e2*(e)^(997*p2) + // Then recover e2 by dividing by (e)^(997*p2) int l_e2 = l_s1_s0p1 + inv_p1_p2 + (1023 - 997) * p2; + // Check that e2 is in GF(32) if (l_e2 % 33) continue; - int l_e1 = GF1024_LOG[s1_s0p2] + inv_p1_p2 + (1023 - 997) * p1; + // In the same way, (s1 + s0*(e)^p2) * (1/((e)^p1 + (e)^p2))) + // = e1*(e)^(997*p1) + // So recover e1 by dividing by (e)^(997*p1) + int l_e1 = l_s1_s0p2 + inv_p1_p2 + (1023 - 997) * p1; + // Check that e1 is in GF(32) if (l_e1 % 33) continue; + // Again, we do not return e1 or e2 for safety. + // Order the error positions from the left of the string and return them if (p1 > p2) { possible_errors.push_back(str.size() - p1 - 1); possible_errors.push_back(str.size() - p2 - 1); From 88cc4810926e4f5af6757ee1b0eed61abda3d746 Mon Sep 17 00:00:00 2001 From: Samuel Dobson Date: Thu, 7 Oct 2021 14:43:10 +1300 Subject: [PATCH 8/8] Modify copyright header on Bech32 code --- src/bech32.cpp | 1 + src/bech32.h | 1 + src/test/bech32_tests.cpp | 1 + 3 files changed, 3 insertions(+) diff --git a/src/bech32.cpp b/src/bech32.cpp index 1b2d47243b..ea44480a6c 100644 --- a/src/bech32.cpp +++ b/src/bech32.cpp @@ -1,4 +1,5 @@ // Copyright (c) 2017, 2021 Pieter Wuille +// Copyright (c) 2021 The Bitcoin Core developers // Distributed under the MIT software license, see the accompanying // file COPYING or http://www.opensource.org/licenses/mit-license.php. diff --git a/src/bech32.h b/src/bech32.h index f39d99b641..7e92d5d23a 100644 --- a/src/bech32.h +++ b/src/bech32.h @@ -1,4 +1,5 @@ // Copyright (c) 2017, 2021 Pieter Wuille +// Copyright (c) 2021 The Bitcoin Core developers // Distributed under the MIT software license, see the accompanying // file COPYING or http://www.opensource.org/licenses/mit-license.php. diff --git a/src/test/bech32_tests.cpp b/src/test/bech32_tests.cpp index 16cda6a976..8eed959228 100644 --- a/src/test/bech32_tests.cpp +++ b/src/test/bech32_tests.cpp @@ -1,4 +1,5 @@ // Copyright (c) 2017 Pieter Wuille +// Copyright (c) 2021 The Bitcoin Core developers // Distributed under the MIT software license, see the accompanying // file COPYING or http://www.opensource.org/licenses/mit-license.php.