From 8325639a320398dc5e4022634a2c4fc38a8fdba9 Mon Sep 17 00:00:00 2001 From: David Zafman Date: Thu, 29 Jun 2017 16:44:29 -0700 Subject: [PATCH 1/8] osd: Fix test op error message Signed-off-by: David Zafman --- src/osd/OSD.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/osd/OSD.cc b/src/osd/OSD.cc index 628b9b945f016..7045e0f137005 100644 --- a/src/osd/OSD.cc +++ b/src/osd/OSD.cc @@ -5249,7 +5249,7 @@ void TestOpsSocketHook::test_ops(OSDService *service, ObjectStore *store, if (pool < 0 && isdigit(poolstr[0])) pool = atoll(poolstr.c_str()); if (pool < 0) { - ss << "Invalid pool" << poolstr; + ss << "Invalid pool '" << poolstr << "''"; return; } From c0606b9eea977074b560b44c4cd1a3d8e8bc3e0a Mon Sep 17 00:00:00 2001 From: David Zafman Date: Thu, 29 Jun 2017 17:13:50 -0700 Subject: [PATCH 2/8] test: Add undocumented corrupt-size for testing Signed-off-by: David Zafman --- src/tools/ceph_objectstore_tool.cc | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/src/tools/ceph_objectstore_tool.cc b/src/tools/ceph_objectstore_tool.cc index 5853f6a04608b..13224f759291a 100644 --- a/src/tools/ceph_objectstore_tool.cc +++ b/src/tools/ceph_objectstore_tool.cc @@ -1983,7 +1983,7 @@ int print_obj_info(ObjectStore *store, coll_t coll, ghobject_t &ghobj, Formatter } int set_size(ObjectStore *store, coll_t coll, ghobject_t &ghobj, uint64_t setsize, Formatter* formatter, - ObjectStore::Sequencer &osr) + ObjectStore::Sequencer &osr, bool corrupt) { if (ghobj.hobj.is_snapdir()) { cerr << "Can't set the size of a snapdir" << std::endl; @@ -2060,7 +2060,9 @@ int set_size(ObjectStore *store, coll_t coll, ghobject_t &ghobj, uint64_t setsiz ::encode(oi, attr, -1); /* fixme: using full features */ ObjectStore::Transaction t; t.setattr(coll, ghobj, OI_ATTR, attr); - t.truncate(coll, ghobj, setsize); + // Only modify object info if we want to corrupt it + if (!corrupt) + t.truncate(coll, ghobj, setsize); if (is_snap) { bufferlist snapattr; 
snapattr.clear(); @@ -3396,7 +3398,9 @@ int main(int argc, char **argv) } ret = print_obj_info(fs, coll, ghobj, formatter); goto out; - } else if (objcmd == "set-size") { + } else if (objcmd == "set-size" || objcmd == "corrupt-size") { + // Undocumented testing feature + bool corrupt = (objcmd == "corrupt-size"); // Extra arg if (vm.count("arg1") == 0 || vm.count("arg2")) { usage(desc); @@ -3409,7 +3413,7 @@ int main(int argc, char **argv) goto out; } uint64_t size = atoll(arg1.c_str()); - ret = set_size(fs, coll, ghobj, size, formatter, *osr); + ret = set_size(fs, coll, ghobj, size, formatter, *osr, corrupt); goto out; } else if (objcmd == "clear-snapset") { // UNDOCUMENTED: For testing zap SnapSet From 8ad4b291131058bbdb4267f4cad35a40fb905bb4 Mon Sep 17 00:00:00 2001 From: David Zafman Date: Wed, 5 Jul 2017 19:14:36 -0700 Subject: [PATCH 3/8] osd: Add whether shard is primary in list-inconsistent-obj Add new field in the client interface Update test case Fixes: http://tracker.ceph.com/issues/18836 Signed-off-by: David Zafman --- doc/rados/command/list-inconsistent-obj.json | 4 + qa/standalone/scrub/osd-scrub-repair.sh | 245 +++++++++++++------ src/common/scrub_types.cc | 13 +- src/common/scrub_types.h | 3 +- src/include/rados/rados_types.hpp | 1 + src/osd/PGBackend.cc | 6 +- src/tools/rados/rados.cc | 1 + 7 files changed, 186 insertions(+), 87 deletions(-) diff --git a/doc/rados/command/list-inconsistent-obj.json b/doc/rados/command/list-inconsistent-obj.json index b9ee1793ff2b7..a7c17ace9be54 100644 --- a/doc/rados/command/list-inconsistent-obj.json +++ b/doc/rados/command/list-inconsistent-obj.json @@ -104,6 +104,9 @@ "osd": { "type": "integer" }, + "primary": { + "type": "boolean" + }, "size": { "type": "integer" }, @@ -164,6 +167,7 @@ }, "required": [ "osd", + "primary", "errors" ] } diff --git a/qa/standalone/scrub/osd-scrub-repair.sh b/qa/standalone/scrub/osd-scrub-repair.sh index 2a8f7ce9576dd..38051c3938ee3 100755 --- 
a/qa/standalone/scrub/osd-scrub-repair.sh +++ b/qa/standalone/scrub/osd-scrub-repair.sh @@ -628,14 +628,16 @@ function TEST_corrupt_scrub_replicated() { { "size": 7, "errors": [], - "osd": 0 + "osd": 0, + "primary": false }, { "size": 9, "errors": [ "size_mismatch_oi" ], - "osd": 1 + "osd": 1, + "primary": true } ], "selected_object_info": "3:ce3f1d6a:::ROBJ1:head(47'54 osd.0.0:53 dirty|omap|data_digest|omap_digest s 7 uv 3 dd 2ddbf8f5 od f5fba2c6 alloc_hint [0 0 0])", @@ -659,12 +661,14 @@ function TEST_corrupt_scrub_replicated() { "errors": [ "stat_error" ], - "osd": 0 + "osd": 0, + "primary": false }, { "size": 7, "errors": [], - "osd": 1 + "osd": 1, + "primary": true } ], "selected_object_info": "3:bc819597:::ROBJ12:head(47'52 osd.0.0:51 dirty|omap|data_digest|omap_digest s 7 uv 36 dd 2ddbf8f5 od 67f306a alloc_hint [0 0 0])", @@ -686,12 +690,14 @@ function TEST_corrupt_scrub_replicated() { "errors": [ "stat_error" ], - "osd": 0 + "osd": 0, + "primary": false }, { "size": 7, "errors": [], - "osd": 1 + "osd": 1, + "primary": true } ], "selected_object_info": "3:d60617f9:::ROBJ13:head(47'55 osd.0.0:54 dirty|omap|data_digest|omap_digest s 7 uv 39 dd 2ddbf8f5 od 6441854d alloc_hint [0 0 0])", @@ -714,14 +720,16 @@ function TEST_corrupt_scrub_replicated() { "errors": [ "oi_attr_corrupted" ], - "osd": 0 + "osd": 0, + "primary": false }, { "size": 7, "errors": [ "oi_attr_missing" ], - "osd": 1 + "osd": 1, + "primary": true } ], "union_shard_errors": [ @@ -754,7 +762,8 @@ function TEST_corrupt_scrub_replicated() { ], "size": 7, "errors": [], - "osd": 0 + "osd": 0, + "primary": false }, { "attrs": [ @@ -768,7 +777,8 @@ function TEST_corrupt_scrub_replicated() { "errors": [ "oi_attr_missing" ], - "osd": 1 + "osd": 1, + "primary": true } ], "selected_object_info": "3:30259878:::ROBJ15:head(47'46 osd.0.0:45 dirty|omap|data_digest|omap_digest s 7 uv 45 dd 2ddbf8f5 od 2d2a4d6e alloc_hint [0 0 0])", @@ -791,13 +801,15 @@ function TEST_corrupt_scrub_replicated() { { "size": 7, 
"errors": [], - "osd": 0 + "osd": 0, + "primary": false }, { "errors": [ "missing" ], - "osd": 1 + "osd": 1, + "primary": true } ], "selected_object_info": "3:f2a5b2a4:::ROBJ3:head(47'57 osd.0.0:56 dirty|omap|data_digest|omap_digest s 7 uv 9 dd 2ddbf8f5 od b35dfd alloc_hint [0 0 0])", @@ -840,7 +852,8 @@ function TEST_corrupt_scrub_replicated() { ], "size": 7, "errors": [], - "osd": 0 + "osd": 0, + "primary": false }, { "attrs": [ @@ -867,7 +880,8 @@ function TEST_corrupt_scrub_replicated() { ], "size": 7, "errors": [], - "osd": 1 + "osd": 1, + "primary": true } ], "selected_object_info": "3:86586531:::ROBJ8:head(82'62 client.4351.0:1 dirty|omap|data_digest|omap_digest s 7 uv 62 dd 2ddbf8f5 od d6be81dc alloc_hint [0 0 0])", @@ -902,7 +916,8 @@ function TEST_corrupt_scrub_replicated() { "object_info": "3:ffdb2004:::ROBJ9:head(102'63 client.4433.0:1 dirty|omap|data_digest|omap_digest s 1 uv 63 dd 2b63260d od 2eecc539 alloc_hint [0 0 0])", "size": 1, "errors": [], - "osd": 0 + "osd": 0, + "primary": false }, { "attrs": [ @@ -920,7 +935,8 @@ function TEST_corrupt_scrub_replicated() { "object_info": "3:ffdb2004:::ROBJ9:head(47'60 osd.0.0:59 dirty|omap|data_digest|omap_digest s 7 uv 27 dd 2ddbf8f5 od 2eecc539 alloc_hint [0 0 0])", "size": 1, "errors": [], - "osd": 1 + "osd": 1, + "primary": true } ], "selected_object_info": "3:ffdb2004:::ROBJ9:head(102'63 client.4433.0:1 dirty|omap|data_digest|omap_digest s 1 uv 63 dd 2b63260d od 2eecc539 alloc_hint [0 0 0])", @@ -995,7 +1011,8 @@ EOF "omap_digest": "0xf5fba2c6", "size": 7, "errors": [], - "osd": 0 + "osd": 0, + "primary": false }, { "data_digest": "0x2d4a11c2", @@ -1005,7 +1022,8 @@ EOF "data_digest_mismatch_oi", "size_mismatch_oi" ], - "osd": 1 + "osd": 1, + "primary": true } ], "selected_object_info": "3:ce3f1d6a:::ROBJ1:head(47'54 osd.0.0:53 dirty|omap|data_digest|omap_digest s 7 uv 3 dd 2ddbf8f5 od f5fba2c6 alloc_hint [0 0 0])", @@ -1034,7 +1052,8 @@ EOF "errors": [ "omap_digest_mismatch_oi" ], - "osd": 0 + "osd": 
0, + "primary": false }, { "data_digest": "0x2ddbf8f5", @@ -1043,7 +1062,8 @@ EOF "errors": [ "omap_digest_mismatch_oi" ], - "osd": 1 + "osd": 1, + "primary": true } ], "selected_object_info": "3:b1f19cbd:::ROBJ10:head(47'51 osd.0.0:50 dirty|omap|data_digest|omap_digest s 7 uv 30 dd 2ddbf8f5 od c2025a24 alloc_hint [0 0 0])", @@ -1066,14 +1086,16 @@ EOF "omap_digest": "0xa03cef03", "size": 7, "errors": [], - "osd": 0 + "osd": 0, + "primary": false }, { "size": 7, "errors": [ "read_error" ], - "osd": 1 + "osd": 1, + "primary": true } ], "selected_object_info": "3:87abbf36:::ROBJ11:head(47'48 osd.0.0:47 dirty|omap|data_digest|omap_digest s 7 uv 33 dd 2ddbf8f5 od a03cef03 alloc_hint [0 0 0])", @@ -1095,14 +1117,16 @@ EOF "errors": [ "stat_error" ], - "osd": 0 + "osd": 0, + "primary": false }, { "data_digest": "0x2ddbf8f5", "omap_digest": "0x067f306a", "size": 7, "errors": [], - "osd": 1 + "osd": 1, + "primary": true } ], "selected_object_info": "3:bc819597:::ROBJ12:head(47'52 osd.0.0:51 dirty|omap|data_digest|omap_digest s 7 uv 36 dd 2ddbf8f5 od 67f306a alloc_hint [0 0 0])", @@ -1124,14 +1148,16 @@ EOF "errors": [ "stat_error" ], - "osd": 0 + "osd": 0, + "primary": false }, { "size": 7, "errors": [ "read_error" ], - "osd": 1 + "osd": 1, + "primary": true } ], "union_shard_errors": [ @@ -1156,7 +1182,8 @@ EOF "errors": [ "oi_attr_corrupted" ], - "osd": 0 + "osd": 0, + "primary": false }, { "data_digest": "0x2ddbf8f5", @@ -1165,7 +1192,8 @@ EOF "errors": [ "oi_attr_missing" ], - "osd": 1 + "osd": 1, + "primary": true } ], "union_shard_errors": [ @@ -1200,7 +1228,8 @@ EOF "omap_digest": "0x2d2a4d6e", "size": 7, "errors": [], - "osd": 0 + "osd": 0, + "primary": false }, { "attrs": [ @@ -1216,7 +1245,8 @@ EOF "errors": [ "oi_attr_missing" ], - "osd": 1 + "osd": 1, + "primary": true } ], "selected_object_info": "3:30259878:::ROBJ15:head(47'46 osd.0.0:45 dirty|omap|data_digest|omap_digest s 7 uv 45 dd 2ddbf8f5 od 2d2a4d6e alloc_hint [0 0 0])", @@ -1243,14 +1273,16 @@ EOF 
"errors": [ "data_digest_mismatch_oi" ], - "osd": 0 + "osd": 0, + "primary": false }, { "data_digest": "0x2ddbf8f5", "omap_digest": "0xf8e11918", "size": 7, "errors": [], - "osd": 1 + "osd": 1, + "primary": true } ], "selected_object_info": "3:e97ce31e:::ROBJ2:head(47'56 osd.0.0:55 dirty|omap|data_digest|omap_digest s 7 uv 6 dd 2ddbf8f5 od f8e11918 alloc_hint [0 0 0])", @@ -1275,13 +1307,15 @@ EOF "omap_digest": "0x00b35dfd", "size": 7, "errors": [], - "osd": 0 + "osd": 0, + "primary": false }, { "errors": [ "missing" ], - "osd": 1 + "osd": 1, + "primary": true } ], "selected_object_info": "3:f2a5b2a4:::ROBJ3:head(47'57 osd.0.0:56 dirty|omap|data_digest|omap_digest s 7 uv 9 dd 2ddbf8f5 od b35dfd alloc_hint [0 0 0])", @@ -1306,14 +1340,16 @@ EOF "errors": [ "omap_digest_mismatch_oi" ], - "osd": 0 + "osd": 0, + "primary": false }, { "data_digest": "0x2ddbf8f5", "omap_digest": "0xe2d46ea4", "size": 7, "errors": [], - "osd": 1 + "osd": 1, + "primary": true } ], "selected_object_info": "3:f4981d31:::ROBJ4:head(47'58 osd.0.0:57 dirty|omap|data_digest|omap_digest s 7 uv 12 dd 2ddbf8f5 od e2d46ea4 alloc_hint [0 0 0])", @@ -1338,7 +1374,8 @@ EOF "omap_digest": "0x1a862a41", "size": 7, "errors": [], - "osd": 0 + "osd": 0, + "primary": false }, { "data_digest": "0x2ddbf8f5", @@ -1347,7 +1384,8 @@ EOF "errors": [ "omap_digest_mismatch_oi" ], - "osd": 1 + "osd": 1, + "primary": true } ], "selected_object_info": "3:f4bfd4d1:::ROBJ5:head(47'59 osd.0.0:58 dirty|omap|data_digest|omap_digest s 7 uv 15 dd 2ddbf8f5 od 1a862a41 alloc_hint [0 0 0])", @@ -1374,14 +1412,16 @@ EOF "errors": [ "omap_digest_mismatch_oi" ], - "osd": 0 + "osd": 0, + "primary": false }, { "data_digest": "0x2ddbf8f5", "omap_digest": "0x179c919f", "size": 7, "errors": [], - "osd": 1 + "osd": 1, + "primary": true } ], "selected_object_info": "3:a53c12e8:::ROBJ6:head(47'50 osd.0.0:49 dirty|omap|data_digest|omap_digest s 7 uv 18 dd 2ddbf8f5 od 179c919f alloc_hint [0 0 0])", @@ -1406,7 +1446,8 @@ EOF "omap_digest": 
"0xefced57a", "size": 7, "errors": [], - "osd": 0 + "osd": 0, + "primary": false }, { "data_digest": "0x2ddbf8f5", @@ -1415,7 +1456,8 @@ EOF "errors": [ "omap_digest_mismatch_oi" ], - "osd": 1 + "osd": 1, + "primary": true } ], "selected_object_info": "3:8b55fa4b:::ROBJ7:head(47'49 osd.0.0:48 dirty|omap|data_digest|omap_digest s 7 uv 21 dd 2ddbf8f5 od efced57a alloc_hint [0 0 0])", @@ -1462,7 +1504,8 @@ EOF "omap_digest": "0xd6be81dc", "size": 7, "errors": [], - "osd": 0 + "osd": 0, + "primary": false }, { "attrs": [ @@ -1491,7 +1534,8 @@ EOF "omap_digest": "0xd6be81dc", "size": 7, "errors": [], - "osd": 1 + "osd": 1, + "primary": true } ], "selected_object_info": "3:86586531:::ROBJ8:head(82'62 client.4351.0:1 dirty|omap|data_digest|omap_digest s 7 uv 62 dd 2ddbf8f5 od d6be81dc alloc_hint [0 0 0])", @@ -1528,7 +1572,8 @@ EOF "omap_digest": "0x2eecc539", "size": 3, "errors": [], - "osd": 0 + "osd": 0, + "primary": false }, { "attrs": [ @@ -1548,7 +1593,8 @@ EOF "omap_digest": "0x2eecc539", "size": 3, "errors": [], - "osd": 1 + "osd": 1, + "primary": true } ], "selected_object_info": "3:ffdb2004:::ROBJ9:head(122'64 client.4532.0:1 dirty|omap|data_digest|omap_digest s 3 uv 64 dd 1f26fb26 od 2eecc539 alloc_hint [0 0 0])", @@ -1682,7 +1728,8 @@ function corrupt_scrub_erasure() { "size": 2048, "errors": [], "shard": 2, - "osd": 0 + "osd": 0, + "primary": false }, { "size": 9, @@ -1690,13 +1737,15 @@ function corrupt_scrub_erasure() { "errors": [ "size_mismatch_oi" ], - "osd": 1 + "osd": 1, + "primary": true }, { "size": 2048, "shard": 1, "errors": [], - "osd": 2 + "osd": 2, + "primary": false } ], "selected_object_info": "3:9175b684:::EOBJ1:head(21'1 client.4179.0:1 dirty|data_digest|omap_digest s 7 uv 1 dd 2ddbf8f5 od ffffffff alloc_hint [0 0 0])", @@ -1720,20 +1769,23 @@ function corrupt_scrub_erasure() { "size": 2048, "errors": [], "shard": 2, - "osd": 0 + "osd": 0, + "primary": false }, { "shard": 0, "errors": [ "missing" ], - "osd": 1 + "osd": 1, + "primary": true 
}, { "size": 2048, "shard": 1, "errors": [], - "osd": 2 + "osd": 2, + "primary": false } ], "selected_object_info": "3:b197b25d:::EOBJ3:head(37'3 client.4251.0:1 dirty|data_digest|omap_digest s 7 uv 3 dd 2ddbf8f5 od ffffffff alloc_hint [0 0 0])", @@ -1782,10 +1834,12 @@ function corrupt_scrub_erasure() { "size": 2048, "errors": [], "shard": 2, - "osd": 0 + "osd": 0, + "primary": false }, { "osd": 1, + "primary": true, "shard": 0, "errors": [], "size": 2048, @@ -1819,6 +1873,7 @@ function corrupt_scrub_erasure() { }, { "osd": 2, + "primary": false, "shard": 1, "errors": [], "size": 2048, @@ -1871,7 +1926,8 @@ function corrupt_scrub_erasure() { "size": 2048, "errors": [], "shard": 2, - "osd": 0 + "osd": 0, + "primary": false }, { "size": 4096, @@ -1879,13 +1935,15 @@ function corrupt_scrub_erasure() { "errors": [ "size_mismatch_oi" ], - "osd": 1 + "osd": 1, + "primary": true }, { "size": 2048, "shard": 1, "errors": [], - "osd": 2 + "osd": 2, + "primary": false } ], "selected_object_info": "3:8549dfb5:::EOBJ5:head(65'7 client.4441.0:1 dirty|data_digest|omap_digest s 7 uv 7 dd 2ddbf8f5 od ffffffff alloc_hint [0 0 0])", @@ -1945,7 +2003,8 @@ EOF "size": 2048, "errors": [], "shard": 2, - "osd": 0 + "osd": 0, + "primary": false }, { "size": 9, @@ -1954,7 +2013,8 @@ EOF "read_error", "size_mismatch_oi" ], - "osd": 1 + "osd": 1, + "primary": true }, { "data_digest": "0x00000000", @@ -1962,7 +2022,8 @@ EOF "size": 2048, "shard": 1, "errors": [], - "osd": 2 + "osd": 2, + "primary": false } ], "selected_object_info": "3:9175b684:::EOBJ1:head(27'1 client.4155.0:1 dirty|data_digest|omap_digest s 7 uv 1 dd 2ddbf8f5 od ffffffff alloc_hint [0 0 0])", @@ -1989,14 +2050,16 @@ EOF "size": 2048, "errors": [], "shard": 2, - "osd": 0 + "osd": 0, + "primary": false }, { "shard": 0, "errors": [ "missing" ], - "osd": 1 + "osd": 1, + "primary": true }, { "data_digest": "0x00000000", @@ -2004,7 +2067,8 @@ EOF "size": 2048, "shard": 1, "errors": [], - "osd": 2 + "osd": 2, + "primary": false } 
], "selected_object_info": "3:b197b25d:::EOBJ3:head(41'3 client.4199.0:1 dirty|data_digest|omap_digest s 7 uv 3 dd 2ddbf8f5 od ffffffff alloc_hint [0 0 0])", @@ -2055,7 +2119,8 @@ EOF "size": 2048, "errors": [], "shard": 2, - "osd": 0 + "osd": 0, + "primary": false }, { "attrs": [ @@ -2090,7 +2155,8 @@ EOF "size": 2048, "errors": [], "shard": 0, - "osd": 1 + "osd": 1, + "primary": true }, { "attrs": [ @@ -2125,7 +2191,8 @@ EOF "size": 2048, "errors": [], "shard": 1, - "osd": 2 + "osd": 2, + "primary": false } ], "selected_object_info": "3:5e723e06:::EOBJ4:head(48'6 client.4223.0:1 dirty|data_digest|omap_digest s 7 uv 6 dd 2ddbf8f5 od ffffffff alloc_hint [0 0 0])", @@ -2150,7 +2217,8 @@ EOF "size": 2048, "errors": [], "shard": 2, - "osd": 0 + "osd": 0, + "primary": false }, { "data_digest": "0x00000000", @@ -2160,7 +2228,8 @@ EOF "size_mismatch_oi" ], "shard": 0, - "osd": 1 + "osd": 1, + "primary": true }, { "data_digest": "0x00000000", @@ -2168,7 +2237,8 @@ EOF "size": 2048, "errors": [], "shard": 1, - "osd": 2 + "osd": 2, + "primary": false } ], "selected_object_info": "3:8549dfb5:::EOBJ5:head(65'7 client.4288.0:1 dirty|data_digest|omap_digest s 7 uv 7 dd 2ddbf8f5 od ffffffff alloc_hint [0 0 0])", @@ -2204,7 +2274,8 @@ EOF "size": 2048, "errors": [], "shard": 2, - "osd": 0 + "osd": 0, + "primary": false }, { "size": 9, @@ -2213,7 +2284,8 @@ EOF "read_error", "size_mismatch_oi" ], - "osd": 1 + "osd": 1, + "primary": true }, { "data_digest": "0x04cfa72f", @@ -2221,7 +2293,8 @@ EOF "size": 2048, "shard": 1, "errors": [], - "osd": 2 + "osd": 2, + "primary": false } ], "selected_object_info": "3:9175b684:::EOBJ1:head(21'1 client.4179.0:1 dirty|data_digest|omap_digest s 7 uv 1 dd 2ddbf8f5 od ffffffff alloc_hint [0 0 0])", @@ -2248,7 +2321,8 @@ EOF "ec_hash_error" ], "shard": 2, - "osd": 0 + "osd": 0, + "primary": false }, { "data_digest": "0x04cfa72f", @@ -2256,7 +2330,8 @@ EOF "size": 2048, "errors": [], "shard": 0, - "osd": 1 + "osd": 1, + "primary": true }, { 
"data_digest": "0x04cfa72f", @@ -2264,7 +2339,8 @@ EOF "size": 2048, "errors": [], "shard": 1, - "osd": 2 + "osd": 2, + "primary": false } ], "selected_object_info": "3:9babd184:::EOBJ2:head(29'2 client.4217.0:1 dirty|data_digest|omap_digest s 7 uv 2 dd 2ddbf8f5 od ffffffff alloc_hint [0 0 0])", @@ -2288,10 +2364,12 @@ EOF "size": 2048, "errors": [], "shard": 2, - "osd": 0 + "osd": 0, + "primary": false }, { "osd": 1, + "primary": true, "shard": 0, "errors": [ "missing" @@ -2303,7 +2381,8 @@ EOF "size": 2048, "shard": 1, "errors": [], - "osd": 2 + "osd": 2, + "primary": false } ], "selected_object_info": "3:b197b25d:::EOBJ3:head(37'3 client.4251.0:1 dirty|data_digest|omap_digest s 7 uv 3 dd 2ddbf8f5 od ffffffff alloc_hint [0 0 0])", @@ -2354,10 +2433,12 @@ EOF "size": 2048, "errors": [], "shard": 2, - "osd": 0 + "osd": 0, + "primary": false }, { "osd": 1, + "primary": true, "shard": 0, "errors": [], "size": 2048, @@ -2393,6 +2474,7 @@ EOF }, { "osd": 2, + "primary": false, "shard": 1, "errors": [], "size": 2048, @@ -2449,7 +2531,8 @@ EOF "size": 2048, "errors": [], "shard": 2, - "osd": 0 + "osd": 0, + "primary": false }, { "size": 4096, @@ -2458,7 +2541,8 @@ EOF "size_mismatch_oi", "ec_size_error" ], - "osd": 1 + "osd": 1, + "primary": true }, { "data_digest": "0x04cfa72f", @@ -2466,7 +2550,8 @@ EOF "size": 2048, "shard": 1, "errors": [], - "osd": 2 + "osd": 2, + "primary": false } ], "selected_object_info": "3:8549dfb5:::EOBJ5:head(65'7 client.4441.0:1 dirty|data_digest|omap_digest s 7 uv 7 dd 2ddbf8f5 od ffffffff alloc_hint [0 0 0])", diff --git a/src/common/scrub_types.cc b/src/common/scrub_types.cc index f53d8ea3f93de..abc575d7a0db5 100644 --- a/src/common/scrub_types.cc +++ b/src/common/scrub_types.cc @@ -70,8 +70,9 @@ void shard_info_wrapper::set_object(const ScrubMap::object& object) void shard_info_wrapper::encode(bufferlist& bl) const { - ENCODE_START(2, 1, bl); + ENCODE_START(3, 3, bl); ::encode(errors, bl); + ::encode(primary, bl); if 
(has_shard_missing()) { return; } @@ -87,8 +88,9 @@ void shard_info_wrapper::encode(bufferlist& bl) const void shard_info_wrapper::decode(bufferlist::iterator& bp) { - DECODE_START(2, bp); + DECODE_START(3, bp); ::decode(errors, bp); + ::decode(primary, bp); if (has_shard_missing()) { return; } @@ -98,8 +100,7 @@ void shard_info_wrapper::decode(bufferlist::iterator& bp) ::decode(omap_digest, bp); ::decode(data_digest_present, bp); ::decode(data_digest, bp); - if (struct_v > 1) - ::decode(selected_oi, bp); + ::decode(selected_oi, bp); DECODE_FINISH(bp); } @@ -120,10 +121,12 @@ void inconsistent_obj_wrapper::set_auth_missing(const hobject_t& hoid, const map& maps, map &shard_map, - int &shallow_errors, int &deep_errors) + int &shallow_errors, int &deep_errors, + const pg_shard_t &primary) { for (auto pg_map : maps) { auto oid_object = pg_map.second->objects.find(hoid); + shard_map[pg_map.first].primary = (pg_map.first == primary); if (oid_object == pg_map.second->objects.end()) shard_map[pg_map.first].set_missing(); else diff --git a/src/common/scrub_types.h b/src/common/scrub_types.h index 39a265671c24d..21e557b98aa4c 100644 --- a/src/common/scrub_types.h +++ b/src/common/scrub_types.h @@ -116,7 +116,8 @@ struct inconsistent_obj_wrapper : librados::inconsistent_obj_t { void set_auth_missing(const hobject_t& hoid, const map&, map&, - int &shallow_errors, int &deep_errors); + int &shallow_errors, int &deep_errors, + const pg_shard_t &primary); void set_version(uint64_t ver) { version = ver; } void encode(bufferlist& bl) const; void decode(bufferlist::iterator& bp); diff --git a/src/include/rados/rados_types.hpp b/src/include/rados/rados_types.hpp index 9b79f8780ee60..1d5aca85beacf 100644 --- a/src/include/rados/rados_types.hpp +++ b/src/include/rados/rados_types.hpp @@ -121,6 +121,7 @@ struct shard_info_t : err_t { bool data_digest_present = false; uint32_t data_digest = 0; bool selected_oi = false; + bool primary = false; }; struct osd_shard_t { diff --git 
a/src/osd/PGBackend.cc b/src/osd/PGBackend.cc index 157b2422ab398..d73bcd30c1a21 100644 --- a/src/osd/PGBackend.cc +++ b/src/osd/PGBackend.cc @@ -782,6 +782,8 @@ map::const_iterator } string error_string; auto& shard_info = shard_map[j->first]; + if (j->first == get_parent()->whoami_shard()) + shard_info.primary = true; if (i->second.read_error) { shard_info.set_read_error(); error_string += " read_error"; @@ -929,7 +931,8 @@ void PGBackend::be_compare_scrubmaps( set object_errors; if (auth == maps.end()) { object_error.set_version(0); - object_error.set_auth_missing(*k, maps, shard_map, shallow_errors, deep_errors); + object_error.set_auth_missing(*k, maps, shard_map, shallow_errors, + deep_errors, get_parent()->whoami_shard()); if (object_error.has_deep_errors()) ++deep_errors; else if (object_error.has_shallow_errors()) @@ -982,6 +985,7 @@ void PGBackend::be_compare_scrubmaps( } else { cur_missing.insert(j->first); shard_map[j->first].set_missing(); + shard_map[j->first].primary = (j->first == get_parent()->whoami_shard()); // Can't have any other errors if there is no information available ++shallow_errors; errorstream << pgid << " shard " << j->first << " missing " << *k diff --git a/src/tools/rados/rados.cc b/src/tools/rados/rados.cc index 9d00403033ed0..5e8684deccd11 100644 --- a/src/tools/rados/rados.cc +++ b/src/tools/rados/rados.cc @@ -1448,6 +1448,7 @@ static void dump_inconsistent(const inconsistent_obj_t& inc, f.open_object_section("shard"); auto& osd_shard = shard_info.first; f.dump_int("osd", osd_shard.osd); + f.dump_bool("primary", shard_info.second.primary); auto shard = osd_shard.shard; if (shard != shard_id_t::NO_SHARD) f.dump_unsigned("shard", shard); From 8e2b9a07e0551895809a3fc036aae557fadc74ba Mon Sep 17 00:00:00 2001 From: David Zafman Date: Thu, 13 Jul 2017 09:44:29 -0700 Subject: [PATCH 4/8] osd: Change a check to an assert() since it can't happen anymore Signed-off-by: David Zafman --- src/osd/PGBackend.cc | 7 ++----- 1 file changed, 2 
insertions(+), 5 deletions(-) diff --git a/src/osd/PGBackend.cc b/src/osd/PGBackend.cc index d73bcd30c1a21..a8f2aa66479ef 100644 --- a/src/osd/PGBackend.cc +++ b/src/osd/PGBackend.cc @@ -829,11 +829,8 @@ map::const_iterator goto out; } - if (oi.soid != obj) { - shard_info.set_oi_attr_corrupted(); - error_string += " oi_attr_corrupted"; - goto out; - } + // This is automatically corrected in PG::_repair_oinfo_oid() + assert(oi.soid == obj); if (auth_version != eversion_t()) { if (!object_error.has_object_info_inconsistency() && !(bl == auth_bl)) { From 437e5cf1067658912fe15859d18615c733c84f1a Mon Sep 17 00:00:00 2001 From: David Zafman Date: Thu, 13 Jul 2017 09:45:21 -0700 Subject: [PATCH 5/8] osd: Compare all object info even when can't consider for auth copy Signed-off-by: David Zafman --- src/osd/PGBackend.cc | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/src/osd/PGBackend.cc b/src/osd/PGBackend.cc index a8f2aa66479ef..312e9a7a44c1e 100644 --- a/src/osd/PGBackend.cc +++ b/src/osd/PGBackend.cc @@ -758,7 +758,7 @@ map::const_iterator inconsistent_obj_wrapper &object_error) { eversion_t auth_version; - bufferlist auth_bl; + bufferlist first_bl; // Create list of shards with primary last so it will be auth copy all // other things being equal. 
@@ -832,11 +832,11 @@ map::const_iterator // This is automatically corrected in PG::_repair_oinfo_oid() assert(oi.soid == obj); - if (auth_version != eversion_t()) { - if (!object_error.has_object_info_inconsistency() && !(bl == auth_bl)) { - object_error.set_object_info_inconsistency(); - error_string += " object_info_inconsistency"; - } + if (first_bl.length() == 0) { + first_bl.append(bl); + } else if (!object_error.has_object_info_inconsistency() && !bl.contents_equal(first_bl)) { + object_error.set_object_info_inconsistency(); + error_string += " object_info_inconsistency"; } // Don't use this particular shard because it won't be able to repair data @@ -866,8 +866,6 @@ map::const_iterator auth = j; *auth_oi = oi; auth_version = oi.version; - auth_bl.clear(); - auth_bl.append(bl); } out: From 5f58301a1364e948834dabe503200dda07fc2790 Mon Sep 17 00:00:00 2001 From: David Zafman Date: Thu, 13 Jul 2017 21:01:18 -0700 Subject: [PATCH 6/8] osd, rados: Improve size scrub error handling Fixes: http://tracker.ceph.com/issues/20243 Signed-off-by: David Zafman --- doc/rados/command/list-inconsistent-obj.json | 6 +- qa/standalone/scrub/osd-scrub-repair.sh | 164 ++++++++++--------- src/common/scrub_types.h | 3 + src/include/rados/rados_types.hpp | 8 +- src/osd/PGBackend.cc | 19 ++- src/tools/rados/rados.cc | 8 +- 6 files changed, 122 insertions(+), 86 deletions(-) diff --git a/doc/rados/command/list-inconsistent-obj.json b/doc/rados/command/list-inconsistent-obj.json index a7c17ace9be54..4e18fe525e20d 100644 --- a/doc/rados/command/list-inconsistent-obj.json +++ b/doc/rados/command/list-inconsistent-obj.json @@ -66,7 +66,8 @@ "ec_hash_error", "ec_size_error", "oi_attr_missing", - "oi_attr_corrupted" + "oi_attr_corrupted", + "obj_size_oi_mismatch" ] }, "minItems": 0, @@ -132,7 +133,8 @@ "ec_hash_error", "ec_size_error", "oi_attr_missing", - "oi_attr_corrupted" + "oi_attr_corrupted", + "obj_size_oi_mismatch" ] }, "minItems": 0, diff --git 
a/qa/standalone/scrub/osd-scrub-repair.sh b/qa/standalone/scrub/osd-scrub-repair.sh index 38051c3938ee3..27ad2c956313a 100755 --- a/qa/standalone/scrub/osd-scrub-repair.sh +++ b/qa/standalone/scrub/osd-scrub-repair.sh @@ -634,7 +634,8 @@ function TEST_corrupt_scrub_replicated() { { "size": 9, "errors": [ - "size_mismatch_oi" + "size_mismatch_oi", + "obj_size_oi_mismatch" ], "osd": 1, "primary": true @@ -642,7 +643,8 @@ function TEST_corrupt_scrub_replicated() { ], "selected_object_info": "3:ce3f1d6a:::ROBJ1:head(47'54 osd.0.0:53 dirty|omap|data_digest|omap_digest s 7 uv 3 dd 2ddbf8f5 od f5fba2c6 alloc_hint [0 0 0])", "union_shard_errors": [ - "size_mismatch_oi" + "size_mismatch_oi", + "obj_size_oi_mismatch" ], "errors": [ "size_mismatch" @@ -717,6 +719,18 @@ function TEST_corrupt_scrub_replicated() { "shards": [ { "size": 7, + "attrs": [ + { + "Base64": false, + "value": "", + "name": "_" + }, + { + "Base64": true, + "value": "AwIdAAAAAAAAAAAAAAABAAAAAAAAAAAAAAAAAAAAAAAAAAA=", + "name": "snapset" + } + ], "errors": [ "oi_attr_corrupted" ], @@ -725,6 +739,13 @@ function TEST_corrupt_scrub_replicated() { }, { "size": 7, + "attrs": [ + { + "Base64": true, + "value": "AwIdAAAAAAAAAAAAAAABAAAAAAAAAAAAAAAAAAAAAAAAAAA=", + "name": "snapset" + } + ], "errors": [ "oi_attr_missing" ], @@ -785,9 +806,7 @@ function TEST_corrupt_scrub_replicated() { "union_shard_errors": [ "oi_attr_missing" ], - "errors": [ - "attr_name_mismatch" - ], + "errors": [], "object": { "version": 45, "snap": "head", @@ -901,18 +920,6 @@ function TEST_corrupt_scrub_replicated() { { "shards": [ { - "attrs": [ - { - "Base64": true, - "value": "", - "name": "_" - }, - { - "Base64": true, - "value": "AwIdAAAAAAAAAAAAAAABAAAAAAAAAAAAAAAAAAAAAAAAAAA=", - "name": "snapset" - } - ], "object_info": "3:ffdb2004:::ROBJ9:head(102'63 client.4433.0:1 dirty|omap|data_digest|omap_digest s 1 uv 63 dd 2b63260d od 2eecc539 alloc_hint [0 0 0])", "size": 1, "errors": [], @@ -920,30 +927,21 @@ function 
TEST_corrupt_scrub_replicated() { "primary": false }, { - "attrs": [ - { - "Base64": true, - "value": "", - "name": "_" - }, - { - "Base64": true, - "value": "AwIdAAAAAAAAAAAAAAABAAAAAAAAAAAAAAAAAAAAAAAAAAA=", - "name": "snapset" - } - ], "object_info": "3:ffdb2004:::ROBJ9:head(47'60 osd.0.0:59 dirty|omap|data_digest|omap_digest s 7 uv 27 dd 2ddbf8f5 od 2eecc539 alloc_hint [0 0 0])", "size": 1, - "errors": [], + "errors": [ + "obj_size_oi_mismatch" + ], "osd": 1, "primary": true } ], "selected_object_info": "3:ffdb2004:::ROBJ9:head(102'63 client.4433.0:1 dirty|omap|data_digest|omap_digest s 1 uv 63 dd 2b63260d od 2eecc539 alloc_hint [0 0 0])", - "union_shard_errors": [], + "union_shard_errors": [ + "obj_size_oi_mismatch" + ], "errors": [ - "object_info_inconsistency", - "attr_value_mismatch" + "object_info_inconsistency" ], "object": { "version": 63, @@ -1020,7 +1018,8 @@ EOF "size": 9, "errors": [ "data_digest_mismatch_oi", - "size_mismatch_oi" + "size_mismatch_oi", + "obj_size_oi_mismatch" ], "osd": 1, "primary": true @@ -1029,7 +1028,8 @@ EOF "selected_object_info": "3:ce3f1d6a:::ROBJ1:head(47'54 osd.0.0:53 dirty|omap|data_digest|omap_digest s 7 uv 3 dd 2ddbf8f5 od f5fba2c6 alloc_hint [0 0 0])", "union_shard_errors": [ "data_digest_mismatch_oi", - "size_mismatch_oi" + "size_mismatch_oi", + "obj_size_oi_mismatch" ], "errors": [ "data_digest_mismatch", @@ -1176,6 +1176,18 @@ EOF { "shards": [ { + "attrs": [ + { + "Base64": false, + "value": "", + "name": "_" + }, + { + "Base64": true, + "value": "AwIdAAAAAAAAAAAAAAABAAAAAAAAAAAAAAAAAAAAAAAAAAA=", + "name": "snapset" + } + ], "data_digest": "0x2ddbf8f5", "omap_digest": "0x4f14f849", "size": 7, @@ -1186,6 +1198,13 @@ EOF "primary": false }, { + "attrs": [ + { + "Base64": true, + "value": "AwIdAAAAAAAAAAAAAAABAAAAAAAAAAAAAAAAAAAAAAAAAAA=", + "name": "snapset" + } + ], "data_digest": "0x2ddbf8f5", "omap_digest": "0x4f14f849", "size": 7, @@ -1253,9 +1272,7 @@ EOF "union_shard_errors": [ "oi_attr_missing" ], - "errors": 
[ - "attr_name_mismatch" - ], + "errors": [], "object": { "version": 45, "snap": "head", @@ -1555,39 +1572,17 @@ EOF { "shards": [ { - "attrs": [ - { - "Base64": true, - "value": "", - "name": "_" - }, - { - "Base64": true, - "value": "AwIdAAAAAAAAAAAAAAABAAAAAAAAAAAAAAAAAAAAAAAAAAA=", - "name": "snapset" - } - ], "object_info": "3:ffdb2004:::ROBJ9:head(47'60 osd.0.0:59 dirty|omap|data_digest|omap_digest s 7 uv 27 dd 2ddbf8f5 od 2eecc539 alloc_hint [0 0 0])", "data_digest": "0x1f26fb26", "omap_digest": "0x2eecc539", "size": 3, - "errors": [], + "errors": [ + "obj_size_oi_mismatch" + ], "osd": 0, "primary": false }, { - "attrs": [ - { - "Base64": true, - "value": "", - "name": "_" - }, - { - "Base64": true, - "value": "AwIdAAAAAAAAAAAAAAABAAAAAAAAAAAAAAAAAAAAAAAAAAA=", - "name": "snapset" - } - ], "object_info": "3:ffdb2004:::ROBJ9:head(122'64 client.4532.0:1 dirty|omap|data_digest|omap_digest s 3 uv 64 dd 1f26fb26 od 2eecc539 alloc_hint [0 0 0])", "data_digest": "0x1f26fb26", "omap_digest": "0x2eecc539", @@ -1598,10 +1593,11 @@ EOF } ], "selected_object_info": "3:ffdb2004:::ROBJ9:head(122'64 client.4532.0:1 dirty|omap|data_digest|omap_digest s 3 uv 64 dd 1f26fb26 od 2eecc539 alloc_hint [0 0 0])", - "union_shard_errors": [], + "union_shard_errors": [ + "obj_size_oi_mismatch" + ], "errors": [ - "object_info_inconsistency", - "attr_value_mismatch" + "object_info_inconsistency" ], "object": { "version": 64, @@ -1735,7 +1731,8 @@ function corrupt_scrub_erasure() { "size": 9, "shard": 0, "errors": [ - "size_mismatch_oi" + "size_mismatch_oi", + "obj_size_oi_mismatch" ], "osd": 1, "primary": true @@ -1750,7 +1747,8 @@ function corrupt_scrub_erasure() { ], "selected_object_info": "3:9175b684:::EOBJ1:head(21'1 client.4179.0:1 dirty|data_digest|omap_digest s 7 uv 1 dd 2ddbf8f5 od ffffffff alloc_hint [0 0 0])", "union_shard_errors": [ - "size_mismatch_oi" + "size_mismatch_oi", + "obj_size_oi_mismatch" ], "errors": [ "size_mismatch" @@ -1933,7 +1931,8 @@ function 
corrupt_scrub_erasure() { "size": 4096, "shard": 0, "errors": [ - "size_mismatch_oi" + "size_mismatch_oi", + "obj_size_oi_mismatch" ], "osd": 1, "primary": true @@ -1948,7 +1947,8 @@ function corrupt_scrub_erasure() { ], "selected_object_info": "3:8549dfb5:::EOBJ5:head(65'7 client.4441.0:1 dirty|data_digest|omap_digest s 7 uv 7 dd 2ddbf8f5 od ffffffff alloc_hint [0 0 0])", "union_shard_errors": [ - "size_mismatch_oi" + "size_mismatch_oi", + "obj_size_oi_mismatch" ], "errors": [ "size_mismatch" @@ -2011,7 +2011,8 @@ EOF "shard": 0, "errors": [ "read_error", - "size_mismatch_oi" + "size_mismatch_oi", + "obj_size_oi_mismatch" ], "osd": 1, "primary": true @@ -2029,7 +2030,8 @@ EOF "selected_object_info": "3:9175b684:::EOBJ1:head(27'1 client.4155.0:1 dirty|data_digest|omap_digest s 7 uv 1 dd 2ddbf8f5 od ffffffff alloc_hint [0 0 0])", "union_shard_errors": [ "read_error", - "size_mismatch_oi" + "size_mismatch_oi", + "obj_size_oi_mismatch" ], "errors": [ "size_mismatch" @@ -2225,7 +2227,8 @@ EOF "omap_digest": "0xffffffff", "size": 4096, "errors": [ - "size_mismatch_oi" + "size_mismatch_oi", + "obj_size_oi_mismatch" ], "shard": 0, "osd": 1, @@ -2243,7 +2246,8 @@ EOF ], "selected_object_info": "3:8549dfb5:::EOBJ5:head(65'7 client.4288.0:1 dirty|data_digest|omap_digest s 7 uv 7 dd 2ddbf8f5 od ffffffff alloc_hint [0 0 0])", "union_shard_errors": [ - "size_mismatch_oi" + "size_mismatch_oi", + "obj_size_oi_mismatch" ], "errors": [ "size_mismatch" @@ -2282,7 +2286,8 @@ EOF "shard": 0, "errors": [ "read_error", - "size_mismatch_oi" + "size_mismatch_oi", + "obj_size_oi_mismatch" ], "osd": 1, "primary": true @@ -2300,7 +2305,8 @@ EOF "selected_object_info": "3:9175b684:::EOBJ1:head(21'1 client.4179.0:1 dirty|data_digest|omap_digest s 7 uv 1 dd 2ddbf8f5 od ffffffff alloc_hint [0 0 0])", "union_shard_errors": [ "read_error", - "size_mismatch_oi" + "size_mismatch_oi", + "obj_size_oi_mismatch" ], "errors": [ "size_mismatch" @@ -2539,7 +2545,8 @@ EOF "shard": 0, "errors": [ 
"size_mismatch_oi", - "ec_size_error" + "ec_size_error", + "obj_size_oi_mismatch" ], "osd": 1, "primary": true @@ -2557,7 +2564,8 @@ EOF "selected_object_info": "3:8549dfb5:::EOBJ5:head(65'7 client.4441.0:1 dirty|data_digest|omap_digest s 7 uv 7 dd 2ddbf8f5 od ffffffff alloc_hint [0 0 0])", "union_shard_errors": [ "size_mismatch_oi", - "ec_size_error" + "ec_size_error", + "obj_size_oi_mismatch" ], "errors": [ "size_mismatch" diff --git a/src/common/scrub_types.h b/src/common/scrub_types.h index 21e557b98aa4c..9e30fbd9aa222 100644 --- a/src/common/scrub_types.h +++ b/src/common/scrub_types.h @@ -78,6 +78,9 @@ struct shard_info_wrapper : public librados::shard_info_t { void set_ss_attr_corrupted() { errors |= err_t::SS_ATTR_CORRUPTED; } + void set_obj_size_oi_mismatch() { + errors |= err_t::OBJ_SIZE_OI_MISMATCH; + } void encode(bufferlist& bl) const; void decode(bufferlist::iterator& bp); }; diff --git a/src/include/rados/rados_types.hpp b/src/include/rados/rados_types.hpp index 1d5aca85beacf..7829e28702434 100644 --- a/src/include/rados/rados_types.hpp +++ b/src/include/rados/rados_types.hpp @@ -63,11 +63,12 @@ struct err_t { OI_ATTR_MISSING = 1 << 14, OI_ATTR_CORRUPTED = 1 << 15, SS_ATTR_MISSING = 1 << 16, - SS_ATTR_CORRUPTED = 1 << 17 + SS_ATTR_CORRUPTED = 1 << 17, + OBJ_SIZE_OI_MISMATCH = 1 << 18 // When adding more here add to either SHALLOW_ERRORS or DEEP_ERRORS }; uint64_t errors = 0; - static constexpr uint64_t SHALLOW_ERRORS = SHARD_MISSING|SHARD_STAT_ERR|SIZE_MISMATCH_OI|OI_ATTR_MISSING|OI_ATTR_CORRUPTED|SS_ATTR_MISSING|SS_ATTR_CORRUPTED; + static constexpr uint64_t SHALLOW_ERRORS = SHARD_MISSING|SHARD_STAT_ERR|SIZE_MISMATCH_OI|OI_ATTR_MISSING|OI_ATTR_CORRUPTED|SS_ATTR_MISSING|SS_ATTR_CORRUPTED|OBJ_SIZE_OI_MISMATCH; static constexpr uint64_t DEEP_ERRORS = SHARD_READ_ERR|DATA_DIGEST_MISMATCH_OI|OMAP_DIGEST_MISMATCH_OI|SHARD_EC_HASH_MISMATCH|SHARD_EC_SIZE_MISMATCH; bool has_shard_missing() const { return errors & SHARD_MISSING; @@ -111,6 +112,9 @@ struct 
err_t { bool has_deep_errors() const { return errors & DEEP_ERRORS; } + bool has_obj_size_oi_mismatch() const { + return errors & OBJ_SIZE_OI_MISMATCH; + } }; struct shard_info_t : err_t { diff --git a/src/osd/PGBackend.cc b/src/osd/PGBackend.cc index 312e9a7a44c1e..848f036a6c1ff 100644 --- a/src/osd/PGBackend.cc +++ b/src/osd/PGBackend.cc @@ -711,6 +711,9 @@ bool PGBackend::be_compare_scrub_objects( for (map::const_iterator i = auth.attrs.begin(); i != auth.attrs.end(); ++i) { + // We check system keys separately + if (i->first == OI_ATTR || i->first == SS_ATTR) + continue; if (!candidate.attrs.count(i->first)) { if (error != CLEAN) errorstream << ", "; @@ -728,6 +731,9 @@ bool PGBackend::be_compare_scrub_objects( for (map::const_iterator i = candidate.attrs.begin(); i != candidate.attrs.end(); ++i) { + // We check system keys separately + if (i->first == OI_ATTR || i->first == SS_ATTR) + continue; if (!auth.attrs.count(i->first)) { if (error != CLEAN) errorstream << ", "; @@ -839,10 +845,12 @@ map::const_iterator error_string += " object_info_inconsistency"; } - // Don't use this particular shard because it won't be able to repair data - // XXX: For now we can't pick one shard for repair and another's object info - if (i->second.read_error || i->second.ec_hash_mismatch || i->second.ec_size_mismatch) + if (i->second.size != be_get_ondisk_size(oi.size)) { + dout(5) << __func__ << " size " << i->second.size << " oi size " << oi.size << dendl; + shard_info.set_obj_size_oi_mismatch(); + error_string += " obj_size_oi_mismatch"; goto out; + } // We don't set errors here for snapset, but we won't pick an auth copy if the // snapset is missing or won't decode. 
@@ -861,6 +869,11 @@ map::const_iterator } } + // Don't use this particular shard because it won't be able to repair data + // XXX: For now we can't pick one shard for repair and another's object info + if (i->second.read_error || i->second.ec_hash_mismatch || i->second.ec_size_mismatch) + goto out; + if (auth_version == eversion_t() || oi.version > auth_version || (oi.version == auth_version && dcount(oi) > dcount(*auth_oi))) { auth = j; diff --git a/src/tools/rados/rados.cc b/src/tools/rados/rados.cc index 5e8684deccd11..682b53059bf3d 100644 --- a/src/tools/rados/rados.cc +++ b/src/tools/rados/rados.cc @@ -1338,6 +1338,8 @@ static void dump_errors(const err_t &err, Formatter &f, const char *name) f.dump_string("error", "oi_attr_missing"); if (err.has_oi_attr_corrupted()) f.dump_string("error", "oi_attr_corrupted"); + if (err.has_obj_size_oi_mismatch()) + f.dump_string("error", "obj_size_oi_mismatch"); f.close_section(); } @@ -1369,7 +1371,11 @@ static void dump_shard(const shard_info_t& shard, ::decode(oi, bliter); // Can't be corrupted f.dump_stream("object_info") << oi; } - if (inc.has_attr_name_mismatch() || inc.has_attr_value_mismatch()) { + if (inc.has_attr_name_mismatch() || inc.has_attr_value_mismatch() + || inc.union_shards.has_oi_attr_missing() + || inc.union_shards.has_oi_attr_corrupted() + || inc.union_shards.has_ss_attr_missing() + || inc.union_shards.has_ss_attr_corrupted()) { f.open_array_section("attrs"); for (auto kv : shard.attrs) { f.open_object_section("attr"); From 4c949b6258109884ce1683d4474c740d5e61aee6 Mon Sep 17 00:00:00 2001 From: David Zafman Date: Tue, 18 Jul 2017 18:45:57 -0700 Subject: [PATCH 7/8] osd, rados: Adding ss_attr_missing and ss_attr_corrupt errors to list-inconsistent-obj Signed-off-by: David Zafman --- doc/rados/command/list-inconsistent-obj.json | 8 +- qa/standalone/scrub/osd-scrub-repair.sh | 134 +++++++++++++++++-- src/osd/PGBackend.cc | 7 +- src/tools/rados/rados.cc | 4 + 4 files changed, 138 insertions(+), 15 
deletions(-) diff --git a/doc/rados/command/list-inconsistent-obj.json b/doc/rados/command/list-inconsistent-obj.json index 4e18fe525e20d..76ca43e321de9 100644 --- a/doc/rados/command/list-inconsistent-obj.json +++ b/doc/rados/command/list-inconsistent-obj.json @@ -67,7 +67,9 @@ "ec_size_error", "oi_attr_missing", "oi_attr_corrupted", - "obj_size_oi_mismatch" + "obj_size_oi_mismatch", + "ss_attr_missing", + "ss_attr_corrupted" ] }, "minItems": 0, @@ -134,7 +136,9 @@ "ec_size_error", "oi_attr_missing", "oi_attr_corrupted", - "obj_size_oi_mismatch" + "obj_size_oi_mismatch", + "ss_attr_missing", + "ss_attr_corrupted" ] }, "minItems": 0, diff --git a/qa/standalone/scrub/osd-scrub-repair.sh b/qa/standalone/scrub/osd-scrub-repair.sh index 27ad2c956313a..8d58c584595f4 100755 --- a/qa/standalone/scrub/osd-scrub-repair.sh +++ b/qa/standalone/scrub/osd-scrub-repair.sh @@ -481,7 +481,7 @@ function TEST_list_missing_erasure_coded_overwrites() { function TEST_corrupt_scrub_replicated() { local dir=$1 local poolname=csr_pool - local total_objs=15 + local total_objs=16 setup $dir || return 1 run_mon $dir a --osd_pool_default_size=2 || return 1 @@ -597,6 +597,12 @@ function TEST_corrupt_scrub_replicated() { 15) objectstore_tool $dir $osd $objname rm-attr _ || return 1 + ;; + + 16) + objectstore_tool $dir 0 $objname rm-attr snapset || return 1 + echo -n bad-val > $dir/bad-val + objectstore_tool $dir 1 $objname set-attr snapset $dir/bad-val || return 1 esac done @@ -815,6 +821,57 @@ function TEST_corrupt_scrub_replicated() { "name": "ROBJ15" } }, + { + "errors": [], + "object": { + "locator": "", + "name": "ROBJ16", + "nspace": "", + "snap": "head", + "version": 0 + }, + "shards": [ + { + "attrs": [ + { + "Base64": true, + "name": "_", + "value": "" + } + ], + "errors": [ + "ss_attr_missing" + ], + "osd": 0, + "primary": false, + "size": 7 + }, + { + "attrs": [ + { + "Base64": true, + "name": "_", + "value": "" + }, + { + "Base64": false, + "name": "snapset", + "value": "bad-val" + 
} + ], + "errors": [ + "ss_attr_corrupted" + ], + "osd": 1, + "primary": true, + "size": 7 + } + ], + "union_shard_errors": [ + "ss_attr_missing", + "ss_attr_corrupted" + ] + }, { "shards": [ { @@ -903,14 +960,14 @@ function TEST_corrupt_scrub_replicated() { "primary": true } ], - "selected_object_info": "3:86586531:::ROBJ8:head(82'62 client.4351.0:1 dirty|omap|data_digest|omap_digest s 7 uv 62 dd 2ddbf8f5 od d6be81dc alloc_hint [0 0 0])", + "selected_object_info": "3:86586531:::ROBJ8:head(82'62 client.4351.0:1 dirty|omap|data_digest|omap_digest s 7 uv 66 dd 2ddbf8f5 od d6be81dc alloc_hint [0 0 0])", "union_shard_errors": [], "errors": [ "attr_value_mismatch", "attr_name_mismatch" ], "object": { - "version": 62, + "version": 66, "snap": "head", "locator": "", "nspace": "", @@ -920,7 +977,7 @@ function TEST_corrupt_scrub_replicated() { { "shards": [ { - "object_info": "3:ffdb2004:::ROBJ9:head(102'63 client.4433.0:1 dirty|omap|data_digest|omap_digest s 1 uv 63 dd 2b63260d od 2eecc539 alloc_hint [0 0 0])", + "object_info": "3:ffdb2004:::ROBJ9:head(102'63 client.4433.0:1 dirty|omap|data_digest|omap_digest s 1 uv 67 dd 2b63260d od 2eecc539 alloc_hint [0 0 0])", "size": 1, "errors": [], "osd": 0, @@ -936,7 +993,7 @@ function TEST_corrupt_scrub_replicated() { "primary": true } ], - "selected_object_info": "3:ffdb2004:::ROBJ9:head(102'63 client.4433.0:1 dirty|omap|data_digest|omap_digest s 1 uv 63 dd 2b63260d od 2eecc539 alloc_hint [0 0 0])", + "selected_object_info": "3:ffdb2004:::ROBJ9:head(102'63 client.4433.0:1 dirty|omap|data_digest|omap_digest s 1 uv 67 dd 2b63260d od 2eecc539 alloc_hint [0 0 0])", "union_shard_errors": [ "obj_size_oi_mismatch" ], @@ -944,7 +1001,7 @@ function TEST_corrupt_scrub_replicated() { "object_info_inconsistency" ], "object": { - "version": 63, + "version": 67, "snap": "head", "locator": "", "nspace": "", @@ -1281,6 +1338,61 @@ EOF "name": "ROBJ15" } }, + { + "errors": [], + "object": { + "locator": "", + "name": "ROBJ16", + "nspace": "", + 
"snap": "head", + "version": 0 + }, + "shards": [ + { + "attrs": [ + { + "Base64": true, + "name": "_", + "value": "" + } + ], + "data_digest": "0x2ddbf8f5", + "errors": [ + "ss_attr_missing" + ], + "omap_digest": "0x8b699207", + "osd": 0, + "primary": false, + "size": 7 + }, + { + "attrs": [ + { + "Base64": true, + "name": "_", + "value": "" + }, + { + "Base64": false, + "name": "snapset", + "value": "bad-val" + } + ], + "data_digest": "0x2ddbf8f5", + "errors": [ + "ss_attr_corrupted" + ], + "omap_digest": "0x8b699207", + "osd": 1, + "primary": true, + "size": 7 + } + ], + "union_shard_errors": [ + "ss_attr_missing", + "ss_attr_corrupted" + ] + }, { "shards": [ { @@ -1555,14 +1667,14 @@ EOF "primary": true } ], - "selected_object_info": "3:86586531:::ROBJ8:head(82'62 client.4351.0:1 dirty|omap|data_digest|omap_digest s 7 uv 62 dd 2ddbf8f5 od d6be81dc alloc_hint [0 0 0])", + "selected_object_info": "3:86586531:::ROBJ8:head(82'62 client.4351.0:1 dirty|omap|data_digest|omap_digest s 7 uv 66 dd 2ddbf8f5 od d6be81dc alloc_hint [0 0 0])", "union_shard_errors": [], "errors": [ "attr_value_mismatch", "attr_name_mismatch" ], "object": { - "version": 62, + "version": 66, "snap": "head", "locator": "", "nspace": "", @@ -1583,7 +1695,7 @@ EOF "primary": false }, { - "object_info": "3:ffdb2004:::ROBJ9:head(122'64 client.4532.0:1 dirty|omap|data_digest|omap_digest s 3 uv 64 dd 1f26fb26 od 2eecc539 alloc_hint [0 0 0])", + "object_info": "3:ffdb2004:::ROBJ9:head(122'64 client.4532.0:1 dirty|omap|data_digest|omap_digest s 3 uv 68 dd 1f26fb26 od 2eecc539 alloc_hint [0 0 0])", "data_digest": "0x1f26fb26", "omap_digest": "0x2eecc539", "size": 3, @@ -1592,7 +1704,7 @@ EOF "primary": true } ], - "selected_object_info": "3:ffdb2004:::ROBJ9:head(122'64 client.4532.0:1 dirty|omap|data_digest|omap_digest s 3 uv 64 dd 1f26fb26 od 2eecc539 alloc_hint [0 0 0])", + "selected_object_info": "3:ffdb2004:::ROBJ9:head(122'64 client.4532.0:1 dirty|omap|data_digest|omap_digest s 3 uv 68 dd 1f26fb26 
od 2eecc539 alloc_hint [0 0 0])", "union_shard_errors": [ "obj_size_oi_mismatch" ], @@ -1600,7 +1712,7 @@ EOF "object_info_inconsistency" ], "object": { - "version": 64, + "version": 68, "snap": "head", "locator": "", "nspace": "", diff --git a/src/osd/PGBackend.cc b/src/osd/PGBackend.cc index 848f036a6c1ff..312bda520fa26 100644 --- a/src/osd/PGBackend.cc +++ b/src/osd/PGBackend.cc @@ -852,11 +852,12 @@ map::const_iterator goto out; } - // We don't set errors here for snapset, but we won't pick an auth copy if the - // snapset is missing or won't decode. + // We won't pick an auth copy if the snapset is missing or won't decode. if (obj.is_head() || obj.is_snapdir()) { k = i->second.attrs.find(SS_ATTR); if (k == i->second.attrs.end()) { + shard_info.set_ss_attr_missing(); + error_string += " ss_attr_missing"; goto out; } ss_bl.push_back(k->second); @@ -865,6 +866,8 @@ map::const_iterator ::decode(ss, bliter); } catch (...) { // invalid snapset, probably corrupt + shard_info.set_ss_attr_corrupted(); + error_string += " ss_attr_corrupted"; goto out; } } diff --git a/src/tools/rados/rados.cc b/src/tools/rados/rados.cc index 682b53059bf3d..156647c307717 100644 --- a/src/tools/rados/rados.cc +++ b/src/tools/rados/rados.cc @@ -1340,6 +1340,10 @@ static void dump_errors(const err_t &err, Formatter &f, const char *name) f.dump_string("error", "oi_attr_corrupted"); if (err.has_obj_size_oi_mismatch()) f.dump_string("error", "obj_size_oi_mismatch"); + if (err.has_ss_attr_missing()) + f.dump_string("error", "ss_attr_missing"); + if (err.has_ss_attr_corrupted()) + f.dump_string("error", "ss_attr_corrupted"); f.close_section(); } From 75b425671a75c80ed95a52820cabf25d3fafcfff Mon Sep 17 00:00:00 2001 From: David Zafman Date: Wed, 26 Jul 2017 16:22:26 -0700 Subject: [PATCH 8/8] osd: In scrub's be_select_auth_object() detect multiple errors better Signed-off-by: David Zafman --- src/osd/PGBackend.cc | 46 +++++++++++++++++++++----------------------- 1 file changed, 22 insertions(+), 
24 deletions(-) diff --git a/src/osd/PGBackend.cc b/src/osd/PGBackend.cc index 312bda520fa26..a1adce752be93 100644 --- a/src/osd/PGBackend.cc +++ b/src/osd/PGBackend.cc @@ -817,6 +817,25 @@ map::const_iterator goto out; } + // We won't pick an auth copy if the snapset is missing or won't decode. + if (obj.is_head() || obj.is_snapdir()) { + k = i->second.attrs.find(SS_ATTR); + if (k == i->second.attrs.end()) { + shard_info.set_ss_attr_missing(); + error_string += " ss_attr_missing"; + } else { + ss_bl.push_back(k->second); + try { + bufferlist::iterator bliter = ss_bl.begin(); + ::decode(ss, bliter); + } catch (...) { + // invalid snapset, probably corrupt + shard_info.set_ss_attr_corrupted(); + error_string += " ss_attr_corrupted"; + } + } + } + k = i->second.attrs.find(OI_ATTR); if (k == i->second.attrs.end()) { // no object info on object, probably corrupt @@ -849,32 +868,11 @@ map::const_iterator dout(5) << __func__ << " size " << i->second.size << " oi size " << oi.size << dendl; shard_info.set_obj_size_oi_mismatch(); error_string += " obj_size_oi_mismatch"; - goto out; - } - - // We won't pick an auth copy if the snapset is missing or won't decode. - if (obj.is_head() || obj.is_snapdir()) { - k = i->second.attrs.find(SS_ATTR); - if (k == i->second.attrs.end()) { - shard_info.set_ss_attr_missing(); - error_string += " ss_attr_missing"; - goto out; - } - ss_bl.push_back(k->second); - try { - bufferlist::iterator bliter = ss_bl.begin(); - ::decode(ss, bliter); - } catch (...) 
{ - // invalid snapset, probably corrupt - shard_info.set_ss_attr_corrupted(); - error_string += " ss_attr_corrupted"; - goto out; - } } - // Don't use this particular shard because it won't be able to repair data - // XXX: For now we can't pick one shard for repair and another's object info - if (i->second.read_error || i->second.ec_hash_mismatch || i->second.ec_size_mismatch) + // Don't use this particular shard due to previous errors + // XXX: For now we can't pick one shard for repair and another's object info or snapset + if (shard_info.errors) goto out; if (auth_version == eversion_t() || oi.version > auth_version ||