From 780c29903249900bd3b5b965732ee164a3e2565d Mon Sep 17 00:00:00 2001
From: Samuel Just
Date: Tue, 3 Jan 2017 10:50:22 -0800
Subject: [PATCH] PrimaryLogPG: don't update digests for objects with mismatched names

I've only seen this on one cluster, but let's not issue repops during
scrub on objects where the object_info_t::soid value is not correct.

The cluster in question has been through many different non-release
kernels and osd versions, so the objects presumably came about due to
an old xfs or filestore bug. They recently became fatal since we made
filestore crash on ENOENT for setattrs. In the past, the cluster just
silently tolerated them.

http://tracker.ceph.com/issues/18409 is a larger feature to detect
these better and repair them automatically.

Related: http://tracker.ceph.com/issues/18409
Signed-off-by: Samuel Just
---
 src/osd/PrimaryLogPG.cc | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/src/osd/PrimaryLogPG.cc b/src/osd/PrimaryLogPG.cc
index 9e72675f9a940..fbd5fa4f00b71 100644
--- a/src/osd/PrimaryLogPG.cc
+++ b/src/osd/PrimaryLogPG.cc
@@ -13120,7 +13120,18 @@ void PrimaryLogPG::scrub_snapshot_metadata(
         continue;
       dout(10) << __func__ << " recording digests for " << p->first << dendl;
       ObjectContextRef obc = get_object_context(p->first, false);
-      assert(obc);
+      if (!obc) {
+        osd->clog->error() << info.pgid << " " << mode
+                           << " cannot get object context for "
+                           << p->first;
+        continue;
+      } else if (obc->obs.oi.soid != p->first) {
+        osd->clog->error() << info.pgid << " " << mode
+                           << " object " << p->first
+                           << " has a valid oi attr with a mismatched name, "
+                           << " obc->obs.oi.soid: " << obc->obs.oi.soid;
+        continue;
+      }
       OpContextUPtr ctx = simple_opc_create(obc);
       ctx->at_version = get_next_version();
       ctx->mtime = utime_t(); // do not update mtime