Skip to content

Commit

Permalink
pybind/mgr/progress: enforced try and except on accessing event dictionary
Browse files Browse the repository at this point in the history

There is a race condition in which an event
gets deleted while the progress module is
iterating through the ``events`` dictionary.
Without a ``try``/``except``, the lookup raises
an unhandled ``KeyError``, which crashes
the module.

This commit wraps every place in the code
that accesses the ``events`` dictionary
in a ``try``/``except KeyError`` block.

Fixes: https://tracker.ceph.com/issues/53803

Signed-off-by: Kamoltat <ksirivad@redhat.com>
  • Loading branch information
kamoltat committed Jan 12, 2022
1 parent 69ac8a4 commit b70d4a9
Showing 1 changed file with 24 additions and 16 deletions.
40 changes: 24 additions & 16 deletions src/pybind/mgr/progress/module.py
Original file line number Diff line number Diff line change
Expand Up @@ -539,12 +539,15 @@ def _osd_in_out(self, old_map, old_dump, new_map, osd_id, marked):
# previous recovery event for that osd
if marked == "in":
for ev_id in list(self._events):
ev = self._events[ev_id]
if isinstance(ev, PgRecoveryEvent) and osd_id in ev.which_osds:
self.log.info("osd.{0} came back in, cancelling event".format(
osd_id
))
self._complete(ev)
try:
ev = self._events[ev_id]
if isinstance(ev, PgRecoveryEvent) and osd_id in ev.which_osds:
self.log.info("osd.{0} came back in, cancelling event".format(
osd_id
))
self._complete(ev)
except KeyError:
self.log.warning("_osd_in_out: ev {0} does not exist".format(ev_id))

if len(affected_pgs) > 0:
r_ev = PgRecoveryEvent(
Expand Down Expand Up @@ -625,16 +628,20 @@ def _process_pg_summary(self):
global_event = False
data = self.get("pg_progress")
for ev_id in list(self._events):
ev = self._events[ev_id]
# Check for types of events
# we have to update
if isinstance(ev, PgRecoveryEvent):
ev.pg_update(data, self.log)
self.maybe_complete(ev)
elif isinstance(ev, GlobalRecoveryEvent):
global_event = True
ev.global_event_update_progress(self.log)
self.maybe_complete(ev)
try:
ev = self._events[ev_id]
# Check for types of events
# we have to update
if isinstance(ev, PgRecoveryEvent):
ev.pg_update(data, self.log)
self.maybe_complete(ev)
elif isinstance(ev, GlobalRecoveryEvent):
global_event = True
ev.global_event_update_progress(self.log)
self.maybe_complete(ev)
except KeyError:
self.log.warning("_process_pg_summary: ev {0} does not exist".format(ev_id))
continue

if not global_event:
# If there is no global event
Expand Down Expand Up @@ -736,6 +743,7 @@ def update(self, ev_id, ev_msg, ev_progress, refs=None, add_to_ceph_s=False):
ev = self._events[ev_id]
assert isinstance(ev, RemoteEvent)
except KeyError:
# if key doesn't exist we create an event
ev = RemoteEvent(ev_id, ev_msg, refs, add_to_ceph_s)
self._events[ev_id] = ev
self.log.info("update: starting ev {0} ({1})".format(
Expand Down

0 comments on commit b70d4a9

Please sign in to comment.