From 5f4e62f382767ee69e5b0c701b1a01d9e4132237 Mon Sep 17 00:00:00 2001
From: Joao Eduardo Luis <joao@redhat.com>
Date: Fri, 17 Oct 2014 19:08:20 +0100
Subject: [PATCH] mon: MDSMonitor: wait for osdmon to be writable when
 requesting proposal

Otherwise we may end up requesting the osdmon to propose while it is
mid-proposal.  We can't simply return EAGAIN to the user either because
then we would have to expect the user to be able to successfully race
with the whole cluster in finding a window in which the 'mds fs new'
command would succeed -- which is not a realistic expectation.  Having
the command wait via osdmon()->wait_for_writeable() guarantees that it
will be added to a queue and that we will, eventually, tend to it.

Fixes: #9794

Signed-off-by: Joao Eduardo Luis <joao@redhat.com>
(cherry picked from commit 2ae1cba595d9c56a0a4c534b34fe25250e7eb2d5)
---
 src/mon/MDSMonitor.cc | 19 +++++++++++++++++--
 src/mon/MDSMonitor.h  |  1 +
 2 files changed, 18 insertions(+), 2 deletions(-)

diff --git a/src/mon/MDSMonitor.cc b/src/mon/MDSMonitor.cc
index 212b0e99ba38e..21fdb24c8d512 100644
--- a/src/mon/MDSMonitor.cc
+++ b/src/mon/MDSMonitor.cc
@@ -928,8 +928,14 @@ bool MDSMonitor::prepare_command(MMonCommand *m)
   }
 
   /* Execute filesystem add/remove, or pass through to filesystem_command */
-  r = management_command(prefix, cmdmap, ss);
-  if (r >= 0 || r != -ENOSYS)
+  r = management_command(m, prefix, cmdmap, ss);
+  if (r >= 0)
+    goto out;
+  
+  if (r == -EAGAIN) {
+    // message has been enqueued for retry; return.
+    return false;
+  } else if (r != -ENOSYS) {
     // MDSMonitor::management_command() returns -ENOSYS if it knows nothing
     // about the command passed to it, in which case we will check whether
     // MDSMonitor::filesystem_command() knows about it.  If on the other hand
@@ -1015,6 +1021,7 @@ int MDSMonitor::_check_pool(
  *         fall through and look for other types of command.
  */
 int MDSMonitor::management_command(
+    MMonCommand *m,
     std::string const &prefix,
     map<string, cmd_vartype> &cmdmap,
     std::stringstream &ss)
@@ -1133,6 +1140,14 @@ int MDSMonitor::management_command(
     // Automatically set crash_replay_interval on data pool if it
     // isn't already set.
     if (data_pool->get_crash_replay_interval() == 0) {
+      // We will be changing osdmon's state and requesting the osdmon to
+      // propose.  We thus need to make sure the osdmon is writeable before
+      // we do this, waiting if it's not.
+      if (!mon->osdmon()->is_writeable()) {
+        mon->osdmon()->wait_for_writeable(new C_RetryMessage(this, m));
+        return -EAGAIN;
+      }
+
       r = mon->osdmon()->set_crash_replay_interval(data, g_conf->osd_default_data_pool_replay_window);
       assert(r == 0);  // We just did get_pg_pool so it must exist and be settable
       request_proposal(mon->osdmon());
diff --git a/src/mon/MDSMonitor.h b/src/mon/MDSMonitor.h
index 23f4f21c25449..40eaf64d393d6 100644
--- a/src/mon/MDSMonitor.h
+++ b/src/mon/MDSMonitor.h
@@ -102,6 +102,7 @@ class MDSMonitor : public PaxosService {
   bool preprocess_command(MMonCommand *m);
   bool prepare_command(MMonCommand *m);
   int management_command(
+      MMonCommand *m,
       std::string const &prefix,
       map<string, cmd_vartype> &cmdmap,
       std::stringstream &ss);