qa/tasks/ceph: raise exceptions if scrubbing fails or cannot proceed #15310

Merged
merged 7 commits on Jun 16, 2017
1 change: 1 addition & 0 deletions qa/suites/rados/basic-luminous/scrub_test.yaml
@@ -1,5 +1,6 @@
 overrides:
   ceph:
+    wait-for-scrub: false
     log-whitelist:
     - '!= data_digest'
     - '!= omap_digest'
1 change: 1 addition & 0 deletions qa/suites/rados/basic/tasks/repair_test.yaml
@@ -1,5 +1,6 @@
 overrides:
   ceph:
+    wait-for-scrub: false
     log-whitelist:
     - candidate had a stat error
     - candidate had a read error
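Both suites above opt out of the end-of-run scrub wait, presumably because their workloads intentionally corrupt objects and the stricter scrub check introduced below would otherwise fail the run. A minimal sketch of how such an override is typically consumed at teardown time; the guard, default, and function name below are assumptions, not code quoted from this PR:

    def scrub_on_exit(ctx, config):
        # Suites that set wait-for-scrub: false skip the exit scrub check entirely.
        if not config.get('wait-for-scrub', True):
            return
        osd_scrub_pgs(ctx, config)  # can now raise RuntimeError (see qa/tasks/ceph.py hunks below)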
18 changes: 12 additions & 6 deletions qa/suites/rados/thrash/workloads/radosbench.yaml
@@ -9,19 +9,25 @@ tasks:
 - full_sequential:
   - radosbench:
       clients: [client.0]
-      time: 120
+      time: 90
   - radosbench:
       clients: [client.0]
-      time: 120
+      time: 90
   - radosbench:
       clients: [client.0]
-      time: 120
+      time: 90
   - radosbench:
       clients: [client.0]
-      time: 120
+      time: 90
   - radosbench:
       clients: [client.0]
-      time: 120
+      time: 90
   - radosbench:
       clients: [client.0]
-      time: 120
+      time: 90
+  - radosbench:
+      clients: [client.0]
+      time: 90
+  - radosbench:
+      clients: [client.0]
+      time: 90
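For reference on the radosbench change: the old workload ran six 120-second passes (6 × 120 s = 720 s) and the new one runs eight 90-second passes (8 × 90 s = 720 s), so the total bench time appears unchanged; each individual pass is just shorter.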
8 changes: 3 additions & 5 deletions qa/tasks/ceph.py
@@ -1030,7 +1030,7 @@ def osd_scrub_pgs(ctx, config):
     indicate the last scrub completed. Time out if no progress is made
     here after two minutes.
     """
-    retries = 12
+    retries = 20
     delays = 10
     cluster_name = config['cluster']
     manager = ctx.managers[cluster_name]
@@ -1044,8 +1044,7 @@ def osd_scrub_pgs(ctx, config):
         log.info("Waiting for all osds to be active and clean.")
         time.sleep(delays)
     if not all_clean:
-        log.info("Scrubbing terminated -- not all pgs were active and clean.")
-        return
+        raise RuntimeError("Scrubbing terminated -- not all pgs were active and clean.")
     check_time_now = time.localtime()
     time.sleep(1)
     all_roles = teuthology.all_roles(ctx.cluster)
@@ -1074,8 +1073,7 @@ def osd_scrub_pgs(ctx, config):
         else:
             gap_cnt += 1
             if gap_cnt > retries:
-                log.info('Exiting scrub checking -- not all pgs scrubbed.')
-                return
+                raise RuntimeError('Exiting scrub checking -- not all pgs scrubbed.')
         if loop:
             log.info('Still waiting for all pgs to be scrubbed.')
             time.sleep(delays)
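With the new values, the scrub-checking loop tolerates roughly retries × delays = 20 × 10 = 200 seconds without progress before raising, up from 12 × 10 = 120 seconds (the "two minutes" mentioned in the docstring). The pattern these hunks adopt is a bounded wait that raises instead of silently returning, so the teuthology run fails loudly when scrubbing cannot proceed. A distilled, self-contained illustration of that pattern; the names here are illustrative only and not the actual teuthology code:

    import time

    def wait_until(condition, retries=20, delays=10, what='condition'):
        # Poll a condition at fixed intervals; fail loudly on timeout so the
        # run is marked failed rather than quietly skipping the check.
        for _ in range(retries):
            if condition():
                return
            time.sleep(delays)
        raise RuntimeError('timed out waiting for %s' % what)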
3 changes: 3 additions & 0 deletions qa/tasks/ceph_manager.py
@@ -1186,6 +1186,9 @@ def flush_pg_stats(self, osds, no_wait=None, wait_for_mon=3*5):
                                 'osd.{osd}: {got} < {need}'.
                                 format(osd=osd, got=got, need=need))

+    def flush_all_pg_stats(self):
+        self.flush_pg_stats(range(len(self.get_osd_dump())))
+
     def do_rados(self, remote, cmd, check_status=True):
         """
         Execute a remote rados command.
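The new flush_all_pg_stats() helper flushes pg stats for every OSD id reported by get_osd_dump(). A hypothetical caller mirroring the thrashosds change in the next hunk; the cluster name 'ceph' and the surrounding teuthology ctx are assumptions, not part of this diff:

    manager = ctx.managers['ceph']    # assumed cluster name
    manager.wait_for_all_up()
    manager.flush_all_pg_stats()      # same as flush_pg_stats(range(<number of OSDs>))
    manager.wait_for_recovery(360)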
2 changes: 2 additions & 0 deletions qa/tasks/thrashosds.py
@@ -197,4 +197,6 @@ def task(ctx, config):
     finally:
         log.info('joining thrashosds')
         thrash_proc.do_join()
+        cluster_manager.wait_for_all_up()
+        cluster_manager.flush_all_pg_stats()
         cluster_manager.wait_for_recovery(config.get('timeout', 360))
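The intent of the added teardown steps appears to be: wait until every OSD is back up after thrashing, then flush pg stats from all OSDs so the monitor's view is current before wait_for_recovery verifies that the cluster has healed; without the flush, the recovery check could act on stale pg stats.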