tools/glusterfind: add --full option to query command

The --full option uses brickfind.py to list all files in the volume. The output file contains URL-encoded file names (unless --no-encode is given), each prefixed with a tag string indicating that the file should be considered new. The default tag string for the --full option is "NEW"; it can be changed with the --tag-for-full-find command-line option. The --full and --since-time options are mutually exclusive, and exactly one of them must be specified.

Change-Id: Ic85ba5db062e19df13ae9dc2de8a08eacb5c9792
BUG: 1286279
Signed-off-by: Milind Changire <mchangir@redhat.com>
Reviewed-on: http://review.gluster.org/12779
Smoke: Gluster Build System <jenkins@build.gluster.com>
NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org>
CentOS-regression: Gluster Build System <jenkins@build.gluster.com>
Reviewed-by: Aravinda VK <avishwan@redhat.com>
gluster · Mar 31, 2016 · 16a3f0d · 16a3f0d
1 parent 9746ee7
commit 16a3f0d
Show file tree

Hide file tree

Showing 3 changed files with 81 additions and 35 deletions.
diff --git a/tools/glusterfind/src/brickfind.py b/tools/glusterfind/src/brickfind.py
@@ -42,7 +42,7 @@ def output_callback(path, filter_result):
             path = path.strip()
             path = path[brick_path_len+1:]
             output_write(fout, path, args.output_prefix,
-                         encode=(not args.no_encode))
+                         encode=(not args.no_encode), tag=args.tag)
 
         ignore_dirs = [os.path.join(brick, dirname)
                        for dirname in
@@ -64,6 +64,9 @@ def _get_args():
     parser.add_argument("brick", help="Brick Name")
     parser.add_argument("outfile", help="Output File")
     parser.add_argument("start", help="Start Time", type=float)
+    parser.add_argument("tag", help="Tag to prefix file name with")
+    parser.add_argument("--only-query", help="Only query, No session update",
+                        action="store_true")
     parser.add_argument("--debug", help="Debug", action="store_true")
     parser.add_argument("--no-encode",
                         help="Do not encode path in outfile",
@@ -92,6 +95,7 @@ def _get_args():
 
     time_to_update = int(time.time())
     brickfind_crawl(args.brick, args)
-    with open(status_file_pre, "w", buffering=0) as f:
-        f.write(str(time_to_update))
+    if not args.only_query:
+        with open(status_file_pre, "w", buffering=0) as f:
+            f.write(str(time_to_update))
     sys.exit(0)
diff --git a/tools/glusterfind/src/main.py b/tools/glusterfind/src/main.py
@@ -109,8 +109,12 @@ def run_cmd_nodes(task, args, **kwargs):
 
             # If Full backup is requested or start time is zero, use brickfind
             change_detector = conf.get_change_detector("changelog")
+            tag = None
             if args.full:
                 change_detector = conf.get_change_detector("brickfind")
+                tag = args.tag_for_full_find.strip()
+                if tag == "":
+                    tag = '""' if not is_host_local(host_uuid) else ""
 
             node_outfiles.append(node_outfile)
 
@@ -119,9 +123,9 @@ def run_cmd_nodes(task, args, **kwargs):
                    args.volume,
                    brick,
                    node_outfile,
-                   str(kwargs.get("start")),
-                   "--output-prefix",
-                   args.output_prefix] + \
+                   str(kwargs.get("start"))] + \
+                ([tag] if tag is not None else []) + \
+                ["--output-prefix", args.output_prefix] + \
                 (["--debug"] if args.debug else []) + \
                 (["--no-encode"] if args.no_encode else []) + \
                 (["--only-namespace-changes"] if args.only_namespace_changes
@@ -131,7 +135,14 @@ def run_cmd_nodes(task, args, **kwargs):
             opts["copy_outfile"] = True
         elif task == "query":
             # If Full backup is requested or start time is zero, use brickfind
+            tag = None
             change_detector = conf.get_change_detector("changelog")
+            if args.full:
+                change_detector = conf.get_change_detector("brickfind")
+                tag = args.tag_for_full_find.strip()
+                if tag == "":
+                    tag = '""' if not is_host_local(host_uuid) else ""
+
             node_outfiles.append(node_outfile)
 
             cmd = [change_detector,
@@ -140,6 +151,7 @@ def run_cmd_nodes(task, args, **kwargs):
                    brick,
                    node_outfile,
                    str(kwargs.get("start"))] + \
+                ([tag] if tag is not None else []) + \
                 ["--only-query"] + \
                 ["--output-prefix", args.output_prefix] + \
                 (["--debug"] if args.debug else []) + \
@@ -296,23 +308,35 @@ def _get_args():
     parser_pre.add_argument("-N", "--only-namespace-changes",
                             help="List only namespace changes",
                             action="store_true")
+    parser_pre.add_argument("--tag-for-full-find",
+                            help="Tag prefix for file names emitted during"
+                            " a full find operation; default: \"NEW\"",
+                            default="NEW")
 
     # query <VOLUME> <OUTFILE> --since-time <SINCE_TIME>
     #       [--output-prefix <OUTPUT_PREFIX>] [--full]
-    parser_pre = subparsers.add_parser('query')
-    parser_pre.add_argument("volume", help="Volume Name")
-    parser_pre.add_argument("outfile", help="Output File",
-                            action=StoreAbsPath)
-    parser_pre.add_argument("--since-time", help="UNIX epoch time since which "
-                            "listing is required", type=int)
-    parser_pre.add_argument("--debug", help="Debug", action="store_true")
-    parser_pre.add_argument("--disable-partial", help="Disable Partial find, "
-                            "Fail when one node fails", action="store_true")
-    parser_pre.add_argument("--output-prefix", help="File prefix in output",
-                            default=".")
-    parser_pre.add_argument("-N", "--only-namespace-changes",
-                            help="List only namespace changes",
-                            action="store_true")
+    parser_query = subparsers.add_parser('query')
+    parser_query.add_argument("volume", help="Volume Name")
+    parser_query.add_argument("outfile", help="Output File",
+                              action=StoreAbsPath)
+    parser_query.add_argument("--since-time", help="UNIX epoch time since "
+                              "which listing is required", type=int)
+    parser_query.add_argument("--no-encode",
+                              help="Do not encode path in output file",
+                              action="store_true")
+    parser_query.add_argument("--full", help="Full find", action="store_true")
+    parser_query.add_argument("--debug", help="Debug", action="store_true")
+    parser_query.add_argument("--disable-partial", help="Disable Partial find,"
+                              " Fail when one node fails", action="store_true")
+    parser_query.add_argument("--output-prefix", help="File prefix in output",
+                              default=".")
+    parser_query.add_argument("-N", "--only-namespace-changes",
+                              help="List only namespace changes",
+                              action="store_true")
+    parser_query.add_argument("--tag-for-full-find",
+                              help="Tag prefix for file names emitted during"
+                              " a full find operation; default: \"NEW\"",
+                              default="NEW")
 
     # post <SESSION> <VOLUME>
     parser_post = subparsers.add_parser('post')
@@ -491,31 +515,46 @@ def mode_query(session_dir, args):
     # Enable volume options for changelog capture
     enable_volume_options(args)
 
+    # Test options
+    if not args.since_time and not args.full:
+        fail("Please specify either --since-time or --full", logger=logger)
+
+    if args.since_time and args.full:
+        fail("Please specify either --since-time or --full, but not both",
+             logger=logger)
+
     # Start query command processing
     if args.since_time:
         start = args.since_time
-        logger.debug("Query is called - Session: %s, Volume: %s, "
-                     "Start time: %s"
-                     % ("default", args.volume, start))
+    else:
+        start = 0  # --full option is handled separately
+
+    logger.debug("Query is called - Session: %s, Volume: %s, "
+                 "Start time: %s"
+                 % ("default", args.volume, start))
 
-        run_cmd_nodes("query", args, start=start)
+    run_cmd_nodes("query", args, start=start)
 
-        # Merger
+    # Merger
+    if args.full:
+        cmd = ["sort", "-u"] + node_outfiles + ["-o", args.outfile]
+        execute(cmd,
+                exit_msg="Failed to merge output files "
+                "collected from nodes", logger=logger)
+    else:
         # Read each Changelogs db and generate finaldb
         create_file(args.outfile, exit_on_err=True, logger=logger)
         outfilemerger = OutputMerger(args.outfile + ".db", node_outfiles)
         write_output(args, outfilemerger)
 
-        try:
-            os.remove(args.outfile + ".db")
-        except (IOError, OSError):
-            pass
+    try:
+        os.remove(args.outfile + ".db")
+    except (IOError, OSError):
+        pass
 
-        run_cmd_nodes("cleanup", args)
+    run_cmd_nodes("cleanup", args)
 
-        sys.stdout.write("Generated output file %s\n" % args.outfile)
-    else:
-        fail("Please specify --since-time option")
+    sys.stdout.write("Generated output file %s\n" % args.outfile)
 
 
 def mode_pre(session_dir, args):

diff --git a/tools/glusterfind/src/utils.py b/tools/glusterfind/src/utils.py
@@ -75,7 +75,7 @@ def find(path, callback_func=lambda x: True, filter_func=lambda x: True,
                 callback_func(full_path, filter_result)
 
 
-def output_write(f, path, prefix=".", encode=False):
+def output_write(f, path, prefix=".", encode=False, tag=""):
     if path == "":
         return
 
@@ -85,7 +85,10 @@ def output_write(f, path, prefix=".", encode=False):
     if encode:
         path = urllib.quote_plus(path)
 
-    f.write("%s\n" % path)
+    # set the field separator
+    FS = "" if tag == "" else " "
+
+    f.write("%s%s%s\n" % (tag.strip(), FS, path))
 
 
 def human_time(ts):