From aaa88b21fef184ce5fa7dc48f98843973fe10c60 Mon Sep 17 00:00:00 2001 From: Todd Sedano Date: Fri, 21 Apr 2017 18:07:11 -0700 Subject: [PATCH] Split _search_for_latest and add unit tests - This also fixes a bug where we looked for the report file in the wrong directory Signed-off-by: Karen Huddleston --- gpMgmt/bin/gpdbrestore | 45 ++++++++++--------- gpMgmt/bin/gppylib/operations/backup_utils.py | 23 +++++++--- gpMgmt/bin/gppylib/operations/restore.py | 2 +- .../test/unit/test_unit_backup_utils.py | 42 ++++++++++++----- .../test/unit/test_unit_gpdbrestore.py | 44 ++++++++++++------ 5 files changed, 103 insertions(+), 53 deletions(-) diff --git a/gpMgmt/bin/gpdbrestore b/gpMgmt/bin/gpdbrestore index c084c43f6a7c..ce7f11a48e2a 100755 --- a/gpMgmt/bin/gpdbrestore +++ b/gpMgmt/bin/gpdbrestore @@ -407,7 +407,8 @@ class GpdbRestore(Operation): self._get_timestamp_for_validation(root) - self.context.get_compress_and_dbname_from_report_file(self.context.generate_filename("report")) + report_filename = self.context.generate_filename("report") + self.context.compress, self.context.target_db = self.context.get_compress_and_dbname_from_report_file(report_filename) def _validate_db_host_path(self): # The format of the -R option should be 'hostname:path_to_dumpset', hence the length should be 2 (hostname, path_to_dumpset) @@ -439,7 +440,8 @@ class GpdbRestore(Operation): Scp('Copying report file', srcHost=host, srcFile="%s/%s" % (path, file), dstFile="/tmp").run(validateAfter=True) self._get_timestamp_for_validation("/tmp") - self.context.get_compress_and_dbname_from_report_file(self.context.generate_filename("report", directory="/tmp")) + report_filename = self.context.generate_filename("report", directory="/tmp") + self.context.compress, self.context.target_db = self.context.get_compress_and_dbname_from_report_file(report_filename) local_files = ListFilesByPattern("/tmp", report_pattern).run() for file in local_files: @@ -463,31 +465,32 @@ class GpdbRestore(Operation): 
self.context.timestamp = match[-18:-4] self.context.use_old_filename_format = self.context.is_timestamp_in_old_format() - def _search_for_latest(self): - logger.info("Scanning Master host for latest dump file set for database %s" % self.search_for_dbname) - root = os.path.join(self.context.get_backup_root(), self.context.dump_dir) - candidates, timestamps = [], [] - prefix = "%s*.rpt" % self.context.generate_prefix("report") - for path, dirs, files in os.walk(root): - matching = fnmatch.filter(files, "%s*" % prefix) - candidates.extend([(path, report_file) for report_file in matching]) - if len(candidates) == 0: - raise ExceptionNoStackTraceNeeded("No report files located") - for path, report_file in candidates: - self.context.get_compress_and_dbname_from_report_file(os.path.join(path, report_file)) - db_name = self.context.target_db - logger.debug("Looking for %s, found %s" % (self.search_for_dbname, db_name)) - if self.search_for_dbname == db_name: - logger.info('Located dump file %s for database %s, adding to list' % (report_file, self.search_for_dbname)) + def _get_dump_timestamps_for_database(self, search_for_dbname, backup_root): + timestamps = [] + report_files = self.context.get_report_files_and_paths(backup_root) + for path, report_file in report_files: + _, report_file_dbname = self.context.get_compress_and_dbname_from_report_file(os.path.join(path, report_file)) + logger.debug("Looking for %s, found %s" % (search_for_dbname, report_file_dbname)) + if search_for_dbname == report_file_dbname: + logger.info('Located report file %s for database %s, adding to list' % (report_file, search_for_dbname)) timestamps.append(int(report_file[-18:-4])) else: - logger.debug("Dump file has incorrect database name of %s, skipping..." % db_name) + logger.debug("Report file has incorrect database name of %s, skipping..." 
% report_file_dbname) if len(timestamps) == 0: - raise ExceptionNoStackTraceNeeded("No report files located with database %s" % self.search_for_dbname) + raise ExceptionNoStackTraceNeeded("No report files located with database %s" % search_for_dbname) + return timestamps + + def _search_for_latest(self): + logger.info("Scanning Master host for latest dump file set for database %s" % self.search_for_dbname) + backup_root = os.path.join(self.context.get_backup_root(), self.context.dump_dir) + timestamps = self._get_dump_timestamps_for_database(self.search_for_dbname, backup_root) self.context.timestamp = str(max(timestamps)) logger.info("Identified latest dump timestamp for %s as %s" % (self.search_for_dbname, self.context.timestamp)) + + #Set context variables that we will need for the rest of the restore self.context.use_old_filename_format = self.context.is_timestamp_in_old_format() - self.context.get_compress_and_dbname_from_report_file(self.context.generate_filename("report", directory=path)) + report_filename = self.context.generate_filename("report") + self.context.compress, self.context.target_db = self.context.get_compress_and_dbname_from_report_file(report_filename) def _select_multi_file(self, dates_and_times): spc = " " diff --git a/gpMgmt/bin/gppylib/operations/backup_utils.py b/gpMgmt/bin/gppylib/operations/backup_utils.py index 9f422b5a9952..bd368c699fc4 100755 --- a/gpMgmt/bin/gppylib/operations/backup_utils.py +++ b/gpMgmt/bin/gppylib/operations/backup_utils.py @@ -257,21 +257,32 @@ def get_dump_dirs(self): dirnames = sorted(dirnames, key=lambda x: int(os.path.basename(x)), reverse=True) return dirnames + def get_report_files_and_paths(self, backup_root): + reports = [] + prefix = "%s*.rpt" % self.generate_prefix("report") + for path, dirs, files in os.walk(backup_root): + matching = fnmatch.filter(files, "%s*" % prefix) + reports.extend([(path, report_file) for report_file in matching]) + if len(reports) == 0: + raise Exception("No report files 
located") + return reports + def get_compress_and_dbname_from_report_file(self, report_file): contents = get_lines_from_file(report_file) - self.compress = None - self.target_db = "" + compress = None + target_db = "" name_pattern = re.compile(r'Port [0-9]+ Database (.*) BackupFile') for line in contents: if "Compression Program: gzip" in line: - self.compress = True + compress = True elif "Compression Program: None" in line: - self.compress = False + compress = False matching = name_pattern.search(line) if matching and matching.group(1): - self.target_db = matching.group(1) - if self.compress is None or not self.target_db: + target_db = matching.group(1) + if compress is None or not target_db: raise Exception("Could not determine database name and compression type from report file %s" % report_file) + return compress, target_db def get_filename_for_content(context, filetype, content, remote_directory=None, host=None): filetype_glob = context.generate_filename(filetype, content=content, directory=remote_directory) diff --git a/gpMgmt/bin/gppylib/operations/restore.py b/gpMgmt/bin/gppylib/operations/restore.py index 7ddeeacd3f07..6f3cfd7d02d4 100755 --- a/gpMgmt/bin/gppylib/operations/restore.py +++ b/gpMgmt/bin/gppylib/operations/restore.py @@ -832,7 +832,7 @@ def __init__(self, context): def validate_metadata_file(self): report_file = self.context.generate_filename("report") self.context.use_old_filename_format = self.context.is_timestamp_in_old_format() - self.context.get_compress_and_dbname_from_report_file(report_file) + self.context.compress, self.context.target_db = self.context.get_compress_and_dbname_from_report_file(report_file) if self.context.ddboost: # We pass in segment_dir='' because we don't want it included in our path for ddboost diff --git a/gpMgmt/bin/gppylib/operations/test/unit/test_unit_backup_utils.py b/gpMgmt/bin/gppylib/operations/test/unit/test_unit_backup_utils.py index 005134b9d8ab..2433ce4c3e1c 100644 --- 
a/gpMgmt/bin/gppylib/operations/test/unit/test_unit_backup_utils.py +++ b/gpMgmt/bin/gppylib/operations/test/unit/test_unit_backup_utils.py @@ -1246,33 +1246,51 @@ def test__escapeDoubleQuoteInSQLString(self): self.assertEqual('"MY\'DATE"', escapeDoubleQuoteInSQLString('''MY'DATE''')) self.assertEqual('"MY""""DATE"', escapeDoubleQuoteInSQLString('MY""DATE')) + + @patch('os.walk', return_value=[('path', ['dir1', 'dir2'], ['gp_dump_20160101010101.rpt', 'file2', 'gp_dump_20160101010102.rpt']), + ('path2', ['dir3'], ['gp_dump_20160101010103.rpt']), + ('path3', ['dir4', 'dir5'], ['file5', 'gp_dump_20160101010104.rpt'])]) + def test_get_report_files_and_paths_default(self, mock): + expectedFiles = [('path','gp_dump_20160101010101.rpt'), + ('path', 'gp_dump_20160101010102.rpt'), + ('path2','gp_dump_20160101010103.rpt'), + ('path3','gp_dump_20160101010104.rpt')] + reportFiles = self.context.get_report_files_and_paths("/tmp") + self.assertEqual(expectedFiles, reportFiles) + + @patch('os.walk', return_value=[('path', ['dir1', 'dir2'], ['file1', 'file2']), + ('path2', ['dir3'], ['file3']),]) + def test_get_report_files_and_paths_no_report_files(self, mock): + with self.assertRaisesRegexp(Exception, "No report files located"): + self.context.get_report_files_and_paths("/dump_dir") + @patch('gppylib.operations.backup_utils.get_lines_from_file', return_value=['Compression Program: gzip', 'segment 0 (dbid 2) Host host Port 5433 Database testdb BackupFile /gp_dump_0_2_20160101010101: Succeeded']) def test_get_compress_and_dbname_from_report_file_normal_dbname_compression(self, mock1): - self.context.get_compress_and_dbname_from_report_file("report_file_name") - self.assertTrue(self.context.compress) - self.assertEquals(self.context.target_db, 'testdb') + compress, dbname = self.context.get_compress_and_dbname_from_report_file("report_file_name") + self.assertTrue(compress) + self.assertEquals(dbname, 'testdb') @patch('gppylib.operations.backup_utils.get_lines_from_file', 
return_value=['Compression Program: gzip', 'segment 0 (dbid 2) Host host Port 5433 Database "test""db" BackupFile /gp_dump_0_2_20160101010101: Succeeded']) def test_get_compress_and_dbname_from_report_file_special_dbname_compression(self, mock1): - self.context.get_compress_and_dbname_from_report_file("report_file_name") - self.assertTrue(self.context.compress) - self.assertEquals(self.context.target_db, '"test""db"') + compress, dbname = self.context.get_compress_and_dbname_from_report_file("report_file_name") + self.assertTrue(compress) + self.assertEquals(dbname, '"test""db"') @patch('gppylib.operations.backup_utils.get_lines_from_file', return_value=['Compression Program: None', 'segment 0 (dbid 2) Host host Port 5433 Database testdb BackupFile /gp_dump_0_2_20160101010101: Succeeded']) def test_get_compress_and_dbname_from_report_file_normal_dbname_no_compression(self, mock1): - self.context.get_compress_and_dbname_from_report_file("report_file_name") - self.assertFalse(self.context.compress) - self.assertEquals(self.context.target_db, 'testdb') + compress, dbname = self.context.get_compress_and_dbname_from_report_file("report_file_name") + self.assertFalse(compress) + self.assertEquals(dbname, 'testdb') @patch('gppylib.operations.backup_utils.get_lines_from_file', return_value=['Compression Program: None', 'segment 0 (dbid 2) Host host Port 5433 Database "test""db" BackupFile /gp_dump_0_2_20160101010101: Succeeded']) def test_get_compress_and_dbname_from_report_file_special_dbname_no_compression(self, mock1): - self.context.get_compress_and_dbname_from_report_file("report_file_name") - self.assertFalse(self.context.compress) - self.assertEquals(self.context.target_db, '"test""db"') + compress, dbname = self.context.get_compress_and_dbname_from_report_file("report_file_name") + self.assertFalse(compress) + self.assertEquals(dbname, '"test""db"') @patch('gppylib.operations.backup_utils.get_lines_from_file', return_value=['segment 0 (dbid 2) Host host Port 5433 
Database testdb BackupFile /gp_dump_0_2_20160101010101: Succeeded']) diff --git a/gpMgmt/bin/gppylib/test/unit/test_unit_gpdbrestore.py b/gpMgmt/bin/gppylib/test/unit/test_unit_gpdbrestore.py index 48bc67b3834e..ff57568b9604 100644 --- a/gpMgmt/bin/gppylib/test/unit/test_unit_gpdbrestore.py +++ b/gpMgmt/bin/gppylib/test/unit/test_unit_gpdbrestore.py @@ -105,7 +105,7 @@ def setUp(self, mock1): @patch('gppylib.commands.unix.Ping.local') @patch('gppylib.commands.unix.Scp.run') - @patch('gppylib.operations.backup_utils.Context.get_compress_and_dbname_from_report_file') + @patch('gppylib.operations.backup_utils.Context.get_compress_and_dbname_from_report_file', return_value=('False', 'gptest')) @patch('gppylib.operations.backup_utils.Context.is_timestamp_in_old_format', return_value=False) @patch('gppylib.operations.unix.RawRemoteOperation.execute', return_value=True) @patch('gppylib.operations.unix.ListFilesByPattern.run', return_value=['gp_dump_20160101010101.rpt']) @@ -131,18 +131,36 @@ def test_validate_db_host_path_no_dump_dir_in_path(self, mock1): with self.assertRaisesRegexp(Exception, 'does not contain expected "db_dumps" directory'): self.gpdbrestore._validate_db_host_path() -#TODO: Decide whether to keep these tests or not - """ - def test_validate_db_host_path_bad_host(self, mock1): - self.context.db_host_path = ['foo.bar.com', '/tmp/db_dumps/20160101'] - with self.assertRaisesRegexp(Exception, 'Temporary failure in name resolution'): - self.gpdbrestore._validate_db_host_path() - - @patch('gppylib.commands.unix.Ping.local') - def test_validate_db_host_path_bad_path(self, mock1, mock2): - self.context.db_host_path = ['hostname', '/db_dumps/nonexistent/path'] - with self.assertRaisesRegexp(Exception, "Could not find path .* on host .*"): - self.gpdbrestore._validate_db_host_path()""" + @patch('gppylib.operations.backup_utils.Context.get_compress_and_dbname_from_report_file', + side_effect=[(False, "gptest1"), (False, "gptest2")]) + 
@patch('gppylib.operations.backup_utils.Context.get_report_files_and_paths', + return_value=[('path','gp_dump_20160101010101.rpt'), ('path', 'gp_dump_20160101010102.rpt')]) + def test_get_dump_timestamps_for_database_one_matching_file(self, mock1, mock2, mock3): + dbname = 'gptest2' + expected_timestamps = [20160101010102] + actual_timestamps = self.gpdbrestore._get_dump_timestamps_for_database(dbname, "/tmp") + self.assertEquals(expected_timestamps, actual_timestamps) + + @patch('gppylib.operations.backup_utils.Context.get_compress_and_dbname_from_report_file', + side_effect=[(False, "gptest2"), (False, "gptest1"), (False, "gptest2")]) + @patch('gppylib.operations.backup_utils.Context.get_report_files_and_paths', + return_value=[('path', 'gp_dump_20160101010101.rpt'), + ('path', 'gp_dump_20160101010102.rpt'), + ('path', 'gp_dump_20160101010103.rpt')]) + def test_get_dump_timestamps_for_database_two_matching_files(self, mock1, mock2, mock3): + dbname = 'gptest2' + expected_timestamps = [20160101010101, 20160101010103] + actual_timestamps = self.gpdbrestore._get_dump_timestamps_for_database(dbname, "/tmp") + self.assertEquals(expected_timestamps, actual_timestamps) + + @patch('gppylib.operations.backup_utils.Context.get_compress_and_dbname_from_report_file', + side_effect=[(False, "gptest1"), (False, "gptest2")]) + @patch('gppylib.operations.backup_utils.Context.get_report_files_and_paths', + return_value=[('path','gp_dump_20160101010101.rpt'), ('path', 'gp_dump_20160101010102.rpt')]) + def test_get_dump_timestamps_for_database_no_report_files_for_dbname(self, mock1, mock2, mock3): + dbname = 'gptest' + with self.assertRaisesRegexp(ExceptionNoStackTraceNeeded, 'No report files located with database gptest'): + self.gpdbrestore._get_dump_timestamps_for_database(dbname, "/tmp") @patch('gppylib.operations.unix.ListFilesByPattern.run', return_value=[]) def test_get_timestamp_for_validation_no_timestamps(self, mock1, mock2):