Split _search_for_latest and add unit tests
- This also fixes a bug where we looked for the report file in the
  wrong directory

Signed-off-by: Karen Huddleston <khuddleston@pivotal.io>
professor authored and khuddlefish committed Apr 25, 2017
1 parent 175f1f3 commit aaa88b2
Showing 5 changed files with 103 additions and 53 deletions.
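
The wrong-directory bug mentioned in the commit message came from _search_for_latest reusing the loop variable path as the report directory after the report-file loop had finished, so the report filename was regenerated against whatever directory the walk visited last rather than the master dump directory. A minimal, self-contained sketch of that leak (names and paths are illustrative, not the gpdbrestore code):

    import os

    # Python for-loop variables outlive the loop; the old code later used
    # `path` as though it still named the master dump directory.
    candidates = [("/data/master/db_dumps/20160101", "gp_dump_20160101010101.rpt"),
                  ("/tmp", "gp_dump_20160101010102.rpt")]
    for path, report_file in candidates:
        pass  # examine each candidate report file

    # `path` now holds "/tmp", so a lookup built from it misses the real file:
    print(os.path.join(path, "gp_dump_20160101010101.rpt"))  # wrong directory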
45 changes: 24 additions & 21 deletions gpMgmt/bin/gpdbrestore
@@ -407,7 +407,8 @@ class GpdbRestore(Operation):
 
         self._get_timestamp_for_validation(root)
 
-        self.context.get_compress_and_dbname_from_report_file(self.context.generate_filename("report"))
+        report_filename = self.context.generate_filename("report")
+        self.context.compress, self.context.target_db = self.context.get_compress_and_dbname_from_report_file(report_filename)
 
     def _validate_db_host_path(self):
         # The format of the -R option should be 'hostname:path_to_dumpset', hence the length should be 2 (hostname, path_to_dumpset)
@@ -439,7 +440,8 @@ class GpdbRestore(Operation):
             Scp('Copying report file', srcHost=host, srcFile="%s/%s" % (path, file), dstFile="/tmp").run(validateAfter=True)
 
         self._get_timestamp_for_validation("/tmp")
-        self.context.get_compress_and_dbname_from_report_file(self.context.generate_filename("report", directory="/tmp"))
+        report_filename = self.context.generate_filename("report", directory="/tmp")
+        self.context.compress, self.context.target_db = self.context.get_compress_and_dbname_from_report_file(report_filename)
 
         local_files = ListFilesByPattern("/tmp", report_pattern).run()
         for file in local_files:
@@ -463,31 +465,32 @@ class GpdbRestore(Operation):
         self.context.timestamp = match[-18:-4]
         self.context.use_old_filename_format = self.context.is_timestamp_in_old_format()
 
-    def _search_for_latest(self):
-        logger.info("Scanning Master host for latest dump file set for database %s" % self.search_for_dbname)
-        root = os.path.join(self.context.get_backup_root(), self.context.dump_dir)
-        candidates, timestamps = [], []
-        prefix = "%s*.rpt" % self.context.generate_prefix("report")
-        for path, dirs, files in os.walk(root):
-            matching = fnmatch.filter(files, "%s*" % prefix)
-            candidates.extend([(path, report_file) for report_file in matching])
-        if len(candidates) == 0:
-            raise ExceptionNoStackTraceNeeded("No report files located")
-        for path, report_file in candidates:
-            self.context.get_compress_and_dbname_from_report_file(os.path.join(path, report_file))
-            db_name = self.context.target_db
-            logger.debug("Looking for %s, found %s" % (self.search_for_dbname, db_name))
-            if self.search_for_dbname == db_name:
-                logger.info('Located dump file %s for database %s, adding to list' % (report_file, self.search_for_dbname))
+    def _get_dump_timestamps_for_database(self, search_for_dbname, backup_root):
+        timestamps = []
+        report_files = self.context.get_report_files_and_paths(backup_root)
+        for path, report_file in report_files:
+            _, report_file_dbname = self.context.get_compress_and_dbname_from_report_file(os.path.join(path, report_file))
+            logger.debug("Looking for %s, found %s" % (search_for_dbname, report_file_dbname))
+            if search_for_dbname == report_file_dbname:
+                logger.info('Located report file %s for database %s, adding to list' % (report_file, search_for_dbname))
                 timestamps.append(int(report_file[-18:-4]))
             else:
-                logger.debug("Dump file has incorrect database name of %s, skipping..." % db_name)
+                logger.debug("Report file has incorrect database name of %s, skipping..." % report_file_dbname)
         if len(timestamps) == 0:
-            raise ExceptionNoStackTraceNeeded("No report files located with database %s" % self.search_for_dbname)
+            raise ExceptionNoStackTraceNeeded("No report files located with database %s" % search_for_dbname)
+        return timestamps
+
+    def _search_for_latest(self):
+        logger.info("Scanning Master host for latest dump file set for database %s" % self.search_for_dbname)
+        backup_root = os.path.join(self.context.get_backup_root(), self.context.dump_dir)
+        timestamps = self._get_dump_timestamps_for_database(self.search_for_dbname, backup_root)
         self.context.timestamp = str(max(timestamps))
         logger.info("Identified latest dump timestamp for %s as %s" % (self.search_for_dbname, self.context.timestamp))
+
+        #Set context variables that we will need for the rest of the restore
         self.context.use_old_filename_format = self.context.is_timestamp_in_old_format()
-        self.context.get_compress_and_dbname_from_report_file(self.context.generate_filename("report", directory=path))
+        report_filename = self.context.generate_filename("report")
+        self.context.compress, self.context.target_db = self.context.get_compress_and_dbname_from_report_file(report_filename)
 
     def _select_multi_file(self, dates_and_times):
         spc = " "
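
The int(report_file[-18:-4]) slice that both the old and new code rely on works because report filenames end in a 14-digit timestamp followed by the 4-character ".rpt" extension, so the last 18 characters minus that suffix are exactly the timestamp. A stand-alone check (filename taken from the unit tests below):

    # "gp_dump_20160101010101.rpt" = prefix + 14-digit timestamp + ".rpt"
    report_file = "gp_dump_20160101010101.rpt"
    assert int(report_file[-18:-4]) == 20160101010101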
23 changes: 17 additions & 6 deletions gpMgmt/bin/gppylib/operations/backup_utils.py
@@ -257,21 +257,32 @@ def get_dump_dirs(self):
         dirnames = sorted(dirnames, key=lambda x: int(os.path.basename(x)), reverse=True)
         return dirnames
 
+    def get_report_files_and_paths(self, backup_root):
+        reports = []
+        prefix = "%s*.rpt" % self.generate_prefix("report")
+        for path, dirs, files in os.walk(backup_root):
+            matching = fnmatch.filter(files, "%s*" % prefix)
+            reports.extend([(path, report_file) for report_file in matching])
+        if len(reports) == 0:
+            raise Exception("No report files located")
+        return reports
+
     def get_compress_and_dbname_from_report_file(self, report_file):
         contents = get_lines_from_file(report_file)
-        self.compress = None
-        self.target_db = ""
+        compress = None
+        target_db = ""
         name_pattern = re.compile(r'Port [0-9]+ Database (.*) BackupFile')
         for line in contents:
             if "Compression Program: gzip" in line:
-                self.compress = True
+                compress = True
             elif "Compression Program: None" in line:
-                self.compress = False
+                compress = False
             matching = name_pattern.search(line)
             if matching and matching.group(1):
-                self.target_db = matching.group(1)
-        if self.compress is None or not self.target_db:
+                target_db = matching.group(1)
+        if compress is None or not target_db:
             raise Exception("Could not determine database name and compression type from report file %s" % report_file)
+        return compress, target_db
 
 def get_filename_for_content(context, filetype, content, remote_directory=None, host=None):
     filetype_glob = context.generate_filename(filetype, content=content, directory=remote_directory)
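
After the refactor, get_compress_and_dbname_from_report_file hands its results back instead of mutating the context, and callers unpack the tuple. A self-contained sketch of the parsing contract, mirroring (not importing) the gppylib logic on the same report lines the unit tests use:

    import re

    lines = ["Compression Program: gzip",
             "segment 0 (dbid 2) Host host Port 5433 Database testdb BackupFile /gp_dump_0_2_20160101010101: Succeeded"]

    compress, target_db = None, ""
    name_pattern = re.compile(r'Port [0-9]+ Database (.*) BackupFile')
    for line in lines:
        if "Compression Program: gzip" in line:
            compress = True
        elif "Compression Program: None" in line:
            compress = False
        matching = name_pattern.search(line)
        if matching and matching.group(1):
            target_db = matching.group(1)

    assert (compress, target_db) == (True, "testdb")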
2 changes: 1 addition & 1 deletion gpMgmt/bin/gppylib/operations/restore.py
@@ -832,7 +832,7 @@ def __init__(self, context):
     def validate_metadata_file(self):
         report_file = self.context.generate_filename("report")
         self.context.use_old_filename_format = self.context.is_timestamp_in_old_format()
-        self.context.get_compress_and_dbname_from_report_file(report_file)
+        self.context.compress, self.context.target_db = self.context.get_compress_and_dbname_from_report_file(report_file)
 
         if self.context.ddboost:
             # We pass in segment_dir='' because we don't want it included in our path for ddboost
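
A side benefit of the tuple-returning API, visible in _get_dump_timestamps_for_database above: call sites that need only the database name can discard the compression flag with "_, dbname = ...". A tiny sketch (the function here is a hypothetical stand-in, not the Context method):

    def get_compress_and_dbname_from_report_file(report_file):
        # hypothetical stand-in: always an uncompressed dump of testdb
        return False, "testdb"

    _, dbname = get_compress_and_dbname_from_report_file("gp_dump_20160101010101.rpt")
    assert dbname == "testdb"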
42 changes: 30 additions & 12 deletions gpMgmt/bin/gppylib/operations/test/unit/test_unit_backup_utils.py
@@ -1246,33 +1246,51 @@ def test__escapeDoubleQuoteInSQLString(self):
         self.assertEqual('"MY\'DATE"', escapeDoubleQuoteInSQLString('''MY'DATE'''))
         self.assertEqual('"MY""""DATE"', escapeDoubleQuoteInSQLString('MY""DATE'))
 
+    @patch('os.walk', return_value=[('path', ['dir1', 'dir2'], ['gp_dump_20160101010101.rpt', 'file2', 'gp_dump_20160101010102.rpt']),
+                                    ('path2', ['dir3'], ['gp_dump_20160101010103.rpt']),
+                                    ('path3', ['dir4', 'dir5'], ['file5', 'gp_dump_20160101010104.rpt'])])
+    def test_get_report_files_and_paths_default(self, mock):
+        expectedFiles = [('path', 'gp_dump_20160101010101.rpt'),
+                         ('path', 'gp_dump_20160101010102.rpt'),
+                         ('path2', 'gp_dump_20160101010103.rpt'),
+                         ('path3', 'gp_dump_20160101010104.rpt')]
+        reportFiles = self.context.get_report_files_and_paths("/tmp")
+        self.assertEqual(expectedFiles, reportFiles)
+
+    @patch('os.walk', return_value=[('path', ['dir1', 'dir2'], ['file1', 'file2']),
+                                    ('path2', ['dir3'], ['file3'])])
+    def test_get_report_files_and_paths_no_report_files(self, mock):
+        with self.assertRaisesRegexp(Exception, "No report files located"):
+            self.context.get_report_files_and_paths("/dump_dir")
+
     @patch('gppylib.operations.backup_utils.get_lines_from_file', return_value=['Compression Program: gzip',
                                                                                 'segment 0 (dbid 2) Host host Port 5433 Database testdb BackupFile /gp_dump_0_2_20160101010101: Succeeded'])
     def test_get_compress_and_dbname_from_report_file_normal_dbname_compression(self, mock1):
-        self.context.get_compress_and_dbname_from_report_file("report_file_name")
-        self.assertTrue(self.context.compress)
-        self.assertEquals(self.context.target_db, 'testdb')
+        compress, dbname = self.context.get_compress_and_dbname_from_report_file("report_file_name")
+        self.assertTrue(compress)
+        self.assertEquals(dbname, 'testdb')
 
     @patch('gppylib.operations.backup_utils.get_lines_from_file', return_value=['Compression Program: gzip',
                                                                                 'segment 0 (dbid 2) Host host Port 5433 Database "test""db" BackupFile /gp_dump_0_2_20160101010101: Succeeded'])
     def test_get_compress_and_dbname_from_report_file_special_dbname_compression(self, mock1):
-        self.context.get_compress_and_dbname_from_report_file("report_file_name")
-        self.assertTrue(self.context.compress)
-        self.assertEquals(self.context.target_db, '"test""db"')
+        compress, dbname = self.context.get_compress_and_dbname_from_report_file("report_file_name")
+        self.assertTrue(compress)
+        self.assertEquals(dbname, '"test""db"')
 
     @patch('gppylib.operations.backup_utils.get_lines_from_file', return_value=['Compression Program: None',
                                                                                 'segment 0 (dbid 2) Host host Port 5433 Database testdb BackupFile /gp_dump_0_2_20160101010101: Succeeded'])
     def test_get_compress_and_dbname_from_report_file_normal_dbname_no_compression(self, mock1):
-        self.context.get_compress_and_dbname_from_report_file("report_file_name")
-        self.assertFalse(self.context.compress)
-        self.assertEquals(self.context.target_db, 'testdb')
+        compress, dbname = self.context.get_compress_and_dbname_from_report_file("report_file_name")
+        self.assertFalse(compress)
+        self.assertEquals(dbname, 'testdb')
 
     @patch('gppylib.operations.backup_utils.get_lines_from_file', return_value=['Compression Program: None',
                                                                                 'segment 0 (dbid 2) Host host Port 5433 Database "test""db" BackupFile /gp_dump_0_2_20160101010101: Succeeded'])
     def test_get_compress_and_dbname_from_report_file_special_dbname_no_compression(self, mock1):
-        self.context.get_compress_and_dbname_from_report_file("report_file_name")
-        self.assertFalse(self.context.compress)
-        self.assertEquals(self.context.target_db, '"test""db"')
+        compress, dbname = self.context.get_compress_and_dbname_from_report_file("report_file_name")
+        self.assertFalse(compress)
+        self.assertEquals(dbname, '"test""db"')
 
     @patch('gppylib.operations.backup_utils.get_lines_from_file',
            return_value=['segment 0 (dbid 2) Host host Port 5433 Database testdb BackupFile /gp_dump_0_2_20160101010101: Succeeded'])
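
The special-dbname cases above depend on the report-file regex capturing quoted identifiers verbatim, embedded doubled quotes included. A quick check against the same fixture line:

    import re

    name_pattern = re.compile(r'Port [0-9]+ Database (.*) BackupFile')
    line = ('segment 0 (dbid 2) Host host Port 5433 Database "test""db" '
            'BackupFile /gp_dump_0_2_20160101010101: Succeeded')
    assert name_pattern.search(line).group(1) == '"test""db"'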
44 changes: 31 additions & 13 deletions gpMgmt/bin/gppylib/test/unit/test_unit_gpdbrestore.py
@@ -105,7 +105,7 @@ def setUp(self, mock1):
 
     @patch('gppylib.commands.unix.Ping.local')
     @patch('gppylib.commands.unix.Scp.run')
-    @patch('gppylib.operations.backup_utils.Context.get_compress_and_dbname_from_report_file')
+    @patch('gppylib.operations.backup_utils.Context.get_compress_and_dbname_from_report_file', return_value=(False, 'gptest'))
     @patch('gppylib.operations.backup_utils.Context.is_timestamp_in_old_format', return_value=False)
     @patch('gppylib.operations.unix.RawRemoteOperation.execute', return_value=True)
     @patch('gppylib.operations.unix.ListFilesByPattern.run', return_value=['gp_dump_20160101010101.rpt'])
@@ -131,18 +131,36 @@ def test_validate_db_host_path_no_dump_dir_in_path(self, mock1):
         with self.assertRaisesRegexp(Exception, 'does not contain expected "db_dumps" directory'):
             self.gpdbrestore._validate_db_host_path()
 
-    #TODO: Decide whether to keep these tests or not
-    """
-    def test_validate_db_host_path_bad_host(self, mock1):
-        self.context.db_host_path = ['foo.bar.com', '/tmp/db_dumps/20160101']
-        with self.assertRaisesRegexp(Exception, 'Temporary failure in name resolution'):
-            self.gpdbrestore._validate_db_host_path()
-
-    @patch('gppylib.commands.unix.Ping.local')
-    def test_validate_db_host_path_bad_path(self, mock1, mock2):
-        self.context.db_host_path = ['hostname', '/db_dumps/nonexistent/path']
-        with self.assertRaisesRegexp(Exception, "Could not find path .* on host .*"):
-            self.gpdbrestore._validate_db_host_path()"""
+    @patch('gppylib.operations.backup_utils.Context.get_compress_and_dbname_from_report_file',
+           side_effect=[(False, "gptest1"), (False, "gptest2")])
+    @patch('gppylib.operations.backup_utils.Context.get_report_files_and_paths',
+           return_value=[('path', 'gp_dump_20160101010101.rpt'), ('path', 'gp_dump_20160101010102.rpt')])
+    def test_get_dump_timestamps_for_database_one_matching_file(self, mock1, mock2, mock3):
+        dbname = 'gptest2'
+        expected_timestamps = [20160101010102]
+        actual_timestamps = self.gpdbrestore._get_dump_timestamps_for_database(dbname, "/tmp")
+        self.assertEquals(expected_timestamps, actual_timestamps)
+
+    @patch('gppylib.operations.backup_utils.Context.get_compress_and_dbname_from_report_file',
+           side_effect=[(False, "gptest2"), (False, "gptest1"), (False, "gptest2")])
+    @patch('gppylib.operations.backup_utils.Context.get_report_files_and_paths',
+           return_value=[('path', 'gp_dump_20160101010101.rpt'),
+                         ('path', 'gp_dump_20160101010102.rpt'),
+                         ('path', 'gp_dump_20160101010103.rpt')])
+    def test_get_dump_timestamps_for_database_two_matching_files(self, mock1, mock2, mock3):
+        dbname = 'gptest2'
+        expected_timestamps = [20160101010101, 20160101010103]
+        actual_timestamps = self.gpdbrestore._get_dump_timestamps_for_database(dbname, "/tmp")
+        self.assertEquals(expected_timestamps, actual_timestamps)
+
+    @patch('gppylib.operations.backup_utils.Context.get_compress_and_dbname_from_report_file',
+           side_effect=[(False, "gptest1"), (False, "gptest2")])
+    @patch('gppylib.operations.backup_utils.Context.get_report_files_and_paths',
+           return_value=[('path', 'gp_dump_20160101010101.rpt'), ('path', 'gp_dump_20160101010102.rpt')])
+    def test_get_dump_timestamps_for_database_no_report_files_for_dbname(self, mock1, mock2, mock3):
+        dbname = 'gptest'
+        with self.assertRaisesRegexp(ExceptionNoStackTraceNeeded, 'No report files located with database gptest'):
+            self.gpdbrestore._get_dump_timestamps_for_database(dbname, "/tmp")
 
     @patch('gppylib.operations.unix.ListFilesByPattern.run', return_value=[])
     def test_get_timestamp_for_validation_no_timestamps(self, mock1, mock2):
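
The new tests drive one patched method through several report files via side_effect, which makes a mock return successive items on successive calls. A minimal illustration (with the Python 2-era mock package these tests use; unittest.mock on Python 3):

    from mock import MagicMock

    m = MagicMock(side_effect=[(False, "gptest1"), (False, "gptest2")])
    assert m() == (False, "gptest1")
    assert m() == (False, "gptest2")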
