From 3deafed1c790e076481032536260a29ba2007401 Mon Sep 17 00:00:00 2001 From: Andrew Miller Date: Fri, 30 Mar 2012 14:34:23 -0400 Subject: [PATCH] Added (naive) support for following symlinks, up to three levels of symlinks. This is a hack that supports my use case of placing symlinks to files I want backed up Signed-off-by: Andrew Miller Added a 'continue' so that process() doesn't keep recursing Signed-off-by: Andrew Miller Added tests for test_backup_symlinks Signed-off-by: Andrew Miller --- src/allmydata/scripts/tahoe_backup.py | 26 ++++++----- src/allmydata/test/test_cli.py | 62 ++++++++++++++++++++++++++- 2 files changed, 77 insertions(+), 11 deletions(-) diff --git a/src/allmydata/scripts/tahoe_backup.py b/src/allmydata/scripts/tahoe_backup.py index ef3da34b4f..97ecc8d830 100644 --- a/src/allmydata/scripts/tahoe_backup.py +++ b/src/allmydata/scripts/tahoe_backup.py @@ -159,7 +159,7 @@ def warn(self, msg): precondition(isinstance(msg, str), msg) print >>self.options.stderr, msg - def process(self, localpath): + def process(self, localpath, symlink_depth=0): precondition(isinstance(localpath, unicode), localpath) # returns newdircap @@ -182,14 +182,23 @@ def process(self, localpath): assert isinstance(child, unicode), child childpath = os.path.join(localpath, child) # note: symlinks to directories are both islink() and isdir() - if os.path.isdir(childpath) and not os.path.islink(childpath): - metadata = get_local_metadata(childpath) - # recurse on the child directory - childcap = self.process(childpath) + if os.path.isdir(childpath): + if os.path.islink(childpath): + if symlink_depth >= 3: + self.directories_skipped += 1 + self.warn("WARNING: symlink depth exceeded %s" % quote_output(childpath)) + continue + metadata = get_local_metadata(childpath) + # recurse on the child directory + childcap = self.process(childpath, symlink_depth+1) + else: + metadata = get_local_metadata(childpath) + # recurse on the child directory + childcap = self.process(childpath, symlink_depth) assert isinstance(childcap, str) create_contents[child] = ("dirnode", childcap, metadata) compare_contents[child] = childcap - elif os.path.isfile(childpath) and not os.path.islink(childpath): + elif os.path.isfile(childpath): try: childcap, metadata = self.upload(childpath) assert isinstance(childcap, str) @@ -200,10 +209,7 @@ def process(self, localpath): self.warn("WARNING: permission denied on file %s" % quote_output(childpath)) else: self.files_skipped += 1 - if os.path.islink(childpath): - self.warn("WARNING: cannot backup symlink %s" % quote_output(childpath)) - else: - self.warn("WARNING: cannot backup special file %s" % quote_output(childpath)) + self.warn("WARNING: cannot backup special file %s" % quote_output(childpath)) must_create, r = self.check_backupdb_directory(compare_contents) if must_create: diff --git a/src/allmydata/test/test_cli.py b/src/allmydata/test/test_cli.py index 59e2c6aa2d..a29b470b69 100644 --- a/src/allmydata/test/test_cli.py +++ b/src/allmydata/test/test_cli.py @@ -2433,6 +2433,17 @@ def writeto(self, path, data): fileutil.make_dirs(os.path.dirname(full_path)) fileutil.write(full_path, data) + def mkdir(self, pathname): + fn = os.path.join(self.basedir, "home", unicode(pathname)) + fileutil.make_dirs(fn) + return fn + + def symlink(self, src, dst): + src_fn = os.path.abspath(os.path.join(self.basedir, "home", unicode(src))) + dst_fn = os.path.join(self.basedir, "home", unicode(dst)) + os.symlink(src_fn, dst_fn) + return dst_fn + def count_output(self, out): mo = re.search(r"(\d)+ files uploaded \((\d+) reused\), " "(\d)+ files skipped, " @@ -2444,6 +2455,55 @@ def count_output2(self, out): mo = re.search(r"(\d)+ files checked, (\d+) directories checked", out) return [int(s) for s in mo.groups()] + def test_backup_symlinks(self): + self.basedir = "cli/Backup/backup" + self.set_up_grid() + + # is the backupdb available? If so, we test that a second backup does + # not create new directories. + hush = StringIO() + have_bdb = backupdb.get_backupdb(os.path.join(self.basedir, "dbtest"), + hush) + + # create a small local directory with a couple of files + source = os.path.join(self.basedir, "home") + self.writeto("parent/A/B.txt", "B.txt\n" * 1000) + subdir_sym = self.symlink("parent/A", "parent/A/A") + bar_sym = self.symlink("parent/A/B.txt", "parent/A/B_sym.txt") + + def do_backup(verbose=False): + cmd = ["backup"] + if verbose: + cmd.append("--verbose") + cmd.append(source) + cmd.append("tahoe:backups") + return self.do_cli(*cmd) + + d = self.do_cli("create-alias", "tahoe") + + if not have_bdb: + d.addCallback(lambda res: self.do_cli("backup", source, "tahoe:backups")) + def _should_complain((rc, out, err)): + self.failUnless("I was unable to import a python sqlite library" in err, err) + d.addCallback(_should_complain) + d.addCallback(self.stall, 1.1) # make sure the backups get distinct timestamps + + d.addCallback(lambda res: do_backup()) + def _check0((rc, out, err)): + self.failUnlessReallyEqual(err, "WARNING: symlink depth exceeded 'cli/Backup/backup/home/parent/A/A/A/A/A'\n") + self.failUnlessReallyEqual(rc, 2) + fu, fr, fs, dc, dr, ds = self.count_output(out) + # B.txt and B_sym.txt, 4 times each. This is inefficient, right? + self.failUnlessReallyEqual(fu, 8) + self.failUnlessReallyEqual(fr, 0) + self.failUnlessReallyEqual(fs, 0) # skips A/A/A/A/A (the fourth symlink) + # home, home/A, home/A/A, ... home/A/A/A/A/A + self.failUnlessReallyEqual(dc, 6) + self.failUnlessReallyEqual(dr, 0) + self.failUnlessReallyEqual(ds, 1) + d.addCallback(_check0) + return d + def test_backup(self): self.basedir = "cli/Backup/backup" self.set_up_grid() @@ -2460,7 +2520,7 @@ def test_backup(self): self.writeto("parent/subdir/foo.txt", "foo") self.writeto("parent/subdir/bar.txt", "bar\n" * 1000) self.writeto("parent/blah.txt", "blah") - + def do_backup(verbose=False): cmd = ["backup"] if verbose: