Commit
Added (naive) support for following symlinks, up to three levels of symlinks. This is a hack that supports my use case of placing symlinks to files I want backed up

Signed-off-by: Andrew Miller <amiller@dappervision.com>

Added a 'continue' so that process() doesn't keep recursing

Signed-off-by: Andrew Miller <amiller@dappervision.com>

Added tests for test_backup_symlinks

Signed-off-by: Andrew Miller <amiller@dappervision.com>
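
The approach described above, as a standalone sketch for readers who only want the idea: a recursive walk that follows directory symlinks but gives up after three levels, so a link cycle cannot recurse forever. This is a simplified illustration, not the patch itself; walk_tree, its prints, and the sorted() listing are hypothetical, and only the depth cap of three and the isdir()/islink() handling mirror the change below.

    import os

    def walk_tree(path, symlink_depth=0, max_depth=3):
        # Recurse into real directories freely; follow directory symlinks only
        # while symlink_depth < max_depth, mirroring the cap used in the patch.
        for child in sorted(os.listdir(path)):
            childpath = os.path.join(path, child)
            # note: symlinks to directories are both islink() and isdir()
            if os.path.isdir(childpath):
                if os.path.islink(childpath):
                    if symlink_depth >= max_depth:
                        print("WARNING: symlink depth exceeded %s" % childpath)
                        continue
                    walk_tree(childpath, symlink_depth + 1, max_depth)
                else:
                    walk_tree(childpath, symlink_depth, max_depth)
            elif os.path.isfile(childpath):
                print("would back up %s" % childpath)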
amiller committed Mar 31, 2012
1 parent c5e10e2 commit 3deafed
Showing 2 changed files with 77 additions and 11 deletions.
26 changes: 16 additions & 10 deletions src/allmydata/scripts/tahoe_backup.py
@@ -159,7 +159,7 @@ def warn(self, msg):
         precondition(isinstance(msg, str), msg)
         print >>self.options.stderr, msg
 
-    def process(self, localpath):
+    def process(self, localpath, symlink_depth=0):
         precondition(isinstance(localpath, unicode), localpath)
         # returns newdircap
 
@@ -182,14 +182,23 @@ def process(self, localpath):
             assert isinstance(child, unicode), child
             childpath = os.path.join(localpath, child)
             # note: symlinks to directories are both islink() and isdir()
-            if os.path.isdir(childpath) and not os.path.islink(childpath):
-                metadata = get_local_metadata(childpath)
-                # recurse on the child directory
-                childcap = self.process(childpath)
+            if os.path.isdir(childpath):
+                if os.path.islink(childpath):
+                    if symlink_depth >= 3:
+                        self.directories_skipped += 1
+                        self.warn("WARNING: symlink depth exceeded %s" % quote_output(childpath))
+                        continue
+                    metadata = get_local_metadata(childpath)
+                    # recurse on the child directory
+                    childcap = self.process(childpath, symlink_depth+1)
+                else:
+                    metadata = get_local_metadata(childpath)
+                    # recurse on the child directory
+                    childcap = self.process(childpath, symlink_depth)
                 assert isinstance(childcap, str)
                 create_contents[child] = ("dirnode", childcap, metadata)
                 compare_contents[child] = childcap
-            elif os.path.isfile(childpath) and not os.path.islink(childpath):
+            elif os.path.isfile(childpath):
                 try:
                     childcap, metadata = self.upload(childpath)
                     assert isinstance(childcap, str)
@@ -200,10 +209,7 @@
                     self.warn("WARNING: permission denied on file %s" % quote_output(childpath))
             else:
                 self.files_skipped += 1
-                if os.path.islink(childpath):
-                    self.warn("WARNING: cannot backup symlink %s" % quote_output(childpath))
-                else:
-                    self.warn("WARNING: cannot backup special file %s" % quote_output(childpath))
+                self.warn("WARNING: cannot backup special file %s" % quote_output(childpath))
 
         must_create, r = self.check_backupdb_directory(compare_contents)
         if must_create:
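
A side effect of dropping the "and not os.path.islink(childpath)" guards above is that symlinks to regular files now fall into the isfile() branch and are uploaded as ordinary copies of their targets, which is the "symlinks to files I want backed up" use case from the commit message. A quick check of the os.path predicates involved (the temporary paths here are hypothetical, not part of the commit):

    import os, tempfile

    d = tempfile.mkdtemp()
    target = os.path.join(d, "B.txt")
    link = os.path.join(d, "B_sym.txt")
    open(target, "w").write("some data\n")
    os.symlink(target, link)

    # isfile()/isdir() follow symlinks, islink() does not, so a link to a file
    # is both islink() and isfile() and the patched code uploads it.
    print(os.path.islink(link))   # True
    print(os.path.isfile(link))   # True
    print(os.path.isdir(link))    # False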
62 changes: 61 additions & 1 deletion src/allmydata/test/test_cli.py
@@ -2433,6 +2433,17 @@ def writeto(self, path, data):
         fileutil.make_dirs(os.path.dirname(full_path))
         fileutil.write(full_path, data)
 
+    def mkdir(self, pathname):
+        fn = os.path.join(self.basedir, "home", unicode(pathname))
+        fileutil.make_dirs(fn)
+        return fn
+
+    def symlink(self, src, dst):
+        src_fn = os.path.abspath(os.path.join(self.basedir, "home", unicode(src)))
+        dst_fn = os.path.join(self.basedir, "home", unicode(dst))
+        os.symlink(src_fn, dst_fn)
+        return dst_fn
+
     def count_output(self, out):
         mo = re.search(r"(\d)+ files uploaded \((\d+) reused\), "
                         "(\d)+ files skipped, "
@@ -2444,6 +2455,55 @@ def count_output2(self, out):
         mo = re.search(r"(\d)+ files checked, (\d+) directories checked", out)
         return [int(s) for s in mo.groups()]
 
+    def test_backup_symlinks(self):
+        self.basedir = "cli/Backup/backup"
+        self.set_up_grid()
+
+        # is the backupdb available? If so, we test that a second backup does
+        # not create new directories.
+        hush = StringIO()
+        have_bdb = backupdb.get_backupdb(os.path.join(self.basedir, "dbtest"),
+                                         hush)
+
+        # create a small local directory with a couple of files
+        source = os.path.join(self.basedir, "home")
+        self.writeto("parent/A/B.txt", "B.txt\n" * 1000)
+        subdir_sym = self.symlink("parent/A", "parent/A/A")
+        bar_sym = self.symlink("parent/A/B.txt", "parent/A/B_sym.txt")
+
+        def do_backup(verbose=False):
+            cmd = ["backup"]
+            if verbose:
+                cmd.append("--verbose")
+            cmd.append(source)
+            cmd.append("tahoe:backups")
+            return self.do_cli(*cmd)
+
+        d = self.do_cli("create-alias", "tahoe")
+
+        if not have_bdb:
+            d.addCallback(lambda res: self.do_cli("backup", source, "tahoe:backups"))
+            def _should_complain((rc, out, err)):
+                self.failUnless("I was unable to import a python sqlite library" in err, err)
+            d.addCallback(_should_complain)
+            d.addCallback(self.stall, 1.1) # make sure the backups get distinct timestamps
+
+        d.addCallback(lambda res: do_backup())
+        def _check0((rc, out, err)):
+            self.failUnlessReallyEqual(err, "WARNING: symlink depth exceeded 'cli/Backup/backup/home/parent/A/A/A/A/A'\n")
+            self.failUnlessReallyEqual(rc, 2)
+            fu, fr, fs, dc, dr, ds = self.count_output(out)
+            # B.txt and B_sym.txt, 4 times each. This is inefficient, right?
+            self.failUnlessReallyEqual(fu, 8)
+            self.failUnlessReallyEqual(fr, 0)
+            self.failUnlessReallyEqual(fs, 0) # skips A/A/A/A/A (the fourth symlink)
+            # home, home/A, home/A/A, ... home/A/A/A/A/A
+            self.failUnlessReallyEqual(dc, 6)
+            self.failUnlessReallyEqual(dr, 0)
+            self.failUnlessReallyEqual(ds, 1)
+        d.addCallback(_check0)
+        return d
+
     def test_backup(self):
         self.basedir = "cli/Backup/backup"
         self.set_up_grid()
@@ -2460,7 +2520,7 @@ def test_backup(self):
         self.writeto("parent/subdir/foo.txt", "foo")
         self.writeto("parent/subdir/bar.txt", "bar\n" * 1000)
         self.writeto("parent/blah.txt", "blah")
-
+
         def do_backup(verbose=False):
             cmd = ["backup"]
             if verbose:
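
To see where the expected counts in _check0 come from, here is the test's layout rebuilt by hand (a hypothetical temporary directory, not the test harness) together with the traversal a depth cap of three produces:

    import os, tempfile

    # parent/A holds B.txt, a file symlink B_sym.txt -> B.txt, and a directory
    # symlink A -> parent/A that creates a cycle.
    home = tempfile.mkdtemp()
    a = os.path.join(home, "parent", "A")
    os.makedirs(a)
    open(os.path.join(a, "B.txt"), "w").write("B.txt\n" * 1000)
    os.symlink(os.path.join(a, "B.txt"), os.path.join(a, "B_sym.txt"))
    os.symlink(a, os.path.join(a, "A"))

    # Directories created (6): home, home/parent, home/parent/A, then the cycle is
    # followed three times: .../A/A, .../A/A/A, .../A/A/A/A.
    # The fourth symlink level .../A/A/A/A/A is skipped (1 directory skipped), and
    # each of the four visited copies of A contributes B.txt and B_sym.txt (8 uploads).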
