Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion dvc/cloud/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,8 +125,11 @@ def _push(self, path):

def _makedirs(self, fname):
    """Make sure the directory that will contain ``fname`` exists.

    Args:
        fname: path of a file about to be written.

    Raises:
        OSError: if the directory cannot be created, or if something that
            is not a directory already occupies its path.
    """
    # Function-scope import keeps this block self-contained. ``os.errno``
    # was never a documented API and is gone in Python 3.7+, so the
    # constant has to come from the ``errno`` module itself.
    import errno

    dname = os.path.dirname(fname)
    if not dname:
        # Bare file name: there is no parent directory to create, and
        # os.makedirs('') would raise.
        return

    # Attempt the creation and tolerate "already exists" instead of
    # checking first: a check-then-create sequence can lose a race with
    # another worker creating the same directory.
    try:
        os.makedirs(dname)
    except OSError as e:
        # EEXIST is only benign when the existing entry is a directory.
        if e.errno != errno.EEXIST or not os.path.isdir(dname):
            raise

def _pull_key(self, key, path):
""" Cloud-specific method of pulling keys """
Expand Down
20 changes: 14 additions & 6 deletions dvc/project.py
Original file line number Diff line number Diff line change
Expand Up @@ -159,14 +159,22 @@ def checkout(self):
for stage in self.stages():
stage.checkout()

# Walks every output of every stage returned by self.stages() and returns a
# list of the cache paths those outputs reference.
# NOTE(review): this span is a flattened diff -- indentation is lost, and the
# two signatures and the two `not in clist` checks look like the pre- and
# post-change variants of the same lines. Confirm against the real file.
def _used_cache(self):
def _used_cache(self, collect_dir=False):
clist = []
for stage in self.stages():
for out in stage.outs:
# Outputs that do not use the cache contribute nothing.
if not out.use_cache:
continue
if out.cache not in clist:
# A cache path that is not a directory is appended only if not yet listed.
if not os.path.isdir(out.cache) and \
out.cache not in clist:
clist.append(out.cache)
# A cache path that is a directory contributes every value of
# out.dir_cache() (no duplicate check is applied to these).
if os.path.isdir(out.cache):
dir_cache = out.dir_cache()
clist.extend(dir_cache.values())
# With collect_dir set, also add out.cache joined with each dir_cache key.
if collect_dir:
clist.extend([os.path.join(out.cache, f) for f in dir_cache.keys()])
# NOTE(review): with indentation lost it is unclear whether this `else`
# pairs with `if collect_dir` or with `if os.path.isdir(...)` -- verify.
else:
clist.append(out.cache)
return clist

def _remove_cache_file(self, cache):
Expand All @@ -188,22 +196,22 @@ def _remove_cache(self, cache):
os.rmdir(cache)

def gc(self):
    """Remove every cache entry that is not reported as used.

    Each entry yielded by ``self.cache.all()`` that is absent from
    ``self._used_cache(collect_dir=False)`` is passed to
    ``self._remove_cache`` and the removal is logged at info level.
    """
    # Build the lookup once: testing membership against the raw list would
    # rescan it for every cache entry.
    used = set(self._used_cache(collect_dir=False))
    for cache in self.cache.all():
        if cache in used:
            continue
        self._remove_cache(cache)
        self.logger.info(u'\'{}\' was removed'.format(self.to_dvc_path(cache)))

def push(self, jobs=1):
    """Hand the used cache paths to ``self.cloud.push``.

    Args:
        jobs: forwarded unchanged to ``self.cloud.push``.
    """
    # Exactly one upload call. collect_dir=True asks _used_cache for the
    # per-entry paths of directory caches as well.
    self.cloud.push(self._used_cache(collect_dir=True), jobs)

def pull(self, jobs=1):
    """Fetch the used cache paths via ``self.cloud.pull``, then check out.

    Args:
        jobs: forwarded unchanged to ``self.cloud.pull``.
    """
    # Exactly one download call, made with collect_dir=False.
    self.cloud.pull(self._used_cache(collect_dir=False), jobs)
    # Checkout runs only after the pull call has returned.
    self.checkout()

def status(self, jobs=1):
    """Return what ``self.cloud.status`` reports for the used cache paths.

    Args:
        jobs: forwarded unchanged to ``self.cloud.status``.

    Returns:
        Whatever ``self.cloud.status`` returns.
    """
    # Single return, using the same collect_dir=True listing that push uses.
    return self.cloud.status(self._used_cache(collect_dir=True), jobs)

def graph(self):
G = nx.DiGraph()
Expand Down
9 changes: 7 additions & 2 deletions tests/test_gc.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,8 @@ class TestGC(TestDvc):
def setUp(self):
super(TestGC, self).setUp()

stage = self.dvc.add(self.FOO)
self.dvc.add(self.FOO)
self.dvc.add(self.DATA_DIR)
self.good_cache = self.dvc.cache.all()

self.bad_cache = []
Expand All @@ -26,6 +27,11 @@ def setUp(self):
self.create(path, i)
self.bad_cache.append(path)

path = os.path.join(self.dvc.cache.cache_dir, '45', '6', 'data')
os.mkdir(os.path.dirname(path))
self.create(path, 'md5: "123"')
self.bad_cache.append(path)

def test_api(self):
self.dvc.gc()
self._test_gc()
Expand All @@ -41,4 +47,3 @@ def _test_gc(self):

for c in self.good_cache:
self.assertTrue(os.path.exists(c))
self.assertTrue(os.path.isfile(c))