diff --git a/dvc/cloud/base.py b/dvc/cloud/base.py index 2bbb2beed4..7ea3400e53 100644 --- a/dvc/cloud/base.py +++ b/dvc/cloud/base.py @@ -125,8 +125,11 @@ def _push(self, path): def _makedirs(self, fname): dname = os.path.dirname(fname) - if not os.path.exists(dname): + try: os.makedirs(dname) + except OSError as e: + if e.errno != os.errno.EEXIST: + raise def _pull_key(self, key, path): """ Cloud-specific method of pulling keys """ diff --git a/dvc/project.py b/dvc/project.py index 4674accdba..99363beeb4 100644 --- a/dvc/project.py +++ b/dvc/project.py @@ -159,14 +159,22 @@ def checkout(self): for stage in self.stages(): stage.checkout() - def _used_cache(self): + def _used_cache(self, collect_dir=False): clist = [] for stage in self.stages(): for out in stage.outs: if not out.use_cache: continue - if out.cache not in clist: + if not os.path.isdir(out.cache) and \ + out.cache not in clist: clist.append(out.cache) + if os.path.isdir(out.cache): + dir_cache = out.dir_cache() + clist.extend(dir_cache.values()) + if collect_dir: + clist.extend([os.path.join(out.cache, f) for f in dir_cache.keys()]) + else: + clist.append(out.cache) return clist def _remove_cache_file(self, cache): @@ -188,7 +196,7 @@ def _remove_cache(self, cache): os.rmdir(cache) def gc(self): - clist = self._used_cache() + clist = self._used_cache(collect_dir=False) for cache in self.cache.all(): if cache in clist: continue @@ -196,14 +204,14 @@ def gc(self): self.logger.info(u'\'{}\' was removed'.format(self.to_dvc_path(cache))) def push(self, jobs=1): - self.cloud.push(self._used_cache(), jobs) + self.cloud.push(self._used_cache(collect_dir=True), jobs) def pull(self, jobs=1): - self.cloud.pull(self._used_cache(), jobs) + self.cloud.pull(self._used_cache(collect_dir=False), jobs) self.checkout() def status(self, jobs=1): - return self.cloud.status(self._used_cache(), jobs) + return self.cloud.status(self._used_cache(collect_dir=True), jobs) def graph(self): G = nx.DiGraph() diff --git a/tests/test_gc.py b/tests/test_gc.py index 253d67c23a..82fdcdb32d 100644 --- a/tests/test_gc.py +++ b/tests/test_gc.py @@ -17,7 +17,8 @@ class TestGC(TestDvc): def setUp(self): super(TestGC, self).setUp() - stage = self.dvc.add(self.FOO) + self.dvc.add(self.FOO) + self.dvc.add(self.DATA_DIR) self.good_cache = self.dvc.cache.all() self.bad_cache = [] @@ -26,6 +27,11 @@ def setUp(self): self.create(path, i) self.bad_cache.append(path) + path = os.path.join(self.dvc.cache.cache_dir, '45', '6', 'data') + os.mkdir(os.path.dirname(path)) + self.create(path, 'md5: "123"') + self.bad_cache.append(path) + def test_api(self): self.dvc.gc() self._test_gc() @@ -41,4 +47,3 @@ def _test_gc(self): for c in self.good_cache: self.assertTrue(os.path.exists(c)) - self.assertTrue(os.path.isfile(c))