-
Couldn't load subscription status.
- Fork 1.3k
Closed
Labels
awaiting response: we are waiting for your reply, please respond! :)
Description
It looks like, under specific conditions, the SQLite database is not initialized, which leads to the error above. The script I use to reproduce it:
git clone https://github.com/iterative/johnnie-gcp
cd johnnie-gcp
cat > fetch_cache.py # copy paste from below
python ./fetch_cache.py

# fetch_cache.py
import os, json
import logging
# Logger used by this script, registered under the name "fcache".
logger = logging.getLogger("fcache")
# Placeholder (not valid Python as written): substitute the GCP credentials
# mapping here; _make_config() serializes it to JSON in creds.json.
CREDS = {... fill in GCP creds ...}
def _fetch_cache(repo):
    """Collect plot/metric outputs across all commits and pull them at once.

    Iterates over every revision yielded by
    ``repo.brancher(all_commits=True)``. For each one, the used cache of every
    cached output flagged as a plot or a metric is merged into a single
    ``NamedCache``, which is then pulled with one ``repo.cloud.pull`` call.

    Args:
        repo: the repository object (created with ``Repo('.')`` in this
            script).

    Returns:
        Whatever ``repo.cloud.pull`` returns.
    """
    from dvc.cache import NamedCache

    cache = NamedCache()
    for rev in repo.brancher(all_commits=True):
        try:
            # Collect all cache to fetch it later in parallel
            for stage in repo.stages:
                for out in stage.outs:
                    if (out.plot or out.metric) and out.cache:
                        cache.update(out.get_used_cache())
        except Exception as e:
            # Best effort: a revision whose stages cannot be collected is
            # logged (tagged with its rev) and skipped, not fatal.
            logger.warning(f"Cache collection failed: {e}", extra={"rev": rev})

    logger.info("Downloading remote cache")
    # Nothing may follow this return: a log call placed after it would be
    # unreachable.
    return repo.cloud.pull(cache)
def _make_config(repo, creds):
    """Write the credentials file and point the ``my-gcs`` remote at it.

    The credentials are stored as JSON in ``creds.json`` inside
    ``repo.dvc_dir``, and that path is recorded as the ``credentialpath`` of
    the ``my-gcs`` remote through ``repo.config.edit()``.

    Args:
        repo: the repository object; must expose ``dvc_dir`` and
            ``config.edit()``.
        creds: JSON-serializable credentials (``CREDS`` in this script).
    """
    filename = os.path.join(repo.dvc_dir, "creds.json")
    with repo.config.edit() as conf:
        # Assumes the "my-gcs" remote section already exists in the config;
        # nothing here creates it.
        conf["remote"]["my-gcs"]["credentialpath"] = filename

    with open(filename, mode="w") as fd:
        json.dump(creds, fd)
from dvc.repo import Repo
# Build the repo object for the current directory (the cloned johnnie-gcp repo).
repo = Repo('.')
# Write creds.json and register it as the "my-gcs" remote's credential path
# before any remote access is attempted.
_make_config(repo, CREDS)
_fetch_cache(repo)

This downloads the required cache files and then errors out with:
Traceback (most recent call last):
File "../../fetch_cache.py", line 55, in <module>
_fetch_cache(repo)
File "../../fetch_cache.py", line 37, in _fetch_cache
return repo.cloud.pull(cache)
File "/home/suor/projects/dvc/dvc/data_cloud.py", line 89, in pull
self._save_pulled_checksums(cache)
File "/home/suor/projects/dvc/dvc/data_cloud.py", line 101, in _save_pulled_checksums
self.repo.state.save(cache_file, checksum)
File "/home/suor/projects/dvc/dvc/state.py", line 372, in save
existing_record = self.get_state_record_for_inode(actual_inode)
File "/home/suor/projects/dvc/dvc/state.py", line 350, in get_state_record_for_inode
self._execute(cmd, (self._to_sqlite(inode),))
File "/home/suor/projects/dvc/dvc/state.py", line 131, in _execute
return self.cursor.execute(cmd, parameters)
AttributeError: 'NoneType' object has no attribute 'execute'

A subsequent invocation fails with the same error, but in a different place:
Traceback (most recent call last):
File "../../fetch_cache.py", line 55, in <module>
_fetch_cache(repo)
File "../../fetch_cache.py", line 37, in _fetch_cache
return repo.cloud.pull(cache)
File "/home/suor/projects/dvc/dvc/data_cloud.py", line 84, in pull
downloaded_items_num = self.repo.cache.local.pull(
File "/home/suor/projects/dvc/dvc/remote/base.py", line 79, in wrapper
return f(obj, named_cache, remote, *args, **kwargs)
File "/home/suor/projects/dvc/dvc/remote/local.py", line 705, in pull
return self._process(
File "/home/suor/projects/dvc/dvc/remote/local.py", line 605, in _process
dir_status, file_status, dir_contents = self._status(
File "/home/suor/projects/dvc/dvc/remote/local.py", line 448, in _status
self.hashes_exist(md5s, jobs=jobs, name=self.cache_dir)
File "/home/suor/projects/dvc/dvc/remote/local.py", line 382, in hashes_exist
return [
File "/home/suor/projects/dvc/dvc/remote/local.py", line 390, in <listcomp>
if not self.changed_cache_file(hash_)
File "/home/suor/projects/dvc/dvc/remote/base.py", line 1191, in changed_cache_file
actual = self.get_hash(cache_info)
File "/home/suor/projects/dvc/dvc/remote/base.py", line 924, in get_hash
return self.tree.get_hash(path_info, **kwargs)
File "/home/suor/projects/dvc/dvc/remote/base.py", line 280, in get_hash
hash_ = self.state.get(path_info)
File "/home/suor/projects/dvc/dvc/state.py", line 406, in get
existing_record = self.get_state_record_for_inode(actual_inode)
File "/home/suor/projects/dvc/dvc/state.py", line 350, in get_state_record_for_inode
self._execute(cmd, (self._to_sqlite(inode),))
File "/home/suor/projects/dvc/dvc/state.py", line 131, in _execute
return self.cursor.execute(cmd, parameters)
AttributeError: 'NoneType' object has no attribute 'execute'

Output of `dvc version`:
$ dvc version
DVC version: 1.1.1+c6a024
Python version: 3.8.2
Platform: Linux-5.4.0-31-generic-x86_64-with-glibc2.27
Binary: False
Package: None
Supported remotes: azure, gdrive, gs, hdfs, http, https, s3, ssh, oss
Cache: reflink - not supported, hardlink - supported, symlink - supported
Filesystem type (cache directory): ('ecryptfs', '/home/suor/.Private')
Repo: dvc, git
Filesystem type (workspace): ('ecryptfs', '/home/suor/.Private')

Metadata
Assignees
Labels
awaiting response: we are waiting for your reply, please respond! :)