Skip to content

Commit

Permalink
Move syncutil code into sync.py. (#869)
Browse files Browse the repository at this point in the history
* Move syncutil code into sync.py.

* Update changelog.

* Move dircmp_deep to _dircmp_deep.

* Also rename logging bits for good measure.
  • Loading branch information
vyasr committed Dec 8, 2022
1 parent 35b862f commit 16a33c7
Show file tree
Hide file tree
Showing 4 changed files with 272 additions and 279 deletions.
1 change: 1 addition & 0 deletions changelog.txt
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ Changed
- The package namespace has been flattened so that most functionality is directly available in the ``signac`` namespace (#756, #868).
- The ``calc_id`` function has been moved from the ``hashing`` module to the ``job`` module (#873).
- Tests are run with all warnings treated as errors (#871).
- The contents of the ``syncutil`` module have been moved into ``sync`` (#869).

Removed
+++++++
Expand Down
273 changes: 270 additions & 3 deletions signac/sync.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,21 +73,25 @@
dst_job.sync(src_job, doc_sync=sync.DocSync.ByKey('foo'))
"""
import logging
import os
import re
import shutil
from collections import defaultdict as ddict
from collections import namedtuple
from collections.abc import Mapping
from contextlib import contextmanager
from copy import deepcopy
from filecmp import cmpfiles, dircmp
from multiprocessing.pool import ThreadPool

from ._utility import _query_yes_no
from ._utility import _query_yes_no, _safe_relpath
from .errors import (
DestinationExistsError,
DocumentSyncConflict,
FileSyncConflict,
SchemaSyncConflict,
)
from .syncutil import _FileModifyProxy, dircmp, dircmp_deep, logger

__all__ = [
"FileSync",
Expand All @@ -96,6 +100,269 @@
"sync_projects",
]

# Definition of helpers for syncing


# Numeric value of the custom "MORE" verbosity level: five below INFO, so it
# is more verbose than INFO but less verbose than DEBUG.
_LEVEL_MORE = logging.INFO - 5

# Register the level name globally and expose it as ``logging.MORE``.
logging.addLevelName(_LEVEL_MORE, "MORE")
logging.MORE = _LEVEL_MORE  # type: ignore

logger = logging.getLogger("sync")


def _log_more(msg, *args, **kwargs):
    """Emit ``msg`` on the sync logger at the custom MORE level.

    Positional and keyword arguments are forwarded unchanged to
    ``logger.log``.
    """
    logger.log(_LEVEL_MORE, msg, *args, **kwargs)


# Attach the helper so that call sites can write ``logger.more(...)``.
logger.more = _log_more  # type: ignore


class _dircmp_deep(dircmp):
    """Deep directory comparator.

    Behaves like ``filecmp.dircmp`` except that common files are compared by
    content rather than by their ``os.stat`` signature.
    """

    def phase3(self):
        """Find out differences between common files."""
        # shallow=False forces a content comparison even when the stat
        # signatures (type, size, mtime) of two files are identical.
        self.same_files, self.diff_files, self.funny_files = cmpfiles(
            self.left, self.right, self.common_files, shallow=False
        )

    # Work on a copy so that the dispatch table of the base class is untouched.
    methodmap = dict(dircmp.methodmap)
    # ``phase3`` computes all three attributes at once, so all three must be
    # routed to the deep variant. If ``funny_files`` were left pointing at the
    # base implementation, reading it first would populate ``same_files`` and
    # ``diff_files`` with shallow results and the deep comparison would never
    # run.
    # The type check for the following lines must be ignored.
    # See: https://github.com/python/mypy/issues/708
    methodmap["same_files"] = phase3  # type: ignore
    methodmap["diff_files"] = phase3  # type: ignore
    methodmap["funny_files"] = phase3  # type: ignore


class _DocProxy:
    """Proxy object for document (mapping) modifications.

    This proxy is used to keep track of changes and ensure that
    dry runs do not actually modify any data.

    Parameters
    ----------
    doc : dict
        Document data.
    dry_run : bool, optional
        Do not actually perform any data modification operation, but still log
        the action (Default value = False).

    """

    def __init__(self, doc, dry_run=False):
        self.doc = doc
        self.dry_run = dry_run

    def __str__(self):
        return f"_DocProxy({self.doc!s})"

    def __repr__(self):
        return f"_DocProxy({self.doc!r})"

    def __getitem__(self, key):
        return self.doc[key]

    def __setitem__(self, key, value):
        # The assignment is always logged; it is only applied outside dry runs.
        logger.more(f"Set '{key}'='{value}'.")
        if not self.dry_run:
            self.doc[key] = value

    def keys(self):
        """Return keys of proxy data."""
        return self.doc.keys()

    def clear(self):
        """Clear proxy data.

        Like item assignment, this is a no-op during a dry run. Without the
        guard, a dry-run rollback (``clear`` followed by ``update``) would
        empty the underlying document and never restore it, because
        ``update`` does not write in dry runs.
        """
        if not self.dry_run:
            self.doc.clear()

    def update(self, other):
        """Update proxy data with other."""
        # Route every assignment through __setitem__ so that it is logged and
        # honours the dry_run flag.
        for key in other.keys():
            self[key] = other[key]

    def __iter__(self):
        return iter(self.doc)

    def __contains__(self, key):
        return key in self.doc

    def __eq__(self, other):
        return self.doc.__eq__(other)

    def __len__(self):
        return len(self.doc)


class _FileModifyProxy:
    """Proxy used for data modification.

    This proxy is used for all file data modification to keep
    track of changes and to ensure that dry runs do not actually
    modify any data.

    Parameters
    ----------
    root : str, optional
        Root path. During dry runs, the path of every copied source file is
        printed relative to this root.
    follow_symlinks : bool, optional
        Whether to follow symlinks (Default value = True).
    permissions : bool, optional
        Whether to preserve permissions (Default value = False).
    times : bool, optional
        Whether to preserve timestamps (Default value = False).
    owner : bool, optional
        Whether to preserve owner (Default value = False).
    group : bool, optional
        Whether to preserve group (Default value = False).
    dry_run : bool, optional
        If True, do not actually perform any data modification operation, but still log
        the action (Default value = False).
    collect_stats : bool, optional
        Whether to collect stats (Default value = False).

    """

    def __init__(
        self,
        root=None,
        follow_symlinks=True,
        permissions=False,
        times=False,
        owner=False,
        group=False,
        dry_run=False,
        collect_stats=False,
    ):
        self.root = root
        self.follow_symlinks = follow_symlinks
        self.permissions = permissions
        self.times = times
        self.owner = owner
        self.group = group
        self.dry_run = dry_run
        # Number of copied files and their accumulated size in bytes, or None
        # when statistics are not collected.
        self.stats = dict(num_files=0, volume=0) if collect_stats else None

    # Internal proxy functions

    def _copy(self, src, dst):
        """Copy src to dst."""
        # NOTE(review): shutil.copy is documented to copy the permission mode
        # as well, so mode bits are preserved even with permissions=False.
        # Confirm whether that is intended before switching to copyfile.
        if not self.dry_run:
            shutil.copy(src, dst)

    def _copy_p(self, src, dst):
        """Copy src to dst with permissions."""
        if not self.dry_run:
            shutil.copy(src, dst)
            shutil.copymode(src, dst)

    def _copy2(self, src, dst):
        """Copy src to dst with preserved metadata."""
        if not self.dry_run:
            shutil.copy2(src, dst)

    def _remove(self, path):
        """Remove path."""
        if not self.dry_run:
            os.remove(path)

    # Public functions

    def remove(self, path):
        """Remove path."""
        logger.more(f"Remove path '{_safe_relpath(path)}'.")
        self._remove(path)

    def copy(self, src, dst):
        """Copy src to dst.

        Symbolic links are re-created instead of followed when
        ``follow_symlinks`` is False. Owner and group are transferred and
        statistics are updated according to the proxy configuration.

        Raises
        ------
        ValueError
            If timestamps should be preserved but permissions should not.

        """
        if self.dry_run and self.root is not None:
            print(_safe_relpath(src, self.root))
        if os.path.islink(src) and not self.follow_symlinks:
            link_target = os.readlink(src)
            logger.more(
                "Creating link '{}' -> '{}'.".format(
                    _safe_relpath(dst), _safe_relpath(link_target)
                )
            )
            # Also replace an existing (possibly broken) symlink; isfile()
            # alone follows links and would miss it, making os.symlink fail.
            if os.path.islink(dst) or os.path.isfile(dst):
                self.remove(dst)
            if not self.dry_run:
                os.symlink(link_target, dst)
        else:
            if self.permissions and self.times:
                preserving = " (preserving: permissions, times)"
                copy_function = self._copy2
            elif self.permissions:
                preserving = " (preserving: permissions)"
                copy_function = self._copy_p
            elif self.times:
                raise ValueError("Cannot copy timestamps without permissions.")
            else:
                preserving = ""
                copy_function = self._copy
            # Build the message in one step. Re-formatting an already
            # formatted message drops the suffix and fails on paths that
            # contain curly braces.
            logger.more(
                f"Copy file '{_safe_relpath(src)}' -> "
                f"'{_safe_relpath(dst)}'{preserving}."
            )
            copy_function(src, dst)
        if self.owner or self.group or self.stats is not None:
            src_stat = os.stat(src)
            if self.stats is not None:
                self.stats["num_files"] += 1
                self.stats["volume"] += src_stat.st_size
            if self.owner or self.group:
                logger.more(
                    "Copy owner/group '{}' -> '{}'".format(
                        _safe_relpath(src), _safe_relpath(dst)
                    )
                )
                if not self.dry_run:
                    # -1 leaves the respective id unchanged.
                    os.chown(
                        dst,
                        uid=src_stat.st_uid if self.owner else -1,
                        gid=src_stat.st_gid if self.group else -1,
                    )

    def copytree(self, src, dst, **kwargs):
        """Copy tree src to dst.

        Additional keyword arguments are forwarded to ``shutil.copytree``.
        """
        logger.more(f"Copy tree '{_safe_relpath(src)}' -> '{_safe_relpath(dst)}'.")
        # NOTE(review): shutil.copytree creates the destination directories
        # itself, so directories are created even during a dry run; only the
        # per-file copies go through self.copy and honour dry_run.
        shutil.copytree(src, dst, copy_function=self.copy, **kwargs)

    @contextmanager
    def create_backup(self, path):
        """Create a backup of path.

        The backup is stored next to ``path`` with a ``~`` suffix. If the
        managed block raises, ``path`` is restored from the backup before the
        exception propagates. The backup is removed on exit.

        Raises
        ------
        RuntimeError
            If the backup file already exists.

        """
        logger.debug(f"Create backup of '{_safe_relpath(path)}'.")
        path_backup = path + "~"
        if os.path.isfile(path_backup):
            raise RuntimeError(
                "Failed to create backup, file already exists: '{}'.".format(
                    _safe_relpath(path_backup)
                )
            )
        try:
            self._copy2(path, path_backup)
        except BaseException:
            # The original has not been touched yet. Restoring from a missing
            # or partially written backup would corrupt it, so only discard
            # whatever was written.
            if os.path.isfile(path_backup):
                self._remove(path_backup)
            raise
        try:
            yield path_backup
        except BaseException:  # roll back on any error, then re-raise
            logger.more("Error occurred, restoring backup...")
            self._copy2(path_backup, path)
            raise
        finally:
            logger.debug(f"Remove backup of '{_safe_relpath(path)}'.")
            self._remove(path_backup)

    @contextmanager
    def create_doc_backup(self, doc):
        """Create a backup of doc.

        Yields a ``_DocProxy`` wrapping ``doc``. Empty documents and documents
        without an existing backing file are backed up in memory; otherwise
        the backing file is backed up on disk via ``create_backup``.
        """
        proxy = _DocProxy(doc, dry_run=self.dry_run)
        fn = getattr(doc, "filename", getattr(doc, "_filename", None))
        if not len(proxy) or fn is None or not os.path.isfile(fn):
            backup = deepcopy(doc)  # use in-memory backup
            try:
                yield proxy
            except BaseException:  # roll back on any error, then re-raise
                # Nothing is written through the proxy during a dry run, so
                # there is nothing to roll back; clearing would only destroy
                # the untouched document.
                if not self.dry_run:
                    proxy.clear()
                    proxy.update(backup)
                raise
        else:
            with self.create_backup(fn):
                yield proxy


# Definition of default sync strategies

Expand Down Expand Up @@ -209,7 +476,7 @@ def _sync_job_workspaces(
):
"""Synchronize two job workspaces file by file, following the provided strategy."""
if deep:
diff = dircmp_deep(src.fn(subdir), dst.fn(subdir))
diff = _dircmp_deep(src.fn(subdir), dst.fn(subdir))
else:
diff = dircmp(src.fn(subdir), dst.fn(subdir))

Expand Down

0 comments on commit 16a33c7

Please sign in to comment.