Refactor, mainly to optimize _update further.
Also add a new 'benchmark' workflow.
jab committed Jan 14, 2024
1 parent d718b9b commit 9d451d1
Showing 15 changed files with 2,910 additions and 99 deletions.
2,723 changes: 2,723 additions & 0 deletions .benchmarks/Linux-CPython-3.12-64bit/baseline.json

Large diffs are not rendered by default.

45 changes: 45 additions & 0 deletions .github/workflows/benchmark.yml
@@ -0,0 +1,45 @@
name: benchmark

"on":
  push:
    branches:
      - main
      - dev
      - deps
  pull_request:
    branches:
      - main
  workflow_dispatch:

env:
  FORCE_COLOR: "1"

jobs:
  benchmark:
    runs-on: ubuntu-latest
    steps:
      - name: check out source
        uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11
      - name: set up Python
        uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c
        with:
          python-version: '3.12'
          cache: pip
          cache-dependency-path: dev-deps/python3.12/test.txt
      - name: install dependencies
        run: |
          python -m pip install -U pip setuptools wheel
          python -m pip install -r dev-deps/python3.12/test.txt
          python -m pip install .
      - name: run microbenchmarks
        run: |
          pytest -n0 --benchmark-autosave --benchmark-compare=.benchmarks/Linux-CPython-3.12-64bit/baseline.json --benchmark-group-by=name tests/microbenchmarks.py
      - name: archive benchmark results
        uses: actions/upload-artifact@1eb3cb2b3e0f29609092a73eb033bb759a334595
        with:
          name: microbenchmark results
          path: .benchmarks
          if-no-files-found: error

permissions:
  contents: read
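
For context, the microbenchmarks this workflow runs and compares against the saved baseline use pytest-benchmark's "benchmark" fixture. A minimal sketch of such a test (illustrative only, not part of this commit; the real benchmarks live in tests/microbenchmarks.py):

    from bidict import bidict

    def test_update(benchmark) -> None:
        items = {i: str(i) for i in range(1000)}

        def update() -> bidict:
            b = bidict()
            b.update(items)
            return b

        result = benchmark(update)  # pytest-benchmark times this over many rounds
        assert len(result) == 1000  # benchmark() returns the function's return value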
8 changes: 5 additions & 3 deletions .github/workflows/test.yml
@@ -1,4 +1,5 @@
name: test
# This name appears as the text in the build badge: https://github.com/jab/bidict/actions/workflows/test.yml/badge.svg
name: tests

"on":
workflow_dispatch:
@@ -27,10 +28,11 @@ jobs:
include:
# https://github.com/actions/python-versions/blob/main/versions-manifest.json
- pyversion: "3.12"
enable_coverage: true
enable_coverage: true # typing.override built-in
- pyversion: "3.11"
enable_coverage: true
enable_coverage: true # typing.override shimmed, operator.call built-in
- pyversion: "3.10"
enable_coverage: true # typing.override shimmed, operator.call shimmed
- pyversion: "3.9"
- pyversion: "3.8"
- pyversion: "pypy-3.10"
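The matrix comments above refer to compatibility shims: typing.override is built into Python 3.12+ and operator.call into 3.11+, so they must be shimmed on older versions. As a simplified illustration (bidict's actual shim lives in bidict/_typing.py and may differ):

    import sys
    import typing as t

    if sys.version_info >= (3, 12):
        from typing import override
    else:
        _F = t.TypeVar('_F', bound=t.Callable[..., t.Any])

        def override(f: _F) -> _F:
            return f  # no-op stand-in for the 3.12+ decorator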
2 changes: 2 additions & 0 deletions .pre-commit-config.yaml
@@ -1,3 +1,5 @@
exclude: ^\.benchmarks

repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v4.5.0
10 changes: 8 additions & 2 deletions CHANGELOG.rst
@@ -27,10 +28,12 @@ please consider sponsoring bidict on GitHub.`
-------------------------

Primarily, this release simplifies bidict by removing minor features
that are no longer necessary or evidently are substantially unused.
that are no longer necessary or evidently are substantially unused,
and includes some minor performance improvements.
These simplifications will make it easier to improve bidict in the future,
including further potential performance improvements.
It also contains several other minor improvements.

It also contains several other improvements.

- Drop support for Python 3.7,
which reached end of life on 2023-06-27,
@@ -62,6 +64,10 @@ It also contains several other minor improvements.
``bidict(None)``, ``bi.update(False)``, etc.
would fail to raise a :class:`TypeError`.

- Further optimize performance of
:meth:`~bidict.bidict.update`
and related methods.

- All :meth:`~bidict.bidict.__init__`,
:meth:`~bidict.bidict.update`,
and related methods
6 changes: 1 addition & 5 deletions bidict/__init__.py
@@ -65,12 +65,12 @@
from ._bidict import bidict as bidict
from ._dup import DROP_NEW as DROP_NEW
from ._dup import DROP_OLD as DROP_OLD
from ._dup import OD as OD
from ._dup import ON_DUP_DEFAULT as ON_DUP_DEFAULT
from ._dup import ON_DUP_DROP_OLD as ON_DUP_DROP_OLD
from ._dup import ON_DUP_RAISE as ON_DUP_RAISE
from ._dup import RAISE as RAISE
from ._dup import OnDup as OnDup
from ._dup import OnDupAction as OnDupAction
from ._exc import BidictException as BidictException
from ._exc import DuplicationError as DuplicationError
from ._exc import KeyAndValueDuplicationError as KeyAndValueDuplicationError
@@ -88,10 +88,6 @@
from .metadata import __version__ as __version__


#: Alias
OnDupAction = OD


# Set __module__ of re-exported classes to the 'bidict' top-level module, so that e.g.
# 'bidict.bidict' shows up as 'bidict.bidict' rather than 'bidict._bidict.bidict'.
for _obj in tuple(locals().values()): # pragma: no cover
135 changes: 82 additions & 53 deletions bidict/_base.py
@@ -18,6 +18,7 @@

import typing as t
import weakref
from collections import deque
from functools import partial
from itertools import starmap
from operator import eq
@@ -40,16 +41,26 @@
from ._typing import OKT
from ._typing import OVT
from ._typing import VT
from ._typing import Items
from ._typing import Maplike
from ._typing import MapOrItems
from ._typing import override


call: t.Callable[[t.Callable[[], None]], None]
try:
from operator import call # Python 3.11+
except ImportError: # Python < 3.11 compat
from operator import methodcaller

call = methodcaller('__call__')
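
As an illustration (not part of this diff), the methodcaller-based fallback behaves like operator.call, since calling obj.__call__() is the same as calling obj():

    from operator import methodcaller

    call = methodcaller('__call__')
    assert call(dict) == {}  # same result as operator.call(dict) on Python 3.11+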


OldKV: t.TypeAlias = t.Tuple[OKT[KT], OVT[VT]]
DedupResult: t.TypeAlias = t.Optional[OldKV[KT, VT]]
Write: t.TypeAlias = t.List[t.Callable[[], None]]
Write: t.TypeAlias = t.Callable[[], None]
Unwrite: t.TypeAlias = Write
PreparedWrite: t.TypeAlias = t.Tuple[Write, Unwrite]
WriteSpec: t.TypeAlias = t.Tuple[t.List[Write], t.List[Unwrite]]
BT = t.TypeVar('BT', bound='BidictBase[t.Any, t.Any]')


@@ -162,7 +173,7 @@ def __init__(self, arg: MapOrItems[KT, VT] = (), /, **kw: VT) -> None:
"""
self._fwdm = self._fwdm_cls()
self._invm = self._invm_cls()
self._update(arg, kw, rbof=False)
self._update(arg, kw, rollback=False)

# If Python ever adds support for higher-kinded types, `inverse` could use them, e.g.
# def inverse(self: BT[KT, VT]) -> BT[VT, KT]:
@@ -249,8 +260,8 @@ def keys(self) -> t.KeysView[KT]:
- having a .mapping attribute in Python 3.10+
that exposes a mappingproxy to *b._fwdm*.
"""
fwdm = self._fwdm
return fwdm.keys() if isinstance(fwdm, dict) else BidictKeysView(self)
fwdm, fwdm_cls = self._fwdm, self._fwdm_cls
return fwdm.keys() if fwdm_cls is dict else BidictKeysView(self)

@override
def items(self) -> t.ItemsView[KT, VT]:
@@ -267,7 +278,7 @@ def items(self) -> t.ItemsView[KT, VT]:
- having a .mapping attribute in Python 3.10+
that exposes a mappingproxy to *b._fwdm*.
"""
return self._fwdm.items() if isinstance(self._fwdm, dict) else super().items()
return self._fwdm.items() if self._fwdm_cls is dict else super().items()
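
As an illustration of the dict view behaviors these docstrings describe (not part of this diff):

    from bidict import bidict

    b = bidict({1: 'one', 2: 'two'})
    assert b.keys() & {2, 3} == {2}  # set operations work on the dict_keys view
    assert b.keys().isdisjoint({3, 4})
    assert b.keys().mapping == {1: 'one', 2: 'two'}  # .mapping on Python 3.10+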

# The inherited collections.abc.Mapping.__contains__() method is implemented by doing a `try`
# `except KeyError` around `self[key]`. The following implementation is much faster,
@@ -362,7 +373,7 @@ def _dedup(self, key: KT, val: VT, on_dup: OnDup) -> DedupResult[KT, VT]:
# else neither isdupkey nor isdupval.
return oldkey, oldval

def _prep_write(self, newkey: KT, newval: VT, oldkey: OKT[KT], oldval: OVT[VT], save_unwrite: bool) -> PreparedWrite:
def _prep_write(self, newkey: KT, newval: VT, oldkey: OKT[KT], oldval: OVT[VT], save_unwrite: bool) -> WriteSpec:
"""Given (newkey, newval) to insert, return the list of operations necessary to perform the write.
*oldkey* and *oldval* are as returned by :meth:`_dedup`.
@@ -377,111 +388,129 @@ def _prep_write(self, newkey: KT, newval: VT, oldkey: OKT[KT], oldval: OVT[VT],
fwdm, invm = self._fwdm, self._invm
fwdm_set, invm_set = fwdm.__setitem__, invm.__setitem__
fwdm_del, invm_del = fwdm.__delitem__, invm.__delitem__
write: list[t.Callable[[], None]] = [
writes: list[Write] = [
partial(fwdm_set, newkey, newval),
partial(invm_set, newval, newkey),
]
write_append = write.append
unwrite: list[t.Callable[[], None]] = []
append_write = writes.append
unwrites: list[Unwrite] = []
if oldval is MISSING and oldkey is MISSING: # no key or value duplication
# {0: 1, 2: 3} | {4: 5} => {0: 1, 2: 3, 4: 5}
if save_unwrite:
unwrite = [
unwrites = [
partial(fwdm_del, newkey),
partial(invm_del, newval),
]
elif oldval is not MISSING and oldkey is not MISSING: # key and value duplication across two different items
# {0: 1, 2: 3} | {0: 3} => {0: 3}
write_append(partial(fwdm_del, oldkey))
write_append(partial(invm_del, oldval))
append_write(partial(fwdm_del, oldkey))
append_write(partial(invm_del, oldval))
if save_unwrite:
unwrite = [
unwrites = [
partial(fwdm_set, newkey, oldval),
partial(invm_set, oldval, newkey),
partial(fwdm_set, oldkey, newval),
partial(invm_set, newval, oldkey),
]
elif oldval is not MISSING: # just key duplication
# {0: 1, 2: 3} | {2: 4} => {0: 1, 2: 4}
write_append(partial(invm_del, oldval))
append_write(partial(invm_del, oldval))
if save_unwrite:
unwrite = [
unwrites = [
partial(fwdm_set, newkey, oldval),
partial(invm_set, oldval, newkey),
partial(invm_del, newval),
]
else:
assert oldkey is not MISSING # just value duplication
# {0: 1, 2: 3} | {4: 3} => {0: 1, 4: 3}
write_append(partial(fwdm_del, oldkey))
append_write(partial(fwdm_del, oldkey))
if save_unwrite:
unwrite = [
unwrites = [
partial(fwdm_set, oldkey, newval),
partial(invm_set, newval, oldkey),
partial(fwdm_del, newkey),
]
return write, unwrite
return writes, unwrites

def _update(
self,
arg: MapOrItems[KT, VT],
kw: t.Mapping[str, VT] = MappingProxyType({}),
*,
rbof: bool | None = None,
rollback: bool | None = None,
on_dup: OnDup | None = None,
) -> None:
"""Update, possibly rolling back on failure as per *rbof*."""
"""Update with the items from *arg* and *kw*, maybe failing and rolling back as per *on_dup* and *rollback*."""
# Note: We must process input in a single pass, since arg may be a generator.
if not isinstance(arg, (t.Iterable, Maplike)):
raise TypeError(f"'{arg.__class__.__name__}' object is not iterable")
if not arg and not kw:
return
if on_dup is None:
on_dup = self.on_dup
if rbof is None:
rbof = RAISE in on_dup
if not self and not kw:
if isinstance(arg, BidictBase): # can skip dup check
if rollback is None:
rollback = RAISE in on_dup

# Fast paths when we're updating only from another bidict (i.e. no dup vals in new items).
if not kw and isinstance(arg, BidictBase):
if not self:
self._init_from(arg)
return
# If arg is not a BidictBase, fall through to the general treatment below,
# which includes duplication checking. (If arg is some BidirectionalMapping
# that does not inherit from BidictBase, it's a foreign implementation, so we
# perform duplication checking to err on the safe side.)

# Note: We must process input in a single pass, since arg may be a generator.
# When we can use fast set implementations of isdisjoint to ensure no duplication
# and then perform fast dict update, and don't need to maintain the same order between
# self and self.inverse, this is faster than de-duplicating an item at a time.
# Note: We don't generalize this to handle duplication when self and arg are not disjoint,
# since e.g. differentiating between a duplicated entire item (which is ignored) and an
# item that actually does result in a key and/or value duplication would defeat the speedup.
if (
not getattr(self, '_ordered', None)
and self._fwdm_cls is dict
and self._invm_cls is dict
and self.keys().isdisjoint(arg.keys())
and self.values().isdisjoint(arg.values())
):
self._fwdm.update(arg._fwdm)
self._invm.update(arg._invm)
return
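# Editor's aside (not part of this diff): the above fast path works because a
# bidict's keys() and values() are both backed by dict key views (values() is
# inverse.keys()), so the isdisjoint checks run at C speed. For example:
#   a, b = bidict({0: 1, 2: 3}), bidict({4: 5})
#   assert a.keys().isdisjoint(b.keys()) and a.values().isdisjoint(b.values())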

# If we roll back on failure and we know that there are more updates to process than
# already-contained items, our rollback strategy is to update a copy of self (without
# rolling back on failure), and then to become the copy if all updates succeed.
if rbof and isinstance(arg, t.Sized) and len(arg) + len(kw) > len(self):
target = self.copy()
target._update(arg, kw, rbof=False, on_dup=on_dup)
self._init_from(target)
# Fast path when we're adding more items than we contain already and rollback is enabled:
# Update a copy of self with rollback disabled. Fail if that fails, otherwise become the copy.
if rollback and isinstance(arg, t.Sized) and len(arg) + len(kw) > len(self):
tmp = self.copy()
tmp._update(arg, kw, rollback=False, on_dup=on_dup)
self._init_from(tmp)
return

# In all other cases we perform the update as follows:
# For each new item, perform a dup check (raising if necessary), compute the associated writes we need to
# perform on our backing _fwdm and _invm mappings, and apply the writes. If rollback is enabled, also compute
# the associated unwrites as we go. If the update results in a DuplicationError and rollback is enabled, apply
# the accumulated unwrites before raising to ensure we fail clean.
writes = self._prep_writes(iteritems(arg, **kw), rollback, on_dup)
# Map call onto writes and consume the generator at C speed by feeding to a 0-length deque.
deque(map(call, writes), maxlen=0)
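
As a standalone illustration of this idiom (not part of this diff): a deque with maxlen=0 exhausts any iterator at C speed while storing nothing, so each pending callable runs exactly once, in order:

    from collections import deque
    from functools import partial

    results: list[int] = []
    writes = (partial(results.append, i) for i in range(3))  # lazy stream of write ops
    deque(map(lambda write: write(), writes), maxlen=0)  # run each; keep nothing
    assert results == [0, 1, 2]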

def _prep_writes(self, new_items: Items[KT, VT], rollback: bool, on_dup: OnDup) -> t.Iterator[Write]:
# There are more already-contained items than updates to process, or we don't know
# how many updates there are to process. If we need to roll back on failure,
# save a log of Unwrites as we update so we can undo changes if the update fails.
unwrites: list[Unwrite] = []
append_unwrite = unwrites.append
prep_write = self._prep_write
for key, val in iteritems(arg, **kw):
extend_unwrites = unwrites.extend
writespec = self._prep_write
for key, val in new_items:
try:
dedup_result = self._dedup(key, val, on_dup)
except DuplicationError:
if rbof:
while unwrites: # apply saved unwrites
unwrite = unwrites.pop()
for unwriteop in unwrite:
unwriteop()
if rollback:
yield from reversed(unwrites)
raise
if dedup_result is None: # no-op
continue
write, unwrite = prep_write(key, val, *dedup_result, save_unwrite=rbof)
for writeop in write: # apply the write
writeop()
if rbof and unwrite: # save the unwrite for later application if needed
append_unwrite(unwrite)
writes, new_unwrites = writespec(key, val, *dedup_result, save_unwrite=rollback)
yield from writes
if rollback and new_unwrites: # save new unwrites in case we need them later
extend_unwrites(new_unwrites)

def __copy__(self: BT) -> BT:
"""Used for the copy protocol. See the :mod:`copy` module."""
@@ -523,15 +552,15 @@ def __or__(self: BT, other: t.Mapping[KT, VT]) -> BT:
if not isinstance(other, t.Mapping):
return NotImplemented
new = self.copy()
new._update(other, rbof=False)
new._update(other, rollback=False)
return new

def __ror__(self: BT, other: t.Mapping[KT, VT]) -> BT:
"""Return other|self."""
if not isinstance(other, t.Mapping):
return NotImplemented
new = self.__class__(other)
new._update(self, rbof=False)
new._update(self, rollback=False)
return new

@override