Skip to content

Commit

Permalink
Merge pull request #35 from investigativedata/develop
Browse files Browse the repository at this point in the history
v0.1.0
  • Loading branch information
simonwoerpel committed Mar 6, 2024
2 parents 842ac70 + 027c067 commit 22f313f
Show file tree
Hide file tree
Showing 19 changed files with 502 additions and 149 deletions.
2 changes: 1 addition & 1 deletion .bumpversion.cfg
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
[bumpversion]
current_version = 0.0.6
current_version = 0.1.0
commit = True
tag = True
message = 🔖 Bump version: {current_version} → {new_version}
Expand Down
4 changes: 2 additions & 2 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ repos:
args: ["--profile", "black"]

- repo: https://github.com/psf/black
rev: 24.1.1
rev: 24.2.0
hooks:
- id: black

Expand Down Expand Up @@ -68,7 +68,7 @@ repos:
- id: rst-inline-touching-normal

- repo: https://github.com/python-poetry/poetry
rev: 1.7.0
rev: 1.8.0
hooks:
- id: poetry-check
- id: poetry-lock
Expand Down
2 changes: 1 addition & 1 deletion VERSION
Original file line number Diff line number Diff line change
@@ -1 +1 @@
0.0.6
0.1.0
2 changes: 1 addition & 1 deletion anystore/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,4 +12,4 @@
]


__version__ = "0.0.6"
__version__ = "0.1.0"
11 changes: 11 additions & 0 deletions anystore/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,17 @@ def cli_put(
S.put(key, value)


@cli.command("keys")
def cli_keys(
prefix: Annotated[Optional[str], typer.Argument(..., help="Key prefix")] = None,
o: Annotated[str, typer.Option("-o", help="Output uri")] = "-",
):
with ErrorHandler():
S = get_store(uri=state["uri"], use_pickle=state["pickle"])
keys = "\n".join(S.iterate_keys(prefix))
smart_write(o, keys.encode())


@cli.command("io")
def cli_io(
i: Annotated[str, typer.Option("-i", help="Input uri")] = "-",
Expand Down
3 changes: 2 additions & 1 deletion anystore/decorators.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,8 @@ def _inner(*args, **kwargs):
res = func(*args, **kwargs)
if serialize_func is not None:
res = serialize_func(res)
store.put(key, res)
if key is not None:
store.put(key, res)
return res

return _inner
Expand Down
31 changes: 14 additions & 17 deletions anystore/io.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
log = logging.getLogger(__name__)

DEFAULT_MODE = "rb"
DEFAULT_WRITE_MODE = "wb"

Uri: TypeAlias = Path | BinaryIO | TextIO | str

Expand All @@ -26,12 +27,10 @@ class SmartHandler:
def __init__(
self,
uri: Uri,
*args,
**kwargs,
) -> None:
self.uri = ensure_uri(uri)
self.is_buffer = self.uri == "-"
self.args = args
kwargs["mode"] = kwargs.get("mode", DEFAULT_MODE)
self.sys_io = _get_sysio(kwargs["mode"])
if hasattr(self.sys_io, "buffer"):
Expand All @@ -43,7 +42,7 @@ def open(self):
if self.is_buffer:
self.handler = self.sys_io
else:
handler = open(self.uri, *self.args, **self.kwargs)
handler = open(self.uri, **self.kwargs)
self.handler = handler.open()
return self.handler

Expand All @@ -61,33 +60,31 @@ def __exit__(self, *args, **kwargs) -> None:
@contextlib.contextmanager
def smart_open(
uri: Uri,
mode: str | None = None,
*args,
mode: str | None = DEFAULT_MODE,
**kwargs,
):
if mode is not None:
kwargs["mode"] = mode
else:
kwargs["mode"] = kwargs.get("mode", DEFAULT_MODE)
handler = SmartHandler(uri, *args, **kwargs)
handler = SmartHandler(uri, mode=mode, **kwargs)
try:
yield handler.open()
finally:
handler.close()


def smart_stream(uri: Uri, *args, **kwargs) -> Generator[str | bytes, None, None]:
with smart_open(uri, *args, **kwargs) as fh:
def smart_stream(
uri: Uri, mode: str | None = DEFAULT_MODE, **kwargs
) -> Generator[str | bytes, None, None]:
with smart_open(uri, mode, **kwargs) as fh:
while line := fh.readline():
yield line


def smart_read(uri: Uri, *args, **kwargs) -> Any:
with smart_open(uri, *args, **kwargs) as fh:
def smart_read(uri: Uri, mode: str | None = DEFAULT_MODE, **kwargs) -> Any:
with smart_open(uri, mode, **kwargs) as fh:
return fh.read()


def smart_write(uri, content: bytes | str, *args, **kwargs) -> None:
kwargs["mode"] = kwargs.get("mode", "wb")
with smart_open(uri, *args, **kwargs) as fh:
def smart_write(
uri, content: bytes | str, mode: str | None = DEFAULT_WRITE_MODE, **kwargs
) -> None:
with smart_open(uri, mode, **kwargs) as fh:
fh.write(content)
7 changes: 7 additions & 0 deletions anystore/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,3 +10,10 @@ class Settings(BaseSettings):
json_uri: str | None = None
serialization_mode: Mode | None = "auto"
raise_on_nonexist: bool = True


class SqlSettings(BaseSettings):
model_config = SettingsConfigDict(env_prefix="anystore_sql_")

table: str | None = "anystore"
pool_size: int | None = 5
34 changes: 34 additions & 0 deletions anystore/store/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
from functools import cache
from urllib.parse import urlparse

from anystore.settings import Settings
from anystore.store.base import BaseStore
from anystore.store.fs import Store
from anystore.store.redis import RedisStore
from anystore.store.sql import SqlStore
from anystore.util import ensure_uri


settings = Settings()


@cache
def get_store(**kwargs) -> BaseStore:
uri = kwargs.get("uri")
if uri is None:
if settings.yaml_uri is not None:
store = BaseStore.from_yaml_uri(settings.yaml_uri, **kwargs)
return get_store(**store.model_dump())
if settings.json_uri is not None:
store = BaseStore.from_json_uri(settings.json_uri, **kwargs)
return get_store(**store.model_dump())
uri = ensure_uri(uri)
parsed = urlparse(uri)
if parsed.scheme == "redis":
return RedisStore(**kwargs)
if "sql" in parsed.scheme:
return SqlStore(**kwargs)
return Store(**kwargs)


__all__ = ["get_store", "Store", "RedisStore", "SqlStore"]
59 changes: 38 additions & 21 deletions anystore/store.py → anystore/store/base.py
Original file line number Diff line number Diff line change
@@ -1,26 +1,48 @@
from functools import cache
from typing import Any
from typing import Any, Generator

from pydantic import field_validator
from anystore.exceptions import DoesNotExist
from anystore.io import smart_read, smart_write

from anystore.exceptions import DoesNotExist
from anystore.mixins import BaseModel
from anystore.serialize import from_store, to_store, Mode
from anystore.types import Uri
from anystore.util import clean_dict, ensure_uri
from anystore.serialize import Mode, from_store, to_store
from anystore.settings import Settings
from anystore.types import Uri, Value
from anystore.util import clean_dict, ensure_uri


settings = Settings()


class Store(BaseModel):
class BaseStore(BaseModel):
uri: str | None = settings.uri
serialization_mode: Mode | None = settings.serialization_mode
raise_on_nonexist: bool | None = settings.raise_on_nonexist
backend_config: dict[str, Any] | None = None

def _write(self, key: Uri, value: Value, **kwargs) -> None:
"""
Write value with key to acutal backend
"""
raise NotImplementedError

def _read(self, key: Uri, raise_on_nonexist: bool | None = True, **kwargs) -> Any:
"""
Read key from actual backend
"""
raise NotImplementedError

def _get_key_prefix(self) -> str:
"""
Get backend specific key prefix
"""
raise NotImplementedError

def _iterate_keys(self, prefix: str | None = None) -> Generator[str, None, None]:
"""
Backend specific key iterator
"""
raise NotImplementedError

def get(
self,
key: Uri,
Expand All @@ -34,7 +56,9 @@ def get(
kwargs = self.ensure_kwargs(**kwargs)
key = self.get_key(key)
try:
return from_store(smart_read(key, **kwargs), serialization_mode)
return from_store(
self._read(key, raise_on_nonexist, **kwargs), serialization_mode
)
except FileNotFoundError: # fsspec
if raise_on_nonexist:
raise DoesNotExist(f"Key does not exist: `{key}`")
Expand All @@ -46,27 +70,20 @@ def put(
serialization_mode = serialization_mode or self.serialization_mode
kwargs = self.ensure_kwargs(**kwargs)
key = self.get_key(key)
smart_write(key, to_store(value, serialization_mode))
self._write(key, to_store(value, serialization_mode))

def ensure_kwargs(self, **kwargs) -> dict[str, Any]:
config = clean_dict(self.backend_config)
return {**config, **clean_dict(kwargs)}

def get_key(self, key: Uri) -> str:
return f"{self.uri}/{str(key).lstrip('/')}"
return f"{self._get_key_prefix()}/{str(key)}".strip("/")

def iterate_keys(self, prefix: str | None = None) -> Generator[str, None, None]:
yield from self._iterate_keys(prefix)

@field_validator("uri", mode="before")
@classmethod
def ensure_uri(cls, v: Any) -> str:
uri = ensure_uri(v)
return uri.rstrip("/")


@cache
def get_store(**kwargs) -> Store:
if "uri" not in kwargs:
if settings.yaml_uri is not None:
return Store.from_yaml_uri(settings.yaml_uri, **kwargs)
if settings.json_uri is not None:
return Store.from_json_uri(settings.json_uri, **kwargs)
return Store(**kwargs)
40 changes: 40 additions & 0 deletions anystore/store/fs.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
"""
Store backend using any file-like location usable via `fsspec`
"""

from typing import Generator

import fsspec
from banal import ensure_dict

from anystore.io import smart_read, smart_write
from anystore.exceptions import DoesNotExist
from anystore.store.base import BaseStore
from anystore.types import Uri, Value


class Store(BaseStore):
def _write(self, key: Uri, value: Value, **kwargs) -> None:
return smart_write(key, value, **kwargs)

def _read(
self, key: Uri, raise_on_nonexist: bool | None = True, **kwargs
) -> Value | None:
try:
return smart_read(str(key), **kwargs)
except FileNotFoundError:
if raise_on_nonexist:
raise DoesNotExist(f"Key does not exist: `{key}`")
return None

# def _exists(self, key: Uri) -> bool:
def _get_key_prefix(self) -> str:
return str(self.uri).rstrip("/")

def _iterate_keys(self, prefix: str | None = None) -> Generator[str, None, None]:
path = self.get_key(prefix or "")
mapper = fsspec.get_mapper(path, **ensure_dict(self.backend_config))
for key in mapper.keys():
if prefix:
key = f"{prefix}/{key}"
yield key
64 changes: 64 additions & 0 deletions anystore/store/redis.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
"""
Store backend using redis-like stores such as Redis, Fakeredis or Apache Kvrocks
"""

from typing import Any, Generator
import logging
import os
from functools import cache

from banal import as_bool
import redis
import fakeredis

from anystore.exceptions import DoesNotExist
from anystore.store.base import BaseStore
from anystore.types import Uri, Value


log = logging.getLogger(__name__)


@cache
def get_redis(uri: str) -> fakeredis.FakeStrictRedis | redis.Redis:
if as_bool(os.environ.get("REDIS_DEBUG")):
con = fakeredis.FakeStrictRedis()
con.ping()
log.info("Redis connected: `fakeredis`")
return con
con = redis.from_url(uri)
con.ping()
log.info("Redis connected: `{uri}`")
return con


class RedisStore(BaseStore):
def _write(self, key: Uri, value: Value, **kwargs) -> None:
con = get_redis(self.uri)
con.set(str(key), value, **kwargs)

def _read(self, key: Uri, raise_on_nonexist: bool | None = True, **kwargs) -> Any:
con = get_redis(self.uri)
key = str(key)
# `None` could be stored as an actual value, to implement `raise_on_nonexist`
# we need to check this first:
if raise_on_nonexist and not con.exists(key):
raise DoesNotExist
res = con.get(str(key))
# mimic fs read mode:
if kwargs.get("mode") == "r" and isinstance(res, bytes):
res = res.decode()
return res

def _get_key_prefix(self) -> str:
if self.backend_config is not None:
return self.backend_config.get("redis_prefix") or "anystore"
return "anystore"

def _iterate_keys(self, prefix: str | None = None) -> Generator[str, None, None]:
con = get_redis(self.uri)
prefix = self.get_key(prefix or "") + "*"
key_prefix = self._get_key_prefix()
for key in con.scan_iter(prefix):
key = key.decode()
yield key[len(key_prefix) + 1 :]
Loading

0 comments on commit 22f313f

Please sign in to comment.