Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Move configuration to dataclass #350

Open
wants to merge 8 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
7 changes: 3 additions & 4 deletions README.md
@@ -1,9 +1,8 @@
# cihai · [![Python Package](https://img.shields.io/pypi/v/cihai.svg)](https://pypi.org/project/cihai/) [![License](https://img.shields.io/github/license/cihai/cihai.svg)](https://github.com/cihai/cihai/blob/master/LICENSE) [![Code Coverage](https://codecov.io/gh/cihai/cihai/branch/master/graph/badge.svg)](https://codecov.io/gh/cihai/cihai)

Python library for [CJK](https://cihai.git-pull.com/glossary.html#term-cjk) (Chinese, Japanese,
Korean) data.

This project is under active development. Follow our progress and check back for updates!
Cihai is an open source Python library for looking up and analyzing Chinese
characters, Japanese kanji, Korean Hanja, and Vietnamese Chữ Nôm ([CJK]) across various
datasets.

## Quickstart

Expand Down
2 changes: 1 addition & 1 deletion examples/dataset.py
Expand Up @@ -34,7 +34,7 @@ def run() -> None:
c = Cihai(unihan=False)

c.add_dataset(MyDataset, namespace="moo")
my_dataset = MyDataset()
my_dataset = MyDataset(cihai=c)
my_dataset.bootstrap()

print("Definitions exactly for 好", my_dataset.givemedata("好"))
Expand Down
2 changes: 1 addition & 1 deletion examples/variants.py
Expand Up @@ -22,7 +22,7 @@ def run(unihan_options: t.Optional[t.Dict[str, object]] = None) -> None:
"""Wrapped so we can test in tests/test_examples.py"""
print("This example prints variant character data.")

c = Cihai(config={"unihan_options": unihan_options})
c = Cihai(config={"plugins": {"variants": {"options": unihan_options}}})
if not c.unihan.is_bootstrapped: # download and install Unihan to db
c.unihan.bootstrap()

Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
@@ -1,7 +1,7 @@
[tool.poetry]
name = "cihai"
version = "0.30.0"
description = "Library for CJK (chinese, japanese, korean) language data."
description = "Python library designed for CJK (Chinese, Japanese, Korean, Vietnamese) character and word data retrieval and analysis."
license = "MIT"
authors = ["Tony Narlock <tony@git-pull.com>"]
classifiers = [
Expand Down
92 changes: 70 additions & 22 deletions src/cihai/config.py
@@ -1,23 +1,34 @@
"""Configuration options for Cihai app."""
import dataclasses
import os
import pathlib
import typing as t

from appdirs import AppDirs

from cihai.constants import app_dirs
from cihai.constants import DEFAULT_CONFIG, app_dirs

if t.TYPE_CHECKING:
from .types import UntypedDict
from cihai.constants import Config
from cihai.types import UntypedDict
from unihan_etl._internal.app_dirs import AppDirs

C = t.TypeVar("C", Config, UntypedDict)


def is_default_option(field_name: str, val: t.Any) -> bool:
    """Return True if ``val`` equals the default configured for ``field_name``.

    The default is read from ``DEFAULT_CONFIG`` by attribute name. When
    ``DEFAULT_CONFIG`` has no such attribute, ``val`` is compared against an
    empty string instead.

    Parameters
    ----------
    field_name : str
        Name of the configuration field to look up on ``DEFAULT_CONFIG``.
    val : Any
        Candidate value to compare with the default.

    Returns
    -------
    bool
        Whether ``val`` compares equal to the default value.
    """
    default_value = getattr(DEFAULT_CONFIG, field_name, "")
    matches_default = val == default_value
    # Coerce explicitly: ``==`` may return a non-bool for some operand types.
    return bool(matches_default)

def expand_config(d: "UntypedDict", dirs: "AppDirs" = app_dirs) -> None:

def expand_config(
d: "C",
dirs: "AppDirs" = app_dirs,
) -> "C":
"""
Expand configuration XDG variables, environmental variables, and tildes.

Parameters
----------
d : dict
d : dict or Options
config information
dirs : appdirs.AppDirs
XDG application mapping
Expand Down Expand Up @@ -52,19 +63,56 @@ def expand_config(d: "UntypedDict", dirs: "AppDirs" = app_dirs) -> None:
"site_data_dir": dirs.site_data_dir,
}

if "datasets" in d and "plugins" not in d:
d["datasets"] = {}

for k, v in d.items():
if isinstance(v, dict):
expand_config(v, dirs)
if isinstance(v, str):
d[k] = os.path.expanduser( # NOQA: PTH111
os.path.expandvars(v).format(**context),
)

path = pathlib.Path(t.cast(str, d[k]))
if path.exists() or any(
str(path).startswith(app_dir) for app_dir in context.values()
):
d[k] = path
if dataclasses.is_dataclass(d):
for field in dataclasses.fields(d):
if field.name == "dirs":
continue

v = getattr(d, field.name)
if dataclasses.is_dataclass(v):
setattr(d, field.name, expand_config(getattr(d, field.name)))
v = getattr(d, field.name)

if isinstance(v, dict):
setattr(d, field.name, expand_config(v, dirs))
v = getattr(d, field.name)

if isinstance(v, pathlib.Path):
setattr(d, field.name, str(v))
v = getattr(d, field.name)

if isinstance(v, str):
setattr(
d,
field.name,
os.path.expanduser( # noqa: PTH111
os.path.expandvars(v).format(**context),
),
)

path = pathlib.Path(t.cast(str, getattr(d, field.name)))
if path.exists() or any(
str(path).startswith(str(app_dir)) for app_dir in context.values()
):
setattr(d, field.name, path)
elif isinstance(d, dict):
if "datasets" in d and "plugins" not in d:
d["datasets"] = {}

for k, v in d.items():
if isinstance(v, dict):
v = d[k] = expand_config(v, dirs)
if isinstance(v, pathlib.Path):
v = d[k] = str(v)
if isinstance(v, str):
d[k] = os.path.expanduser( # noqa: PTH111
os.path.expandvars(v).format(**context),
)

path = pathlib.Path(t.cast(str, d[k]))
if path.exists() or any(
str(path).startswith(str(app_dir)) for app_dir in context.values()
):
d[k] = path

return d
104 changes: 86 additions & 18 deletions src/cihai/constants.py
@@ -1,30 +1,98 @@
"""Constants for cihai."""
import dataclasses
import pathlib
import typing as t

from appdirs import AppDirs
from appdirs import AppDirs as BaseAppDirs

from cihai.__about__ import (
__author__,
__package_name__,
)
from unihan_etl._internal.app_dirs import AppDirs

if t.TYPE_CHECKING:
from cihai.types import UntypedDict
from typing_extensions import TypeAlias

from cihai.types import PluginMap


#: XDG App directory locations
app_dirs = AppDirs("cihai", "cihai team")
_app_dirs = BaseAppDirs(__package_name__, __author__)
app_dirs = AppDirs(_app_dirs=_app_dirs)


#: Default configuration
DEFAULT_CONFIG: "UntypedDict" = {
"debug": False,
"database": {"url": "sqlite:///{user_data_dir}/cihai.db"},
"dirs": {
"cache": pathlib.Path(app_dirs.user_cache_dir),
"log": pathlib.Path(app_dirs.user_log_dir),
"data": pathlib.Path(app_dirs.user_data_dir),
},
"datasets": {},
"plugins": {},
}
# DEFAULT_CONFIG: "UntypedDict" = {}


@dataclasses.dataclass
class Database:
    """Database configuration for Cihai."""

    # Database connection URL. ``{user_data_dir}`` is a ``str.format``-style
    # placeholder, not a literal path segment; it is presumably substituted
    # with the application data directory when the configuration is
    # resolved -- confirm against the code that expands the config.
    url: str = "sqlite:///{user_data_dir}/cihai.db"


DatasetName: "TypeAlias" = str
Dataset: "TypeAlias" = str
Datasets = t.Dict[DatasetName, Dataset]


@dataclasses.dataclass
class Config:
    """Cihai configuration.

    ``dirs`` and ``database`` may be supplied in looser forms than their
    annotations suggest; :meth:`__post_init__` normalizes them.
    """

    debug: bool = False
    database: Database = dataclasses.field(default_factory=Database)
    dirs: AppDirs = dataclasses.field(default_factory=lambda: app_dirs)
    datasets: Datasets = dataclasses.field(default_factory=dict)
    plugins: "PluginMap" = dataclasses.field(default_factory=dict)

    def __post_init__(self) -> None:
        """Resolve variables and paths for Cihai configuration.

        - A ``dirs`` value that is a base ``appdirs`` object is wrapped in
          ``AppDirs``.
        - A ``dirs`` value that is a dict is expanded as keyword overrides on
          top of the module-level ``_app_dirs``.
        - A ``database`` value that is a dict is converted to ``Database``.
          A ``url`` given as ``str`` or ``pathlib.Path`` is formatted with the
          fields of ``dirs`` first.
        """
        if isinstance(self.dirs, BaseAppDirs):
            self.dirs = AppDirs(_app_dirs=self.dirs)
        if isinstance(self.dirs, dict):
            self.dirs = AppDirs(_app_dirs=_app_dirs, **self.dirs)

        if isinstance(self.database, dict):
            # Work on a shallow copy so the caller's dict is never mutated.
            database_options = dict(self.database)
            database_url = database_options.get("url")
            if isinstance(database_url, pathlib.Path):
                database_url = str(database_url)
            if isinstance(database_url, str):
                # Fill ``{...}`` placeholders from the directory fields.
                database_options["url"] = database_url.format(
                    **dataclasses.asdict(self.dirs)
                )

            self.database = Database(**database_options)


DEFAULT_CONFIG = Config()


class UnihanConfigDict(t.TypedDict):
    """Unihan configuration dictionary."""

    # Mapping of dataset name to dataset value; both are ``str`` per the
    # ``Datasets`` alias.
    datasets: Datasets


#: User will be prompted to automatically configure their installation for UNIHAN
UNIHAN_CONFIG: "UntypedDict" = {
"datasets": {"unihan": "cihai.data.unihan.dataset.Unihan"},
# Turn off by default for using as a plugin example in examples/
}
UNIHAN_CONFIG: "UnihanConfigDict" = UnihanConfigDict(
{
"datasets": {"unihan": "cihai.data.unihan.dataset.Unihan"},
# Turn off by default for using as a plugin example in examples/
}
)


@dataclasses.dataclass
class UnihanConfig:
    """Unihan configuration."""

    # Default to a *copy* of the module-level mapping: handing out the same
    # dict object would let a change to one instance's ``datasets`` leak into
    # ``UNIHAN_CONFIG`` and into every other instance.
    datasets: Datasets = dataclasses.field(
        default_factory=lambda: dict(UNIHAN_CONFIG["datasets"])
    )