Skip to content

Commit

Permalink
Merge pull request #18 from datarootsio/feature/add-config-option
Browse files Browse the repository at this point in the history
Feature/add config option
  • Loading branch information
murilo-cunha committed Jan 17, 2022
2 parents 08b979e + 59bcacc commit 6febdba
Show file tree
Hide file tree
Showing 19 changed files with 549 additions and 119 deletions.
2 changes: 2 additions & 0 deletions .github/workflows/publish.yml
Expand Up @@ -20,6 +20,8 @@ jobs:
id: git-version
with:
release-branch: main
minor-identifier: feature/
major-identifier: breaking/

publish:
runs-on: ubuntu-latest
Expand Down
1 change: 1 addition & 0 deletions README.md
Expand Up @@ -31,6 +31,7 @@ The key features include:
- [Rich](https://rich.readthedocs.io/en/latest/)
- [Pydantic](https://pydantic-docs.helpmanual.io/)
- [GitPython](https://gitpython.readthedocs.io/en/stable/tutorial.html)
- [Tomli](https://github.com/hukkin/tomli)

## Installation

Expand Down
87 changes: 79 additions & 8 deletions databooks/cli.py
Expand Up @@ -4,16 +4,18 @@
from pathlib import Path
from typing import List, Optional

import tomli
from rich.progress import (
BarColumn,
Progress,
SpinnerColumn,
TextColumn,
TimeElapsedColumn,
)
from typer import Argument, BadParameter, Exit, Option, Typer, echo
from typer import Argument, BadParameter, Context, Exit, Option, Typer, echo

from databooks.common import expand_paths
from databooks.config import TOML_CONFIG_FILE, get_config
from databooks.conflicts import conflicts2nbs, path2conflicts
from databooks.logging import get_logger
from databooks.metadata import clear_all
Expand All @@ -25,17 +27,54 @@
app = Typer()


def version_callback(value: bool) -> None:
def _version_callback(show_version: bool) -> None:
"""Return application version."""
if value:
if show_version:
echo("databooks version: " + _DISTRIBUTION_METADATA["Version"])
raise Exit()


def _help_callback(ctx: Context, show_help: Optional[bool]) -> None:
    """
    Reimplement the `help` option so that it can be executed eagerly.

    :param ctx: Context of the command being invoked
    :param show_help: Value of the help flag; nothing happens when it is falsy
    :return: None (exits the application after printing when the flag is set)
    """
    if not show_help:
        return
    help_text = ctx.command.get_help(ctx)
    echo(help_text)
    raise Exit()


def _config_callback(ctx: Context, config_path: Optional[Path]) -> Optional[Path]:
    """
    Get config file and inject values into context to override default args.

    When no config file is passed explicitly, one is searched for based on the
    `paths` values already parsed into the context. Values found under
    `[tool.databooks.<command name>]` are merged into `ctx.default_map`, with
    dashes in the keys replaced by underscores.
    :param ctx: Context of the command being invoked
    :param config_path: Path of the configuration file passed by the user, if any
    :return: Path of the configuration file that was used (`None` if there is none)
    """
    target_paths = expand_paths(
        paths=[Path(p) for p in ctx.params.get("paths", ())], rglob="*"
    )
    # Only look for a config file when none was given and there are targets
    if config_path is None and target_paths:
        config_path = get_config(
            target_paths=target_paths,
            config_filename=TOML_CONFIG_FILE,
        )
    logger.debug(f"Loading config file from: {config_path}")

    ctx.default_map = ctx.default_map or {}  # initialize defaults

    if config_path is not None:  # config may not be specified
        # TOML documents are UTF-8 by specification. Decode explicitly and use
        # `tomli.loads` (str input), since `tomli.load` on a text-mode handle is
        # deprecated/rejected by newer tomli releases.
        conf = (
            tomli.loads(config_path.read_text(encoding="utf-8"))
            .get("tool", {})
            .get("databooks", {})
            .get(ctx.command.name, {})
        )
        # Merge configuration
        ctx.default_map.update({k.replace("-", "_"): v for k, v in conf.items()})
    return config_path


@app.callback()
def callback( # noqa: D103
version: Optional[bool] = Option(
None, "--version", callback=version_callback, is_eager=True
None, "--version", callback=_version_callback, is_eager=True
)
) -> None:
...
Expand All @@ -45,9 +84,9 @@ def callback( # noqa: D103
callback.__doc__ = _DISTRIBUTION_METADATA["Summary"]


@app.command()
@app.command(add_help_option=False)
def meta(
paths: List[Path] = Argument(..., help="Path(s) of notebook files"),
paths: List[Path] = Argument(..., is_eager=True, help="Path(s) of notebook files"),
ignore: List[str] = Option(["!*"], help="Glob expression(s) of files to ignore"),
prefix: str = Option("", help="Prefix to add to filepath when writing files"),
suffix: str = Option("", help="Suffix to add to filepath when writing files"),
Expand All @@ -70,13 +109,30 @@ def meta(
verbose: bool = Option(
False, "--verbose", "-v", help="Log processed files in console"
),
config: Optional[Path] = Option(
None,
"--config",
"-c",
is_eager=True,
callback=_config_callback,
resolve_path=True,
exists=True,
help="Get CLI options from configuration file",
),
help: Optional[bool] = Option(
None, is_eager=True, callback=_help_callback, help="Show this message and exit"
),
) -> None:
"""Clear both notebook and cell metadata."""
if any(path.suffix not in ("", ".ipynb") for path in paths):
raise BadParameter(
"Expected either notebook files, a directory or glob expression."
)
nb_paths = expand_paths(paths=paths, ignore=ignore)
if not nb_paths:
logger.info(f"No notebooks found in {paths}. Nothing to do.")
raise Exit()

if not bool(prefix + suffix) and not check:
if not overwrite:
raise BadParameter(
Expand Down Expand Up @@ -125,9 +181,11 @@ def meta(
)


@app.command()
@app.command(add_help_option=False)
def fix(
paths: List[Path] = Argument(..., help="Path(s) of notebook files with conflicts"),
paths: List[Path] = Argument(
..., is_eager=True, help="Path(s) of notebook files with conflicts"
),
ignore: List[str] = Option(["!*"], help="Glob expression(s) of files to ignore"),
metadata_head: bool = Option(
True, help="Whether or not to keep the metadata from the head/current notebook"
Expand All @@ -150,6 +208,19 @@ def fix(
help="Interactively resolve the conflicts (not implemented)",
),
verbose: bool = Option(False, help="Log processed files in console"),
config: Optional[Path] = Option(
None,
"--config",
"-c",
is_eager=True,
callback=_config_callback,
resolve_path=True,
exists=True,
help="Get CLI options from configuration file",
),
help: Optional[bool] = Option(
None, is_eager=True, callback=_help_callback, help="Show this message and exit"
),
) -> None:
"""
Fix git conflicts for notebooks.
Expand Down
71 changes: 62 additions & 9 deletions databooks/common.py
Expand Up @@ -2,9 +2,12 @@
import json
from itertools import chain
from pathlib import Path
from typing import List
from typing import Iterable, List, Optional

from databooks import JupyterNotebook
from databooks.logging import get_logger

logger = get_logger(__file__)


def write_notebook(nb: JupyterNotebook, path: Path) -> None:
Expand All @@ -13,22 +16,72 @@ def write_notebook(nb: JupyterNotebook, path: Path) -> None:
json.dump(nb.dict(), fp=f, indent=2)


def expand_paths(paths: List[Path], ignore: List[str]) -> List[Path]:
def expand_paths(
paths: List[Path], *, ignore: List[str] = ["!*"], rglob: str = "*.ipynb"
) -> List[Path]:
"""
Get paths of existing file from list of directory or file paths.
:param paths: Paths to consider (can be directories or files)
:param ignore: Glob expressions of files to ignore
:return: List of existing paths for notebooks
:param rglob: Glob expression for expanding directory paths and filtering out
existing file paths (i.e.: to retrieve only notebooks)
:return: List of existing file paths
"""
paths = list(
filepaths = list(
chain.from_iterable(
list(path.rglob("*.ipynb")) if path.is_dir() else [path] for path in paths
list(path.rglob(rglob)) if path.is_dir() else [path] for path in paths
)
)

return [
valid_filepaths = [
p
for p in paths
if not any(p.match(i) for i in ignore) and p.exists() and p.suffix == ".ipynb"
for p in filepaths
if not any(p.match(i) for i in ignore) and p.is_file() and p.match(rglob)
]

if not valid_filepaths:
logger.debug(
f"There are no files in {paths} (ignoring {ignore}) that match `{rglob}`."
)
return valid_filepaths


def find_common_parent(paths: Iterable[Path]) -> Path:
    """
    Find the deepest common parent directory amongst several file paths.

    Every path is resolved before its parents are compared. Only strict parents
    are considered, so for a single path the result is that path's parent.
    :param paths: Paths to consider (any iterable, including generators)
    :return: Deepest directory that is a parent of all paths
    :raises ValueError: If `paths` is empty or the paths share no common parent
    """
    # Materialise first: generators are always truthy and can be consumed only once
    paths = list(paths)
    if not paths:
        raise ValueError(f"Expected non-empty `paths`, got {paths}.")

    common_parents = set.intersection(*(set(p.resolve().parents) for p in paths))
    if not common_parents:
        raise ValueError(f"Paths {paths} have no common parent directory.")
    # All candidates lie on a single ancestor chain, so the longest is the deepest
    return max(common_parents, key=lambda parent: len(parent.parts))


def find_obj(
    obj_name: str, start: Path, finish: Path, is_dir: bool = False
) -> Optional[Path]:
    """
    Find file along directory path, from the end (child) directory to start.

    Both directories are resolved once up front, so relative or non-normalised
    inputs (e.g. `.` or `some/dir/..`) are handled and the search always
    terminates. The returned path is therefore resolved as well.
    :param obj_name: File (or directory) name to locate
    :param start: Start (parent) directory
    :param finish: Finish (child) directory
    :param is_dir: Whether object is a directory or a file
    :return: Path of the object closest to `finish`, or `None` if it is not found or
     `start` is not `finish` nor one of its parents
    :raises ValueError: If `start` or `finish` are not directories
    """
    if not start.is_dir() or not finish.is_dir():
        raise ValueError("Parameters `start` and `finish` must be directories.")

    start, finish = start.resolve(), finish.resolve()
    search_dirs = (finish, *finish.parents)  # child first, filesystem root last

    if start not in search_dirs:
        logger.debug(
            f"Parameter `start` is not a parent directory of `finish` (for {start} and"
            f" {finish}). Cannot find {obj_name}."
        )
        return None

    for directory in search_dirs:
        candidate = directory / obj_name
        if candidate.is_dir() if is_dir else candidate.is_file():
            return candidate
        if directory == start:  # do not search above the start directory
            break

    logger.debug(f"{obj_name} not found between {start} and {finish}.")
    return None
26 changes: 26 additions & 0 deletions databooks/config.py
@@ -0,0 +1,26 @@
"""Configuration functions, and settings objects."""
from pathlib import Path
from typing import Any, Dict, List, Optional

from databooks.common import find_common_parent, find_obj
from databooks.git_utils import get_repo
from databooks.logging import get_logger

TOML_CONFIG_FILE = "pyproject.toml"
INI_CONFIG_FILE = "settings.ini"

ConfigFields = Dict[str, Any]

logger = get_logger(__file__)


def get_config(target_paths: List[Path], config_filename: str) -> Optional[Path]:
    """
    Find configuration file from CLI target paths.

    The search runs from the common parent of the target paths up to the working
    directory of the git repository or, when that is `None`, up to the anchor of
    the common parent.
    :param target_paths: Paths passed to the CLI
    :param config_filename: Name of the configuration file to look for
    :return: Path of the configuration file, if one is found
    """
    finish_dir = find_common_parent(paths=target_paths)
    repo_root = get_repo().working_dir

    if repo_root is not None:
        start_dir = Path(repo_root)
    else:
        start_dir = Path(finish_dir.anchor)

    return find_obj(obj_name=config_filename, start=start_dir, finish=finish_dir)
4 changes: 2 additions & 2 deletions databooks/conflicts.py
Expand Up @@ -7,7 +7,7 @@

from git import Repo

from databooks.common import write_notebook
from databooks.common import find_common_parent, write_notebook
from databooks.data_models.notebook import Cell, Cells, JupyterNotebook
from databooks.git_utils import ConflictFile, get_conflict_blobs, get_repo
from databooks.logging import get_logger, set_verbose
Expand All @@ -29,7 +29,7 @@ def path2conflicts(
raise ValueError(
"Expected either notebook files, a directory or glob expression."
)
common_parent = max(set.intersection(*[set(p.parents) for p in nb_paths]))
common_parent = find_common_parent(nb_paths)
repo = get_repo(common_parent) if repo is None else repo
return [
file
Expand Down
5 changes: 1 addition & 4 deletions databooks/data_models/base.py
Expand Up @@ -28,8 +28,7 @@ class DiffModel(Protocol, Iterable):
is_diff: bool

def resolve(self, *args: Any, **kwargs: Any) -> DatabooksBase:
"""Return a valid base object."""
...
"""Protocol method that returns a valid base object."""


class BaseCells(UserList, Generic[T]):
Expand All @@ -40,8 +39,6 @@ def resolve(self, **kwargs: Any) -> list:
"""Return valid notebook cells from differences."""
raise NotImplementedError

...


@overload
def resolve(
Expand Down
4 changes: 0 additions & 4 deletions databooks/data_models/notebook.py
Expand Up @@ -31,14 +31,10 @@
class NotebookMetadata(DatabooksBase):
"""Notebook metadata. Empty by default but can accept extra fields."""

...


class CellMetadata(DatabooksBase):
"""Cell metadata. Empty by default but can accept extra fields."""

...


class Cell(DatabooksBase):
"""
Expand Down
8 changes: 6 additions & 2 deletions databooks/git_utils.py
Expand Up @@ -5,6 +5,7 @@

from git import Blob, Git, Repo # type: ignore

from databooks.common import find_obj
from databooks.logging import get_logger

logger = get_logger(name=__file__)
Expand All @@ -31,8 +32,11 @@ class ConflictFile:

def get_repo(path: Path = Path.cwd()) -> Repo:
"""Find git repo in current or parent directories."""
repo = Repo(path=path, search_parent_directories=True)
logger.info(f"Repo found at: {repo.working_dir}")
repo_dir = find_obj(
obj_name=".git", start=Path(path.anchor), finish=path, is_dir=True
)
repo = Repo(path=repo_dir)
logger.debug(f"Repo found at: {repo.working_dir}")
return repo


Expand Down
4 changes: 3 additions & 1 deletion databooks/logging.py
@@ -1,12 +1,14 @@
"""Logging helper functions."""

import logging
import os

from rich.logging import RichHandler


def get_logger(name: str, level: str = "INFO") -> logging.Logger:
def get_logger(name: str) -> logging.Logger:
"""Get logger with rich configuration."""
level = os.getenv("LOG_LEVEL", logging.INFO)
logging.basicConfig(
level=level,
format="%(message)s",
Expand Down

0 comments on commit 6febdba

Please sign in to comment.