Skip to content

Commit

Permalink
feat: configure with pyproject (#82)
Browse files Browse the repository at this point in the history
  • Loading branch information
julesbertrand committed Nov 6, 2023
1 parent ad4c166 commit 56d69f4
Show file tree
Hide file tree
Showing 10 changed files with 316 additions and 35 deletions.
9 changes: 7 additions & 2 deletions .github/workflows/ci.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,11 @@ jobs:
cd example
poetry run vertex-deployer --version
- name: Test config command
run: |
cd example
poetry run vertex-deployer config --all
- name: Test list command
run: |
cd example
Expand All @@ -95,7 +100,7 @@ jobs:
- name: Test check command
run: |
cd example
poetry run vertex-deployer -log DEBUG check --all
poetry run vertex-deployer check --all
- name: Test deploy command
# Cannot check more than compile action here, need GCP environment for upload, run, schedule
Expand All @@ -107,4 +112,4 @@ jobs:
run: |
cd example
poetry run vertex-deployer create test_pipeline --config-type py
[ -e vertex/pipelines/test_pipeline.py ] && echo 1 || echo 0
[ -e example/vertex/pipelines/test_pipeline.py ] && echo 1 || echo 0
64 changes: 62 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,9 @@
- [CLI: Other commands](#🛠️-cli-other-commands)
- [`create`](#create)
- [`list`](#list)
- [CLI: Options](#🍭-cli-options)
- [`config`](#config)
- [CLI: Options](#cli-options)
- [Configuration](#configuration)

[Full CLI documentation](docs/CLI_REFERENCE.md)

Expand Down Expand Up @@ -208,7 +210,8 @@ vertex
You must have at least these files. If you need to share some config elements between pipelines,
you can have a `shared` folder in `configs` and import them in your pipeline configs.

You can use the [`create`](../usage#create) command to create a new pipeline and config files.
If you're following a different folder structure, you can change the default paths in the `pyproject.toml` file.
See [Configuration](#configuration) section for more information.

#### Pipelines

Expand Down Expand Up @@ -328,6 +331,14 @@ You can list all pipelines in the `vertex/pipelines` folder using the `list` com
vertex-deployer list --with-configs
```

#### `config`

You can check your `vertex-deployer` configuration options using the `config` command.
Fields set in `pyproject.toml` will overwrite default values and will be displayed differently:
```bash
vertex-deployer config --all
```

### 🍭 CLI: Options

```bash
Expand All @@ -346,6 +357,54 @@ vertex-deployer --log-level DEBUG deploy ...

<!-- --8<-- [end:usage] -->

## Configuration

You can configure the deployer using the `pyproject.toml` file to better fit your needs.
This will overwrite default values. It can be useful if you always use the same options, e.g. always the same `--scheduler-timezone`

```toml
[tool.vertex-deployer]
pipelines_root_path = "my/path/to/vertex/pipelines"
config_root_path = "my/path/to/vertex/configs"
log_level = "INFO"

[tool.vertex-deployer.deploy]
scheduler_timezone = "Europe/Paris"
```

You can display all the configurable parameters with default values by running:
```bash
$ vertex-deployer config --all
'*' means the value was set in config file

* pipelines_root_path=my/path/to/vertex/pipelines
* config_root_path=my/path/to/vertex/configs
* log_level=INFO
deploy
env_file=None
compile=True
upload=False
run=False
schedule=False
cron=None
delete_last_schedule=False
* scheduler_timezone=Europe/Paris
tags=['latest']
config_filepath=None
config_name=None
enable_caching=False
experiment_name=None
local_package_path=vertex/pipelines/compiled_pipelines
check
all=False
config_filepath=None
raise_error=False
list
with_configs=True
create
config_type=json
```

## Repository Structure

```
Expand All @@ -360,6 +419,7 @@ vertex-deployer --log-level DEBUG deploy ...
├─ deployer # Source code
│ ├─ __init__.py
│ ├─ cli.py
│ ├─ configuration.py
│ ├─ constants.py
│ ├─ pipeline_checks.py
│ ├─ pipeline_deployer.py
Expand Down
87 changes: 68 additions & 19 deletions deployer/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,17 +2,18 @@
from pathlib import Path
from typing import List

import rich.traceback
import typer
from loguru import logger
from pydantic import ValidationError
from typing_extensions import Annotated

from deployer.configuration import load_configuration
from deployer.constants import (
CONFIG_ROOT_PATH,
DEFAULT_LOCAL_PACKAGE_PATH,
DEFAULT_SCHEDULER_TIMEZONE,
DEFAULT_TAGS,
PIPELINE_MINIMAL_TEMPLATE,
PIPELINE_ROOT_PATH,
PYTHON_CONFIG_TEMPLATE,
)
from deployer.utils.config import (
Expand All @@ -23,12 +24,19 @@
)
from deployer.utils.logging import LoguruLevel, console
from deployer.utils.utils import (
dict_to_repr,
import_pipeline_from_dir,
make_enum_from_python_package_dir,
print_check_results_table,
print_pipelines_list,
)

rich.traceback.install()

deployer_config = load_configuration()

PipelineName = make_enum_from_python_package_dir(deployer_config.pipelines_root_path)


def display_version_and_exit(value: bool):
if value:
Expand All @@ -38,12 +46,16 @@ def display_version_and_exit(value: bool):
raise typer.Exit()


app = typer.Typer(no_args_is_help=True, rich_help_panel="rich", rich_markup_mode="markdown")
app = typer.Typer(
no_args_is_help=True,
rich_help_panel="rich",
rich_markup_mode="markdown",
context_settings={"default_map": deployer_config.model_dump()},
)


@app.callback(name="set_logger")
def cli_set_logger(
ctx: typer.Context,
@app.callback(invoke_without_command=True)
def main(
log_level: Annotated[
LoguruLevel, typer.Option("--log-level", "-log", help="Set the logging level.")
] = LoguruLevel.INFO,
Expand All @@ -60,7 +72,26 @@ def cli_set_logger(
logger.configure(handlers=[{"sink": sys.stderr, "level": log_level}])


PipelineName = make_enum_from_python_package_dir(PIPELINE_ROOT_PATH)
@app.command()
def config(
    all: Annotated[
        bool, typer.Option("--all", "-a", help="Whether to display all configuration values.")
    ] = False,
):
    """Display the configuration from pyproject.toml."""
    # Values explicitly set in the config file (as opposed to defaults).
    overridden = deployer_config.model_dump(exclude_unset=True)

    if not all:
        # Only show what the user actually configured.
        output = "\n".join(dict_to_repr(dict_=overridden))
    else:
        # Show every option; entries present in `overridden` are marked with '*'.
        lines = dict_to_repr(dict_=deployer_config.model_dump(), subdict=overridden)
        output = "[italic]'*' means the value was set in config file[/italic]\n\n"
        output += "\n".join(lines)

    console.print(output)


@app.command(no_args_is_help=True)
Expand Down Expand Up @@ -116,6 +147,13 @@ def deploy( # noqa: C901
help="Whether to delete the previous schedule before creating a new one.",
),
] = False,
scheduler_timezone: Annotated[
str,
typer.Option(
help="Timezone for scheduling the pipeline."
" Must be a valid string from IANA time zone database",
),
] = DEFAULT_SCHEDULER_TIMEZONE,
tags: Annotated[
List[str], typer.Option(help="The tags to use when uploading the pipeline.")
] = DEFAULT_TAGS,
Expand Down Expand Up @@ -186,7 +224,9 @@ def deploy( # noqa: C901
" Please specify only one to run or schedule a pipeline."
)

pipeline_func = import_pipeline_from_dir(PIPELINE_ROOT_PATH, pipeline_name.value)
pipeline_func = import_pipeline_from_dir(
deployer_config.pipelines_root_path, pipeline_name.value
)

from deployer.pipeline_deployer import VertexPipelineDeployer

Expand All @@ -204,7 +244,9 @@ def deploy( # noqa: C901

if run or schedule:
if config_name is not None:
config_filepath = Path(CONFIG_ROOT_PATH) / pipeline_name.value / config_name
config_filepath = (
Path(deployer_config.config_root_path) / pipeline_name.value / config_name
)
parameter_values, input_artifacts = load_config(config_filepath)

if compile:
Expand Down Expand Up @@ -234,6 +276,7 @@ def deploy( # noqa: C901
parameter_values=parameter_values,
tag=tags[0] if tags else None,
delete_last_schedule=delete_last_schedule,
scheduler_timezone=scheduler_timezone,
)


Expand Down Expand Up @@ -289,7 +332,7 @@ def check(
if len(PipelineName.__members__) == 0:
raise ValueError(
"No pipeline found. Please check that the pipeline root path is correct"
f" ('{PIPELINE_ROOT_PATH}')"
f" ('{deployer_config.pipelines_root_path}')"
)

from deployer.pipeline_checks import Pipelines
Expand All @@ -302,7 +345,8 @@ def check(
pipelines_to_check = [pipeline_name]
if config_filepath is None:
to_check = {
p.value: list_config_filepaths(CONFIG_ROOT_PATH, p.value) for p in pipelines_to_check
p.value: list_config_filepaths(deployer_config.config_root_path, p.value)
for p in pipelines_to_check
}
else:
to_check = {p.value: [config_filepath] for p in pipelines_to_check}
Expand All @@ -312,7 +356,12 @@ def check(
Pipelines.model_validate(
{
"pipelines": {
p: {"pipeline_name": p, "config_paths": config_filepaths}
p: {
"pipeline_name": p,
"config_paths": config_filepaths,
"pipelines_root_path": deployer_config.pipelines_root_path,
"config_root_path": deployer_config.config_root_path,
}
for p, config_filepaths in to_check.items()
}
}
Expand All @@ -339,13 +388,13 @@ def list(
if len(PipelineName.__members__) == 0:
logger.warning(
"No pipeline found. Please check that the pipeline root path is"
f" correct (current: '{PIPELINE_ROOT_PATH}')"
f" correct (current: '{deployer_config.pipelines_root_path}')"
)
raise typer.Exit()

if with_configs:
pipelines_dict = {
p.name: list_config_filepaths(CONFIG_ROOT_PATH, p.name)
p.name: list_config_filepaths(deployer_config.config_root_path, p.name)
for p in PipelineName.__members__.values()
}
else:
Expand All @@ -368,18 +417,18 @@ def create(
"""Create files structure for a new pipeline."""
logger.info(f"Creating pipeline {pipeline_name}")

if not Path(PIPELINE_ROOT_PATH).is_dir():
if not Path(deployer_config.pipelines_root_path).is_dir():
raise FileNotFoundError(
f"Pipeline root path '{PIPELINE_ROOT_PATH}' does not exist."
f"Pipeline root path '{deployer_config.pipelines_root_path}' does not exist."
" Please check that the pipeline root path is correct"
f" or create it with `mkdir -p {PIPELINE_ROOT_PATH}`."
f" or create it with `mkdir -p {deployer_config.pipelines_root_path}`."
)

pipeline_filepath = Path(PIPELINE_ROOT_PATH) / f"{pipeline_name}.py"
pipeline_filepath = Path(deployer_config.pipelines_root_path) / f"{pipeline_name}.py"
pipeline_filepath.touch(exist_ok=False)
pipeline_filepath.write_text(PIPELINE_MINIMAL_TEMPLATE.format(pipeline_name=pipeline_name))

config_dirpath = Path(CONFIG_ROOT_PATH) / pipeline_name
config_dirpath = Path(deployer_config.config_root_path) / pipeline_name
config_dirpath.mkdir(exist_ok=False)
for config_name in ["test", "dev", "prod"]:
config_filepath = config_dirpath / f"{config_name}.{config_type}"
Expand Down
Loading

0 comments on commit 56d69f4

Please sign in to comment.