Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Converting PickledObjectFilesystemIOManager to use UPathIOManager (#10273)
* converted PickledObjectFilesystemIOManager to use UPathIOManager
* remove experimental annotation
* add kwargs for UPath
* fix docstring
* refactor load_input logic
* fix typo
* add UPathIOManager docs
* UPathIOManager brought into main dagster scope
* fix wording
* allow omitting type annotations for loading multiple partitions
* moved UPathIOManager docs to Examples
* fix get_metadata call
* remove pandas from tests & fix some issues with docs
* add blank line
* make mdx-format
* fix issues
* fix import
* fix docs issues
* fix graphql test & toy script
* fix toy IO manager
* fix apidoc
* fix typo
* remove unused import
- Loading branch information
1 parent
c5c7422
commit 40671c2
Showing
15 changed files
with
629 additions
and
267 deletions.
There are no files selected for viewing
Large diffs are not rendered by default.
Oops, something went wrong.
Large diffs are not rendered by default.
Oops, something went wrong.
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
61 changes: 61 additions & 0 deletions
61
examples/docs_snippets/docs_snippets/concepts/io_management/filesystem_io_manager.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,61 @@ | ||
# start_marker | ||
import pandas as pd | ||
from upath import UPath | ||
|
||
from dagster import ( | ||
Field, | ||
InitResourceContext, | ||
InputContext, | ||
OutputContext, | ||
StringSource, | ||
UPathIOManager, | ||
io_manager, | ||
) | ||
|
||
|
||
class PandasParquetIOManager(UPathIOManager):
    """IO manager that persists pandas DataFrames as parquet files.

    Only the two serialization hooks are implemented here; everything else
    is inherited from ``UPathIOManager``.
    """

    # File suffix for stored objects (presumably appended by the base class
    # when it builds paths -- confirm against UPathIOManager).
    extension: str = ".parquet"

    def dump_to_path(self, context: OutputContext, obj: pd.DataFrame, path: UPath):
        """Write ``obj`` in parquet format to ``path``."""
        with path.open("wb") as sink:
            obj.to_parquet(sink)

    def load_from_path(self, context: InputContext, path: UPath) -> pd.DataFrame:
        """Read the parquet file at ``path`` and return it as a DataFrame."""
        with path.open("rb") as source:
            frame = pd.read_parquet(source)
        return frame
|
||
|
||
# end_class_marker | ||
|
||
# the IO Manager can be used with any filesystem (see https://github.com/fsspec/universal_pathlib) | ||
|
||
# start_def_marker | ||
@io_manager(config_schema={"base_path": Field(str, is_required=False)})
def local_pandas_parquet_io_manager(
    init_context: InitResourceContext,
) -> PandasParquetIOManager:
    # Builds a PandasParquetIOManager rooted at the optional "base_path"
    # config value, falling back to the instance's storage directory.
    instance = init_context.instance
    assert instance is not None  # to please mypy
    # The fallback is computed up front, exactly as a `.get` default would be.
    fallback_dir = instance.storage_directory()
    chosen_dir = init_context.resource_config.get("base_path", fallback_dir)
    return PandasParquetIOManager(base_path=UPath(chosen_dir))
|
||
|
||
@io_manager(
    config_schema={
        "base_path": Field(str, is_required=True),
        "AWS_ACCESS_KEY_ID": StringSource,
        "AWS_SECRET_ACCESS_KEY": StringSource,
    }
)
def s3_parquet_io_manager(init_context: InitResourceContext) -> PandasParquetIOManager:
    # Builds a PandasParquetIOManager rooted at the configured "base_path",
    # which must be an s3:// URL.
    #
    # Raises:
    #     ValueError: if the configured base path does not start with "s3://".
    #
    # `UPath` will read boto env vars.
    # The credentials can also be taken from the config and passed to `UPath` directly.
    # NOTE(review): the AWS_* config entries are declared in the schema above
    # but are not read anywhere in this function.
    base_path = UPath(init_context.resource_config.get("base_path"))
    # Validate with an explicit exception instead of `assert`: assertions are
    # stripped under `python -O`, which would let a non-S3 path through.
    if not str(base_path).startswith("s3://"):
        raise ValueError(f"base_path must start with 's3://', got: {base_path}")
    return PandasParquetIOManager(base_path=base_path)
|
||
|
||
# end_marker |
39 changes: 39 additions & 0 deletions
39
...ocs_snippets/docs_snippets/concepts/io_management/loading_multiple_upstream_partitions.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,39 @@ | ||
from datetime import datetime | ||
from typing import Dict | ||
|
||
import pandas as pd | ||
|
||
from dagster import ( | ||
DailyPartitionsDefinition, | ||
HourlyPartitionsDefinition, | ||
OpExecutionContext, | ||
asset, | ||
materialize, | ||
) | ||
|
||
# Shared start instant for both partition schemes.
start = datetime(2022, 1, 1)

# Each definition takes its start date as a string rendered from `start`.
hourly_partitions = HourlyPartitionsDefinition(
    start_date=start.strftime("%Y-%m-%d-%H:%M")
)
daily_partitions = DailyPartitionsDefinition(
    start_date=start.strftime("%Y-%m-%d")
)
|
||
|
||
@asset(partitions_def=hourly_partitions)
def upstream_asset(context: OpExecutionContext) -> pd.DataFrame:
    # Produces a one-row frame whose "date" column holds the partition key
    # of the current run.
    key = context.partition_key
    return pd.DataFrame({"date": [key]})
|
||
|
||
@asset(
    partitions_def=daily_partitions,
)
def downstream_asset(upstream_asset: Dict[str, pd.DataFrame]) -> pd.DataFrame:
    # Receives a dict of upstream DataFrames and stacks all of them into a
    # single frame, in the dict's iteration order.
    frames = [*upstream_asset.values()]
    return pd.concat(frames)
|
||
|
||
# Materialize the downstream asset for the first daily partition, with the
# upstream asset supplied as source assets.
result = materialize(
    list(upstream_asset.to_source_assets()) + [downstream_asset],
    partition_key=start.strftime(daily_partitions.fmt),
)
downstream_asset_data = result.output_for_node("downstream_asset", "result")
# One daily partition should have pulled in one row per upstream hour.
row_count = len(downstream_asset_data)
assert row_count == 24, "downstream day should map to upstream 24 hours"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.
40671c2
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Successfully deployed to the following URLs:
dagster – ./docs/next
dagster.vercel.app
dagster-elementl.vercel.app
new-docs.dagster.io
dagster-git-master-elementl.vercel.app
docs.dagster.io