Skip to content

Commit

Permalink
rename conditional branching example to nyt and add README (#6757)
Browse files Browse the repository at this point in the history
  • Loading branch information
sryza committed Feb 23, 2022
1 parent d7d1351 commit 640aa5d
Show file tree
Hide file tree
Showing 7 changed files with 23 additions and 17 deletions.
13 changes: 13 additions & 0 deletions examples/nyt-feed/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
## New York Times feed example

A job that pulls down metadata about New York Times articles, writes the articles to CSV files, and reports them in Slack.

### Loading the example in Dagit

```
dagit -w workspace.yaml
```

### Features demonstrated

- Conditional branching.
- Custom `IOManager`.
- Reusing an op twice in the same job.
File renamed without changes.
Original file line number Diff line number Diff line change
Expand Up @@ -54,9 +54,7 @@ def fetch_stories():
for article in tree[0].findall("item"):
all_articles.append(article)

if [
category for category in article.findall("category") if category.text == "New York City"
]:
if any(category.text == "New York City" for category in article.findall("category")):
nyc_articles.append(article)

yield Output(all_articles, "all_articles")
Expand Down Expand Up @@ -97,10 +95,8 @@ def send_slack_msg(context, articles):
context.resources.slack.chat_postMessage(channel="my-news-channel", text=formatted_str)


@job(
resource_defs={"slack": mock_slack_resource},
)
def conditional_branching():
@job(resource_defs={"slack": mock_slack_resource})
def process_nyt_feed():
all_articles, nyc_articles = fetch_stories()
write_to_csv.alias("nyc_csv")(parse_xml(nyc_articles))
write_to_csv.alias("all_csv")(parse_xml(all_articles))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,16 +4,13 @@
import pandas as pd
from dagster import build_init_resource_context, build_input_context, build_output_context
from dagster.core.test_utils import instance_for_test
from dagster_conditional_branching.conditional_branching_job import (
conditional_branching,
df_to_csv_io_manager,
)
from nyt_feed.nyt_feed_job import df_to_csv_io_manager, process_nyt_feed


def test_conditional_branching():
def test_nyt_feed_job():
with tempfile.TemporaryDirectory() as tmp_dir:
with instance_for_test(temp_dir=tmp_dir) as instance:
assert conditional_branching.execute_in_process(
assert process_nyt_feed.execute_in_process(
instance=instance,
run_config={
"ops": {
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from setuptools import find_packages, setup

setup(
name="dagster_conditional_branching",
name="nyt_feed",
version="dev",
author_email="hello@elementl.com",
packages=find_packages(exclude=["test"]),
Expand All @@ -12,8 +12,8 @@
include_package_data=True,
author="Elementl",
license="Apache-2.0",
description="Dagster example for an ETL pipeline that branches based on run-time info",
url="https://github.com/dagster-io/dagster/tree/master/examples/dagster-conditional-branching",
description="Dagster example for an ETL pipeline that pulls down metadata about New York Times articles, writes them to a CSV, and reports them in Slack.",
url="https://github.com/dagster-io/dagster/tree/master/examples/nyt-feed",
classifiers=[
"Programming Language :: Python :: 3.6",
"Programming Language :: Python :: 3.7",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,4 +21,4 @@ whitelist_externals =
pylint
commands =
echo -e "--- \033[0;32m:lint-roller: Running pylint\033[0m"
pylint -j 0 --rcfile=../../.pylintrc dagster_conditional_branching dagster_conditional_branching_tests
pylint -j 0 --rcfile=../../.pylintrc nyt_feed nyt_feed_tests

0 comments on commit 640aa5d

Please sign in to comment.