
Commit 26daf2d

fix(examples): Rewrite examples using the chain style API (#147)
Rewrite the examples to use the chain style API. The billing example is migrated in a separate PR. Remove some of the helper files since they weren't being used outside of the example and weren't very complicated. Also skip the `alerts.py` file since it uses a FlatMap, and that has to be implemented in a separate PR.
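For orientation, the chain style threads each stage off the source instead of declaring nodes individually and wiring them through ctx/inputs. The snippet below is condensed from the span pipeline diff further down (the Reducer and Map stages are elided here), so the imports, stage names, and arguments all come from this commit:

from sentry_kafka_schemas.schema_types.snuba_spans_v1 import SpanEvent

from sentry_streams.pipeline import Parser, StreamSink, streaming_source

# Source -> parse raw bytes into typed SpanEvent messages -> sink.
pipeline = (
    streaming_source(name="myinput", stream_name="events")
    .apply("mymap", Parser(msg_type=SpanEvent))
    .sink("kafkasink", StreamSink(stream_name="transformed-events"))
)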
1 parent c219813 commit 26daf2d

File tree

7 files changed: +613 / -655 lines
Lines changed: 8 additions & 6 deletions

@@ -1,5 +1,7 @@
 import json
 
+from sentry_streams.pipeline.message import Message
+
 
 class BroadcastFunctions:
     """
@@ -11,15 +13,15 @@ class BroadcastFunctions:
     """
 
     @staticmethod
-    def no_op_map(value: str) -> str:
-        return value
+    def no_op_map(value: Message[bytes]) -> str:
+        return value.payload.decode("utf-8")
 
     @staticmethod
-    def hello_map(value: str) -> str:
-        name = json.loads(value)["name"]
+    def hello_map(value: Message[str]) -> str:
+        name = json.loads(value.payload)["name"]
         return f"Hello, {name}!"
 
     @staticmethod
-    def goodbye_map(value: str) -> str:
-        name = json.loads(value)["name"]
+    def goodbye_map(value: Message[str]) -> str:
+        name = json.loads(value.payload)["name"]
        return f"Goodbye, {name}."
sentry_streams/sentry_streams/examples/span_helpers.py (path not shown in this extract; inferred from the import in the span pipeline diff below)

Lines changed: 55 additions & 0 deletions

@@ -0,0 +1,55 @@
+import json
+from dataclasses import dataclass
+from typing import Self
+
+from sentry_kafka_schemas.schema_types.snuba_spans_v1 import SpanEvent
+
+from sentry_streams.pipeline.function_template import (
+    Accumulator,
+)
+from sentry_streams.pipeline.message import Message
+
+
+@dataclass
+class Segment:
+    total_duration: int
+    spans: list[SpanEvent]
+
+
+def build_segment_json(message: Message[Segment]) -> str:
+    """
+    Build a JSON str from a Segment
+    """
+    value = message.payload
+    d = {"segment": value.spans, "total_duration": value.total_duration}
+
+    return json.dumps(d)
+
+
+class SpansBuffer(Accumulator[Message[SpanEvent], Segment]):
+    """
+    Ingests spans into a window. Builds a Segment from each
+    window, which contains the list of SpanEvents seen as well
+    as the total duration across SpanEvents.
+
+    TODO: Group by trace_id
+    """
+
+    def __init__(self) -> None:
+        self.spans_list: list[SpanEvent] = []
+        self.total_duration = 0
+
+    def add(self, value: Message[SpanEvent]) -> Self:
+        self.spans_list.append(value.payload)
+        self.total_duration += value.payload["duration_ms"]
+
+        return self
+
+    def get_value(self) -> Segment:
+        return Segment(self.total_duration, self.spans_list)
+
+    def merge(self, other: Self) -> Self:
+        self.spans_list = self.spans_list + other.spans_list
+        self.total_duration = self.total_duration + other.total_duration
+
+        return self
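A short sketch of the Accumulator contract this class implements: add folds one span in, merge combines partial aggregates, and get_value emits the Segment. FakeMessage is a hypothetical stand-in for the library's Message wrapper (only .payload is modeled), and the bare dicts carry just the duration_ms field the buffer actually reads; real SpanEvent payloads have many more fields:

from dataclasses import dataclass
from typing import Generic, TypeVar

from sentry_streams.examples.span_helpers import SpansBuffer

T = TypeVar("T")


@dataclass
class FakeMessage(Generic[T]):
    payload: T  # hypothetical stand-in; only .payload is modeled


left = SpansBuffer()
left.add(FakeMessage({"duration_ms": 100}))

right = SpansBuffer()
right.add(FakeMessage({"duration_ms": 250}))

# Combine two partial aggregates, then read out the Segment.
segment = left.merge(right).get_value()
assert segment.total_duration == 350 and len(segment.spans) == 2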

sentry_streams/sentry_streams/examples/spans.py

Lines changed: 0 additions & 85 deletions
This file was deleted.
Lines changed: 26 additions & 43 deletions

@@ -1,29 +1,10 @@
 from datetime import timedelta
 
-from sentry_streams.examples.spans import SpansBuffer, build_segment_json, build_span
-from sentry_streams.pipeline.pipeline import (
-    Aggregate,
-    Map,
-    Pipeline,
-    StreamSink,
-    StreamSource,
-)
-from sentry_streams.pipeline.window import TumblingWindow
-
-pipeline = Pipeline()
+from sentry_kafka_schemas.schema_types.snuba_spans_v1 import SpanEvent
 
-source = StreamSource(
-    name="myinput",
-    ctx=pipeline,
-    stream_name="events",
-)
-
-map = Map(
-    name="mymap",
-    ctx=pipeline,
-    inputs=[source],
-    function=build_span,
-)
+from sentry_streams.examples.span_helpers import SpansBuffer, build_segment_json
+from sentry_streams.pipeline import Map, Parser, Reducer, StreamSink, streaming_source
+from sentry_streams.pipeline.window import TumblingWindow
 
 # A sample window.
 # Windows are open for 5 seconds max
@@ -35,24 +16,26 @@
 # Make the trigger and closing windows synonymous, both
 # apparent in the API and as part of implementation
 
-reduce = Aggregate(
-    name="myreduce",
-    ctx=pipeline,
-    inputs=[map],
-    window=reduce_window,
-    aggregate_func=SpansBuffer,
-)
-
-map_str = Map(
-    name="map_str",
-    ctx=pipeline,
-    inputs=[reduce],
-    function=build_segment_json,
-)
-
-sink = StreamSink(
-    name="kafkasink",
-    ctx=pipeline,
-    inputs=[map_str],
-    stream_name="transformed-events",
+pipeline = (
+    streaming_source(name="myinput", stream_name="events")
+    .apply("mymap", Parser(msg_type=SpanEvent))
+    .apply(
+        "myreduce",
+        Reducer(
+            window=reduce_window,
+            aggregate_func=SpansBuffer,
+        ),
+    )
+    .apply(
+        "map_str",
+        Map(
+            function=build_segment_json,
+        ),
+    )
+    .sink(
+        "kafkasink",
+        StreamSink(
+            stream_name="transformed-events",
+        ),
+    )
 )
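To see what the "myreduce" and "map_str" stages compute for one closed window, here is a hand-run sketch outside the streaming runtime. As above, FakeMessage is a hypothetical stand-in for the runtime's Message wrapper (only .payload is modeled), and the bare duration_ms dicts stand in for full SpanEvent payloads:

from dataclasses import dataclass
from typing import Generic, TypeVar

from sentry_streams.examples.span_helpers import SpansBuffer, build_segment_json

T = TypeVar("T")


@dataclass
class FakeMessage(Generic[T]):
    payload: T  # hypothetical stand-in; only .payload is modeled


window = SpansBuffer()                 # what "myreduce" folds per window
for duration in (120, 80, 300):
    window.add(FakeMessage({"duration_ms": duration}))

# When the window closes, "map_str" renders the accumulated Segment:
print(build_segment_json(FakeMessage(window.get_value())))
# {"segment": [...], "total_duration": 500}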
Lines changed: 37 additions & 48 deletions

@@ -1,59 +1,48 @@
-from sentry_streams.examples.word_counter_fn import (
-    EventsPipelineFilterFunctions,
-    EventsPipelineMapFunction,
+from sentry_streams.examples.word_counter_helpers import (
     GroupByWord,
     WordCounter,
+    simple_filter,
+    simple_map,
 )
-from sentry_streams.pipeline.pipeline import (
-    Aggregate,
-    Filter,
-    Map,
-    Pipeline,
-    StreamSink,
-    StreamSource,
-)
+from sentry_streams.pipeline import Filter, Map, Reducer, streaming_source
+from sentry_streams.pipeline.chain import StreamSink
 from sentry_streams.pipeline.window import TumblingWindow
 
-# pipeline: special name
-pipeline = Pipeline()
-
-source = StreamSource(
-    name="myinput",
-    ctx=pipeline,
-    stream_name="events",
-)
-
-filter = Filter(
-    name="myfilter",
-    ctx=pipeline,
-    inputs=[source],
-    function=EventsPipelineFilterFunctions.simple_filter,
-)
-
-map = Map(
-    name="mymap",
-    ctx=pipeline,
-    inputs=[filter],
-    function=EventsPipelineMapFunction.simple_map,
-)
-
 # A sample window.
 # Windows are assigned 3 elements.
 # TODO: Get the parameters for window in pipeline configuration.
 reduce_window = TumblingWindow(window_size=3)
 
-reduce: Aggregate[int, tuple[str, int], str] = Aggregate(
-    name="myreduce",
-    ctx=pipeline,
-    inputs=[map],
-    window=reduce_window,
-    aggregate_func=WordCounter,
-    group_by_key=GroupByWord(),
-)
-
-sink = StreamSink(
-    name="kafkasink",
-    ctx=pipeline,
-    inputs=[reduce],
-    stream_name="transformed-events",
+# pipeline: special name
+pipeline = (
+    streaming_source(
+        name="myinput",
+        stream_name="events",
+    )
+    .apply(
+        "myfilter",
+        Filter(
+            function=simple_filter,
+        ),
+    )
+    .apply(
+        "mymap",
+        Map(
+            function=simple_map,
+        ),
+    )
+    .apply(
+        "myreduce",
+        Reducer(
+            window=reduce_window,
+            aggregate_func=WordCounter,
+            group_by_key=GroupByWord(),
+        ),
+    )
+    .sink(
+        "kafkasink",
+        StreamSink(
+            stream_name="transformed-events",
+        ),
+    )
 )

sentry_streams/sentry_streams/examples/word_counter_fn.py renamed to sentry_streams/sentry_streams/examples/word_counter_helpers.py

Lines changed: 16 additions & 3 deletions

@@ -2,6 +2,7 @@
 from typing import Self
 
 from sentry_streams.pipeline.function_template import Accumulator, GroupBy
+from sentry_streams.pipeline.message import Message
 
 
 class EventsPipelineMapFunction:
@@ -55,13 +56,25 @@ def get_group_by_key(self, payload: tuple[str, int]) -> str:
         return payload[0]
 
 
-class WordCounter(Accumulator[tuple[str, int], str]):
+def simple_filter(value: Message[bytes]) -> bool:
+    d = json.loads(value.payload)
+    return True if "name" in d else False
+
+
+def simple_map(value: Message[bytes]) -> tuple[str, int]:
+    d = json.loads(value.payload)
+    word: str = d.get("word", "null_word")
+
+    return (word, 1)
+
+
+class WordCounter(Accumulator[Message[tuple[str, int]], str]):
 
     def __init__(self) -> None:
         self.tup = ("", 0)
 
-    def add(self, value: tuple[str, int]) -> Self:
-        self.tup = (value[0], self.tup[1] + value[1])
+    def add(self, value: Message[tuple[str, int]]) -> Self:
+        self.tup = (value.payload[0], self.tup[1] + value.payload[1])
 
         return self
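A hand-run trace of the helpers above, mirroring the filter → map → group-by → reduce order of the word counter pipeline. FakeMessage is again a hypothetical stand-in for the library's Message wrapper (only .payload is modeled); the exact string get_value() returns comes from unchanged lines this diff does not show:

from dataclasses import dataclass
from typing import Generic, TypeVar

from sentry_streams.examples.word_counter_helpers import (
    GroupByWord,
    WordCounter,
    simple_filter,
    simple_map,
)

T = TypeVar("T")


@dataclass
class FakeMessage(Generic[T]):
    payload: T  # hypothetical stand-in; only .payload is modeled


raw = FakeMessage(b'{"name": "x", "word": "hello"}')
assert simple_filter(raw)                        # kept: payload has a "name" key
pair = simple_map(raw)                           # ("hello", 1)
assert GroupByWord().get_group_by_key(pair) == "hello"

counter = WordCounter()
counter.add(FakeMessage(pair))
counter.add(FakeMessage(("hello", 1)))           # internal tuple is now ("hello", 2)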
