## Advanced Combiners in Apache Beam

In [2]:
import apache_beam as beam
import apache_beam.runners.interactive.interactive_beam as ib
from apache_beam.runners.interactive.interactive_runner import InteractiveRunner

In [4]:
class AvergeFn(beam.CombineFn):
    """Combine numeric inputs into their arithmetic mean.

    The accumulator is a ``(running_sum, count)`` tuple. The final output is
    ``running_sum / count``, or NaN when no input was seen.

    The class name keeps its original spelling so existing references to it
    continue to work.
    """

    def create_accumulator(self):
        """Return the empty accumulator ``(0.0, 0)``, i.e. (sum, count)."""
        return (0.0, 0)

    def add_input(self, sum_count, input):
        """Fold one input value into ``sum_count`` and return the new tuple."""
        total, count = sum_count
        return total + input, count + 1

    def merge_accumulators(self, accumulators):
        """Merge several ``(sum, count)`` accumulators into a single one.

        Returns the empty accumulator when ``accumulators`` yields nothing.
        """
        partials = list(accumulators)
        if not partials:
            # zip(*[]) produces no items, so the two-name unpacking below
            # would raise ValueError on an empty iterable.
            return self.create_accumulator()
        # zip transposes the pairs:
        # [(27, 3), (39, 3), (18, 2)] --> [(27, 39, 18), (3, 3, 2)]
        partial_sums, partial_counts = zip(*partials)
        return sum(partial_sums), sum(partial_counts)

    def extract_output(self, sum_count):
        """Return the mean for ``sum_count``, or NaN if the count is zero."""
        total, count = sum_count
        # Guard against division by zero when nothing was accumulated.
        return total / count if count else float("NaN")

# Pipeline executed through the interactive runner.
p = beam.Pipeline(InteractiveRunner())

# Create a small in-memory collection of numbers and reduce it globally
# with AvergeFn.
small_sum = p | beam.Create([15, 5, 7, 7, 9, 23, 13, 5]) | beam.CombineGlobally(AvergeFn())

# Display the resulting collection.
ib.show(small_sum)