Skip to content

Commit

Permalink
[BEAM-7390] Add code snippet for Latest (#10166)
Browse files Browse the repository at this point in the history
[BEAM-7390] Add code snippet for Latest (#10166)
  • Loading branch information
davidcavazos authored and aaltay committed Dec 17, 2019
1 parent 900c401 commit 1cd62ef
Show file tree
Hide file tree
Showing 2 changed files with 145 additions and 0 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
# coding=utf-8
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

from __future__ import absolute_import
from __future__ import print_function


def latest_globally(test=None):
# [START latest_globally]
import apache_beam as beam
import time

def to_unix_time(time_str, format='%Y-%m-%d %H:%M:%S'):
return time.mktime(time.strptime(time_str, format))

with beam.Pipeline() as pipeline:
latest_element = (
pipeline
| 'Create crops' >> beam.Create([
{'item': '🥬', 'harvest': '2020-02-24 00:00:00'},
{'item': '🍓', 'harvest': '2020-06-16 00:00:00'},
{'item': '🥕', 'harvest': '2020-07-17 00:00:00'},
{'item': '🍆', 'harvest': '2020-10-26 00:00:00'},
{'item': '🍅', 'harvest': '2020-10-01 00:00:00'},
])
| 'With timestamps' >> beam.Map(
lambda crop: beam.window.TimestampedValue(
crop['item'], to_unix_time(crop['harvest'])))
| 'Get latest element' >> beam.combiners.Latest.Globally()
| beam.Map(print)
)
# [END latest_globally]
if test:
test(latest_element)


def latest_per_key(test=None):
# [START latest_per_key]
import apache_beam as beam
import time

def to_unix_time(time_str, format='%Y-%m-%d %H:%M:%S'):
return time.mktime(time.strptime(time_str, format))

with beam.Pipeline() as pipeline:
latest_elements_per_key = (
pipeline
| 'Create crops' >> beam.Create([
('spring', {'item': '🥕', 'harvest': '2020-06-28 00:00:00'}),
('spring', {'item': '🍓', 'harvest': '2020-06-16 00:00:00'}),
('summer', {'item': '🥕', 'harvest': '2020-07-17 00:00:00'}),
('summer', {'item': '🍓', 'harvest': '2020-08-26 00:00:00'}),
('summer', {'item': '🍆', 'harvest': '2020-09-04 00:00:00'}),
('summer', {'item': '🥬', 'harvest': '2020-09-18 00:00:00'}),
('summer', {'item': '🍅', 'harvest': '2020-09-22 00:00:00'}),
('autumn', {'item': '🍅', 'harvest': '2020-10-01 00:00:00'}),
('autumn', {'item': '🥬', 'harvest': '2020-10-20 00:00:00'}),
('autumn', {'item': '🍆', 'harvest': '2020-10-26 00:00:00'}),
('winter', {'item': '🥬', 'harvest': '2020-02-24 00:00:00'}),
])
| 'With timestamps' >> beam.Map(
lambda pair: beam.window.TimestampedValue(
(pair[0], pair[1]['item']), to_unix_time(pair[1]['harvest'])))
| 'Get latest elements per key' >> beam.combiners.Latest.PerKey()
| beam.Map(print)
)
# [END latest_per_key]
if test:
test(latest_elements_per_key)
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
# coding=utf-8
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

from __future__ import absolute_import
from __future__ import print_function

import unittest

import mock

from apache_beam.examples.snippets.util import assert_matches_stdout
from apache_beam.testing.test_pipeline import TestPipeline

from . import latest


def check_latest_element(actual):
expected = '''[START latest_element]
🍆
[END latest_element]'''.splitlines()[1:-1]
assert_matches_stdout(actual, expected)


def check_latest_elements_per_key(actual):
expected = '''[START latest_elements_per_key]
('spring', '🥕')
('summer', '🍅')
('autumn', '🍆')
('winter', '🥬')
[END latest_elements_per_key]'''.splitlines()[1:-1]
assert_matches_stdout(actual, expected)


@mock.patch('apache_beam.Pipeline', TestPipeline)
@mock.patch(
'apache_beam.examples.snippets.transforms.aggregation.latest.print', str)
class LatestTest(unittest.TestCase):
def test_latest_globally(self):
latest.latest_globally(check_latest_element)

def test_latest_per_key(self):
latest.latest_per_key(check_latest_elements_per_key)


if __name__ == '__main__':
unittest.main()

0 comments on commit 1cd62ef

Please sign in to comment.