[FLINK-27508] update pyflink-walkthrough playground for Flink 1.14 (#31)
shba24 committed May 12, 2022
1 parent 22f1ba8 commit a3cc5afccfaba41b0a27a27c220db0c73f807c0c
Showing 4 changed files with 23 additions and 31 deletions.
pyflink-walkthrough/Dockerfile
@@ -20,8 +20,8 @@
 # Build PyFlink Playground Image
 ###############################################################################
 
-FROM apache/flink:1.13.1-scala_2.12-java8
-ARG FLINK_VERSION=1.13.1
+FROM apache/flink:1.14.4-scala_2.12-java8
+ARG FLINK_VERSION=1.14.4
 
 # Install python3.7 and pyflink
 # Pyflink does not yet function with python3.9, and this image is built on
@@ -42,7 +42,7 @@ RUN set -ex; \
   apt-get clean && \
   rm -rf /var/lib/apt/lists/* && \
   python -m pip install --upgrade pip; \
-  pip install apache-flink==1.13.1; \
+  pip install apache-flink==1.14.4; \
   pip install kafka-python;
 
 # Download connector libraries
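Note: the FROM tag, the FLINK_VERSION build arg, and the pip pin all move to 1.14.4 together, because the pip-installed PyFlink client must match the Flink distribution baked into the image. A minimal sanity check, assuming the image built above and the pyflink.version module that ships with the apache-flink wheel:

from pyflink.version import __version__ as pyflink_version

# Client/cluster version skew is a common cause of failed job submissions,
# so the wheel pinned via pip should match the Flink distribution in the image.
assert pyflink_version.startswith("1.14"), f"unexpected PyFlink version {pyflink_version}"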
pyflink-walkthrough/README.md
@@ -5,7 +5,7 @@
 In this playground, you will learn how to build and run an end-to-end PyFlink pipeline for data analytics, covering the following steps:
 
 * Reading data from a Kafka source;
-* Creating data using a [UDF](https://ci.apache.org/projects/flink/flink-docs-release-1.13/dev/python/table-api-users-guide/udfs/python_udfs.html);
+* Creating data using a [UDF](https://ci.apache.org/projects/flink/flink-docs-release-1.14/dev/python/table-api-users-guide/udfs/python_udfs.html);
 * Performing a simple aggregation over the source data;
 * Writing the results to Elasticsearch and visualizing them in Kibana.
 
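Note: the UDF step in the list above is a plain Python scalar function. A minimal sketch in the 1.14 decorator style, with illustrative lookup values standing in for the walkthrough's real province_id_to_name mapping:

from pyflink.table import DataTypes
from pyflink.table.udf import udf

@udf(result_type=DataTypes.STRING())
def province_id_to_name(province_id: int):
    # Hypothetical sample values; the walkthrough ships its own mapping.
    names = {0: "Beijing", 1: "Shanghai", 2: "Hangzhou"}
    return names.get(province_id, "UNKNOWN")

Once registered with t_env.create_temporary_function, the function can be called from both Table API expressions and SQL.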
pyflink-walkthrough/docker-compose.yml
@@ -20,7 +20,7 @@ version: '2.1'
 services:
   jobmanager:
     build: .
-    image: pyflink/pyflink:1.13.1-scala_2.12
+    image: pyflink/pyflink:1.14.4-scala_2.12
     volumes:
       - .:/opt/pyflink-walkthrough
     hostname: "jobmanager"
@@ -32,7 +32,7 @@ services:
     environment:
       - JOB_MANAGER_RPC_ADDRESS=jobmanager
   taskmanager:
-    image: pyflink/pyflink:1.13.1-scala_2.12
+    image: pyflink/pyflink:1.14.4-scala_2.12
     volumes:
       - .:/opt/pyflink-walkthrough
     expose:
@@ -50,7 +50,7 @@ services:
     ports:
       - "2181:2181"
   kafka:
-    image: wurstmeister/kafka:2.12-2.2.1
+    image: wurstmeister/kafka:2.13-2.8.1
     ports:
       - "9092:9092"
     depends_on:
@@ -59,8 +59,6 @@ services:
       HOSTNAME_COMMAND: "route -n | awk '/UG[ \t]/{print $$2}'"
       KAFKA_ZOOKEEPER_CONNECT: zookeeper:2181
       KAFKA_CREATE_TOPICS: "payment_msg:1:1"
-    volumes:
-      - /var/run/docker.sock:/var/run/docker.sock
   generator:
     build: generator
     image: generator:1.0
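Note: since both the Flink images and the Kafka image change tags here, existing containers must be rebuilt and recreated, typically with docker-compose build followed by docker-compose up -d. Dropping the /var/run/docker.sock mount also means the Kafka container no longer gets direct access to the Docker daemon.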
pyflink-walkthrough/payment_msg_proccessing.py
@@ -42,35 +42,29 @@ def log_processing():
             payPlatform INT,
             provinceId INT
         ) WITH (
-          'connector.type' = 'kafka',
-          'connector.version' = 'universal',
-          'connector.topic' = 'payment_msg',
-          'connector.properties.bootstrap.servers' = 'kafka:9092',
-          'connector.properties.group.id' = 'test_3',
-          'connector.startup-mode' = 'latest-offset',
-          'format.type' = 'json'
+          'connector' = 'kafka',
+          'topic' = 'payment_msg',
+          'properties.bootstrap.servers' = 'kafka:9092',
+          'properties.group.id' = 'test_3',
+          'scan.startup.mode' = 'latest-offset',
+          'format' = 'json'
         )
         """

     create_es_sink_ddl = """
-        CREATE TABLE es_sink (
+        CREATE TABLE es_sink(
             province VARCHAR PRIMARY KEY,
             pay_amount DOUBLE
         ) with (
-          'connector.type' = 'elasticsearch',
-          'connector.version' = '7',
-          'connector.hosts' = 'http://elasticsearch:9200',
-          'connector.index' = 'platform_pay_amount_1',
-          'connector.document-type' = 'payment',
-          'update-mode' = 'upsert',
-          'connector.flush-on-checkpoint' = 'true',
-          'connector.key-delimiter' = '$',
-          'connector.key-null-literal' = 'n/a',
-          'connector.bulk-flush.max-size' = '42mb',
-          'connector.bulk-flush.max-actions' = '32',
-          'connector.bulk-flush.interval' = '1000',
-          'connector.bulk-flush.backoff.delay' = '1000',
-          'format.type' = 'json'
+          'connector' = 'elasticsearch-7',
+          'hosts' = 'http://elasticsearch:9200',
+          'index' = 'platform_pay_amount_1',
+          'document-id.key-delimiter' = '$',
+          'sink.bulk-flush.max-size' = '42mb',
+          'sink.bulk-flush.max-actions' = '32',
+          'sink.bulk-flush.interval' = '1000',
+          'sink.bulk-flush.backoff.delay' = '1000',
+          'format' = 'json'
         )
         """

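Note: the hunk shows only the two DDL strings; the rest of log_processing() (elided here) registers them and runs the aggregation. A hedged sketch of that wiring, assuming the source table's payAmount DOUBLE column from just above the hunk, and using a plain cast where the walkthrough calls its UDF so the result matches es_sink's VARCHAR key:

from pyflink.datastream import StreamExecutionEnvironment
from pyflink.table import DataTypes, StreamTableEnvironment
from pyflink.table.expressions import col

env = StreamExecutionEnvironment.get_execution_environment()
t_env = StreamTableEnvironment.create(stream_execution_environment=env)

t_env.execute_sql(create_kafka_source_ddl)  # registers the Kafka source table
t_env.execute_sql(create_es_sink_ddl)       # registers the Elasticsearch sink

# Per-province sum of payments, continuously upserted into Elasticsearch.
t_env.from_path("payment_msg") \
     .select(col("provinceId").cast(DataTypes.STRING()).alias("province"),
             col("payAmount")) \
     .group_by(col("province")) \
     .select(col("province"), col("payAmount").sum.alias("pay_amount")) \
     .execute_insert("es_sink")

The Table API calls go through col() expressions, the form current in 1.14; the older string-based expression DSL used with 1.13 no longer applies.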