[FLINK-13978] Add experimental support for building on Azure Pipelines
rmetzger committed Jan 30, 2020
1 parent 8699e03 commit 322bf18
Showing 11 changed files with 530 additions and 31 deletions.
45 changes: 33 additions & 12 deletions azure-pipelines.yml
@@ -13,23 +13,44 @@
# See the License for the specific language governing permissions and
# limitations under the License.

#
# This file defines an Azure Pipeline build for testing Flink. It is intended to be used
# with a free Azure Pipelines account.
# It has the following features:
# - default builds for pushes / pull requests to a Flink fork and custom AZP account
# - end2end tests
#
#
# For the "apache/flink" repository, we are using the pipeline definition located in
# tools/azure-pipelines/build-apache-repo.yml
# That file points to custom, self-hosted build agents for faster pull request build processing and
# integration with Flinkbot.
#

trigger:
branches:
include:
- '*'

resources:
containers:
# Container with Maven 3.2.5 to have the same environment everywhere.
# Container with Maven 3.2.5, SSL to have the same environment everywhere.
- container: flink-build-container
image: rmetzger/flink-ci:3
repositories:
- repository: templates
type: github
name: flink-ci/flink-azure-builds
endpoint: flink-ci
image: rmetzger/flink-ci:ubuntu-jdk8-amd64-e005e00

variables:
MAVEN_CACHE_FOLDER: $(Pipeline.Workspace)/.m2/repository
MAVEN_OPTS: '-Dmaven.repo.local=$(MAVEN_CACHE_FOLDER)'
CACHE_KEY: maven | $(Agent.OS) | **/pom.xml, !**/target/**
CACHE_FALLBACK_KEY: maven | $(Agent.OS)
CACHE_FLINK_DIR: $(Pipeline.Workspace)/flink_cache


jobs:
- template: flink-build-jobs.yml@templates
- template: tools/azure-pipelines/jobs-template.yml
parameters:
stage_name: ci_build
test_pool_definition:
vmImage: 'ubuntu-latest'
e2e_pool_definition:
vmImage: 'ubuntu-latest'
environment: PROFILE="-Dhadoop.version=2.8.3 -Dinclude_hadoop_aws -Dscala-2.11"
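As an aside, this is the entry point a fork would reuse: the template is included with a handful of parameters, so a fork only has to swap in its own agent pool. A minimal sketch, reusing the parameter names shown above (the pool name my-pool is hypothetical, everything else is taken from this file):

# Hypothetical fork-side variant: identical template, self-hosted test pool.
jobs:
  - template: tools/azure-pipelines/jobs-template.yml
    parameters:
      stage_name: fork_build              # must stay unique per inclusion
      test_pool_definition:
        name: my-pool                     # assumed self-hosted agent pool
      e2e_pool_definition:
        vmImage: 'ubuntu-latest'
      environment: PROFILE="-Dhadoop.version=2.8.3 -Dinclude_hadoop_aws -Dscala-2.11"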



7 changes: 5 additions & 2 deletions flink-end-to-end-tests/run-nightly-tests.sh
@@ -88,8 +88,11 @@ run_test "Resuming Externalized Checkpoint after terminal failure (rocks, increm
# Docker
################################################################################

run_test "Running Kerberized YARN on Docker test (default input)" "$END_TO_END_DIR/test-scripts/test_yarn_kerberos_docker.sh"
run_test "Running Kerberized YARN on Docker test (custom fs plugin)" "$END_TO_END_DIR/test-scripts/test_yarn_kerberos_docker.sh dummy-fs"
# Ignore these tests on Azure
if [ -z "$TF_BUILD" ] ; then
run_test "Running Kerberized YARN on Docker test (default input)" "$END_TO_END_DIR/test-scripts/test_yarn_kerberos_docker.sh"
run_test "Running Kerberized YARN on Docker test (custom fs plugin)" "$END_TO_END_DIR/test-scripts/test_yarn_kerberos_docker.sh dummy-fs"
fi
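
The new guard keys off TF_BUILD, which Azure Pipelines sets to "True" on its agents; a rough way to exercise both branches locally, sketched here and not part of the commit:

# Simulate an Azure agent: the Kerberized YARN tests are skipped.
TF_BUILD=True ./flink-end-to-end-tests/run-nightly-tests.sh
# Without the variable set, the tests run as before.
env -u TF_BUILD ./flink-end-to-end-tests/run-nightly-tests.sh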

################################################################################
# High Availability
15 changes: 13 additions & 2 deletions flink-end-to-end-tests/test-scripts/kafka-common.sh
@@ -30,6 +30,7 @@ KAFKA_DIR=$TEST_DATA_DIR/kafka_2.11-$KAFKA_VERSION
CONFLUENT_DIR=$TEST_DATA_DIR/confluent-$CONFLUENT_VERSION
SCHEMA_REGISTRY_PORT=8082
SCHEMA_REGISTRY_URL=http://localhost:${SCHEMA_REGISTRY_PORT}
MAX_RETRY_SECONDS=120

function setup_kafka_dist {
# download Kafka
@@ -67,10 +68,20 @@ function start_kafka_cluster {
$KAFKA_DIR/bin/zookeeper-server-start.sh -daemon $KAFKA_DIR/config/zookeeper.properties
$KAFKA_DIR/bin/kafka-server-start.sh -daemon $KAFKA_DIR/config/server.properties

start_time=$(date +%s)
# zookeeper outputs the "Node does not exist" bit to stderr
while [[ $($KAFKA_DIR/bin/zookeeper-shell.sh localhost:2181 get /brokers/ids/0 2>&1) =~ .*Node\ does\ not\ exist.* ]]; do
echo "Waiting for broker..."
sleep 1
current_time=$(date +%s)
time_diff=$((current_time - start_time))

if [ $time_diff -ge $MAX_RETRY_SECONDS ]; then
echo "Kafka cluster did not start after $MAX_RETRY_SECONDS seconds. Printing Kafka logs:"
cat $KAFKA_DIR/logs/*
exit 1
else
echo "Waiting for broker..."
sleep 1
fi
done
}
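
The broker wait added above is the usual retry-with-deadline pattern; a generic bash sketch of the same idea, with illustrative names only (not part of the commit):

# Illustrative helper. Usage: retry_until <max_seconds> <command ...>
function retry_until {
  local deadline=$(( $(date +%s) + $1 )); shift
  until "$@"; do
    if [ "$(date +%s)" -ge "$deadline" ]; then
      echo "Timed out waiting for: $*" >&2
      return 1
    fi
    sleep 1
  done
}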

@@ -46,6 +46,7 @@ kubectl create -f ${KUBERNETES_MODULE_DIR}/job-cluster-service.yaml
envsubst '${FLINK_IMAGE_NAME} ${FLINK_JOB} ${FLINK_JOB_PARALLELISM} ${FLINK_JOB_ARGUMENTS}' < ${CONTAINER_SCRIPTS}/job-cluster-job.yaml.template | kubectl create -f -
envsubst '${FLINK_IMAGE_NAME} ${FLINK_JOB_PARALLELISM}' < ${CONTAINER_SCRIPTS}/task-manager-deployment.yaml.template | kubectl create -f -
kubectl wait --for=condition=complete job/flink-job-cluster --timeout=1h

kubectl cp `kubectl get pods | awk '/task-manager/ {print $1}'`:/cache/${OUTPUT_FILE} ${OUTPUT_VOLUME}/${OUTPUT_FILE}

check_result_hash "WordCount" ${OUTPUT_VOLUME}/${OUTPUT_FILE} "${RESULT_HASH}"
8 changes: 6 additions & 2 deletions flink-end-to-end-tests/test-scripts/test_streaming_kinesis.sh
@@ -27,12 +27,16 @@ export AWS_ACCESS_KEY_ID=flinkKinesisTestFakeAccessKeyId
export AWS_SECRET_KEY=flinkKinesisTestFakeAccessKey

KINESALITE_PORT=4567
KINESALITE_HOST=kinesalite-container
KINESALITE_NETWORK=some



function start_kinesalite {
#docker run -d --rm --name flink-test-kinesis -p ${KINESALITE_PORT}:${KINESALITE_PORT} instructure/kinesalite
# override entrypoint to enable SSL
docker run -d --rm --entrypoint "/tini" \
--name flink-test-kinesis \
--name ${KINESALITE_HOST} --network ${KINESALITE_NETWORK} \
-p ${KINESALITE_PORT}:${KINESALITE_PORT} \
instructure/kinesalite -- \
/usr/src/app/node_modules/kinesalite/cli.js --path /var/lib/kinesalite --ssl
@@ -65,6 +69,6 @@ TEST_JAR="${END_TO_END_DIR}/flink-streaming-kinesis-test/target/KinesisExample.j
JVM_ARGS=${DISABLE_CERT_CHECKING_JAVA_OPTS} \
$FLINK_DIR/bin/flink run -p 1 -c org.apache.flink.streaming.kinesis.test.KinesisExampleTest $TEST_JAR \
--input-stream test-input --output-stream test-output \
--aws.endpoint https://localhost:${KINESALITE_PORT} --aws.credentials.provider.basic.secretkey fakekey --aws.credentials.provider.basic.accesskeyid fakeid \
--aws.endpoint https://${KINESALITE_HOST}:${KINESALITE_PORT} --aws.credentials.provider.basic.secretkey fakekey --aws.credentials.provider.basic.accesskeyid fakeid \
--flink.stream.initpos TRIM_HORIZON \
--flink.partition-discovery.interval-millis 1000
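
Pointing the endpoint at ${KINESALITE_HOST} instead of localhost leans on Docker's name-based resolution between containers on a shared user-defined network; a minimal illustration of that resolution, not part of the commit (the network name "some" is taken from the script, busybox is used purely as a throwaway client):

# Illustration only.
docker network create some 2>/dev/null || true
docker run -d --rm --name kinesalite-container --network some \
  -p 4567:4567 instructure/kinesalite
docker run --rm --network some busybox nslookup kinesalite-container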
94 changes: 94 additions & 0 deletions tools/azure-pipelines/build-apache-repo.yml
@@ -0,0 +1,94 @@
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


#
# This file defines the Flink build for the "apache/flink" repository, including
# the following:
# - PR builds
# - custom triggered e2e tests
# - nightly builds



schedules:
- cron: "0 0 * * *"
displayName: Daily midnight build
branches:
include:
- master
always: true # run even if there were no changes to the mentioned branches

resources:
containers:
# Container with Maven 3.2.5, SSL to have the same environment everywhere.
- container: flink-build-container
image: rmetzger/flink-ci:ubuntu-jdk8-amd64-e005e00


variables:
MAVEN_CACHE_FOLDER: $(Pipeline.Workspace)/.m2/repository
MAVEN_OPTS: '-Dmaven.repo.local=$(MAVEN_CACHE_FOLDER)'
CACHE_KEY: maven | $(Agent.OS) | **/pom.xml, !**/target/**
CACHE_FALLBACK_KEY: maven | $(Agent.OS)
CACHE_FLINK_DIR: $(Pipeline.Workspace)/flink_cache

stages:
# CI / PR triggered stage:
- stage: ci_build
displayName: "CI Build (custom builders)"
condition: not(in(variables['Build.Reason'], 'Schedule', 'Manual'))
jobs:
- template: jobs-template.yml
parameters:
stage_name: ci_build
test_pool_definition:
name: Default
e2e_pool_definition:
vmImage: 'ubuntu-latest'
environment: PROFILE="-Dhadoop.version=2.8.3 -Dinclude_hadoop_aws -Dscala-2.11"

# Special stage for midnight builds:
- stage: cron_build_on_azure_os_free_pool
displayName: "Cron build on free Azure Resource Pool"
dependsOn: [] # depending on an empty array makes the stages run in parallel
condition: or(eq(variables['Build.Reason'], 'Schedule'), eq(variables['MODE'], 'nightly'))
jobs:
- template: jobs-template.yml
parameters:
stage_name: cron_build_default
test_pool_definition:
vmImage: 'ubuntu-latest'
e2e_pool_definition:
vmImage: 'ubuntu-latest'
environment: PROFILE="-Dhadoop.version=2.8.3 -Dinclude_hadoop_aws -Dscala-2.11"
- template: jobs-template.yml
parameters:
stage_name: cron_build_scala2_12
test_pool_definition:
vmImage: 'ubuntu-latest'
e2e_pool_definition:
vmImage: 'ubuntu-latest'
environment: PROFILE="-Dhadoop.version=2.8.3 -Dinclude_hadoop_aws -Dscala-2.12 -Phive-1.2.1"
- template: jobs-template.yml
parameters:
stage_name: cron_build_jdk11
test_pool_definition:
vmImage: 'ubuntu-latest'
e2e_pool_definition:
vmImage: 'ubuntu-latest'
environment: PROFILE="-Dhadoop.version=2.8.3 -Dinclude_hadoop_aws -Dscala-2.11 -Djdk11"
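
One detail worth calling out: stages normally run sequentially in file order, so the cron stage's empty dependsOn is what detaches it from ci_build. A minimal illustration of the difference, not tied to this pipeline:

# Illustration only.
stages:
  - stage: A
  - stage: B          # implicit dependsOn: A  -> runs after A
  - stage: C
    dependsOn: []     # no dependencies        -> starts in parallel with A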


129 changes: 129 additions & 0 deletions tools/azure-pipelines/jobs-template.yml
@@ -0,0 +1,129 @@
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

parameters:
test_pool_definition: # where is compilation and unit test execution happening?
e2e_pool_definition: # where is e2e test execution happening?
stage_name: # needed to make job names unique if they are included multiple times
environment: # used to pass environment variables into the downstream scripts

jobs:
- job: compile_${{parameters.stage_name}}
condition: not(eq(variables['MODE'], 'e2e'))
pool: ${{parameters.test_pool_definition}}
container: flink-build-container
timeoutInMinutes: 240
cancelTimeoutInMinutes: 1
workspace:
clean: all
steps:

# Preparation
- task: CacheBeta@1
inputs:
key: $(CACHE_KEY)
restoreKeys: $(CACHE_FALLBACK_KEY)
path: $(MAVEN_CACHE_FOLDER)
cacheHitVar: CACHE_RESTORED
continueOnError: true # continue the build even if the cache fails.
displayName: Cache Maven local repo

# Compile
- script: STAGE=compile ${{parameters.environment}} ./tools/azure_controller.sh compile
displayName: Build

# upload artifacts for next stage
- task: PublishPipelineArtifact@1
inputs:
path: $(CACHE_FLINK_DIR)
artifact: FlinkCompileCacheDir-${{parameters.stage_name}}

- job: test_${{parameters.stage_name}}
dependsOn: compile_${{parameters.stage_name}}
condition: not(eq(variables['MODE'], 'e2e'))
pool: ${{parameters.test_pool_definition}}
container: flink-build-container
timeoutInMinutes: 240
cancelTimeoutInMinutes: 1
workspace:
clean: all
strategy:
matrix:
core:
module: core
python:
module: python
libraries:
module: libraries
blink_planner:
module: blink_planner
connectors:
module: connectors
kafka_gelly:
module: kafka/gelly
tests:
module: tests
legacy_scheduler_core:
module: legacy_scheduler_core
legacy_scheduler_tests:
module: legacy_scheduler_tests
misc:
module: misc
steps:

# download artifacts
- task: DownloadPipelineArtifact@2
inputs:
path: $(CACHE_FLINK_DIR)
artifact: FlinkCompileCacheDir-${{parameters.stage_name}}

# recreate "build-target" symlink for python tests
- script: |
ls -lisah $(CACHE_FLINK_DIR)
ls -lisah .
ln -snf $(CACHE_FLINK_DIR)/flink-dist/target/flink-*-SNAPSHOT-bin/flink-*-SNAPSHOT $(CACHE_FLINK_DIR)/build-target
displayName: Recreate 'build-target' symlink
# Test
- script: STAGE=test ${{parameters.environment}} ./tools/azure_controller.sh $(module)
displayName: Test - $(module)

- task: PublishTestResults@2
inputs:
testResultsFormat: 'JUnit'


- job: e2e_${{parameters.stage_name}}
condition: eq(variables['MODE'], 'e2e')
# We are not running this job in a container, but directly on a VM.
pool: ${{parameters.e2e_pool_definition}}
timeoutInMinutes: 240
cancelTimeoutInMinutes: 1
workspace:
clean: all
steps:
- task: CacheBeta@1
inputs:
key: $(CACHE_KEY)
restoreKeys: $(CACHE_FALLBACK_KEY)
path: $(MAVEN_CACHE_FOLDER)
cacheHitVar: CACHE_RESTORED
displayName: Cache Maven local repo
- script: ./tools/travis/setup_maven.sh
- script: ./tools/azure-pipelines/setup_kubernetes.sh
- script: M2_HOME=/home/vsts/maven_cache/apache-maven-3.2.5/ PATH=/home/vsts/maven_cache/apache-maven-3.2.5/bin:$PATH PROFILE="-Dinclude-hadoop -Dhadoop.version=2.8.3 -De2e-metrics -Dmaven.wagon.http.pool=false" STAGE=compile ./tools/azure_controller.sh compile
displayName: Build
- script: FLINK_DIR=`pwd`/build-target flink-end-to-end-tests/run-nightly-tests.sh
displayName: Run nightly e2e tests
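
The template funnels compile and test steps through tools/azure_controller.sh, so one matrix leg can be reproduced roughly as follows on a local machine (a sketch assuming a local Maven and JDK 8; PROFILE is the value passed via the environment parameter above):

# Rough local reproduction of the 'core' test leg; illustrative only.
export PROFILE="-Dhadoop.version=2.8.3 -Dinclude_hadoop_aws -Dscala-2.11"
STAGE=compile ./tools/azure_controller.sh compile
STAGE=test ./tools/azure_controller.sh core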

34 changes: 34 additions & 0 deletions tools/azure-pipelines/setup_kubernetes.sh
@@ -0,0 +1,34 @@
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


# Download minikube.
echo "Download minikube"
curl -Lo minikube https://storage.googleapis.com/minikube/releases/v0.25.2/minikube-linux-amd64 && chmod +x minikube && sudo mv minikube /usr/local/bin/
sudo minikube start --vm-driver=none --kubernetes-version=v1.9.0

echo "Move files and change permissions"
sudo mv /root/.kube $HOME/.kube # this will write over any previous configuration
sudo chown -R $USER $HOME/.kube
sudo chgrp -R $USER $HOME/.kube

sudo mv /root/.minikube $HOME/.minikube # this will write over any previous configuration
sudo chown -R $USER $HOME/.minikube
sudo chgrp -R $USER $HOME/.minikube

echo "Fix the kubectl context, as it's often stale."
minikube update-context
echo "Wait for Kubernetes to be up and ready."
JSONPATH='{range .items[*]}{@.metadata.name}:{range @.status.conditions[*]}{@.type}={@.status};{end}{end}'; until kubectl get nodes -o jsonpath="$JSONPATH" 2>&1 | grep -q "Ready=True"; do sleep 1; done
