diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml
index 377112d3c..565a348ae 100644
--- a/.github/workflows/integration.yml
+++ b/.github/workflows/integration.yml
@@ -74,43 +74,73 @@ jobs:
         working-directory: tests/integration
         run: |
           docker pull deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG
-          mkdir logs
           ./download_models.sh ${{ matrix.arch }}
-      - name: Test Pytorch python mode
+      - name: Test Python model
         if: ${{ matrix.arch != 'cpu' }}
         working-directory: tests/integration
         run: |
           ./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models ${{ matrix.arch }} \
-            serve -m test::Python=file:///opt/ml/model/resnet18_serving_all.zip
-          ./test_client.sh http://127.0.0.1:8080/predictions/test image/jpg https://resources.djl.ai/images/kitten.jpg
+            serve -m test::Python=file:/opt/ml/model/resnet18_serving_all.zip
+          ./test_client.sh image/jpg models/kitten.jpg
+          ./test_client.sh tensor/ndlist 1,3,224,224
+          ./test_client.sh tensor/npz 1,3,224,224
+          docker rm -f $(docker ps -aq)
+      - name: Test dynamic batch with Python model
+        if: ${{ matrix.arch != 'cpu' }}
+        working-directory: tests/integration
+        run: |
+          echo -en "SERVING_BATCH_SIZE=2\nSERVING_MAX_BATCH_DELAY=30000" > docker_env
+          ./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models ${{ matrix.arch }} \
+            serve -m test::Python=file:/opt/ml/model/resnet18_serving_all.zip
+          EXPECT_TIMEOUT=1 ./test_client.sh image/jpg models/kitten.jpg
+          ./test_client.sh image/jpg models/kitten.jpg
+          docker rm -f $(docker ps -aq)
+          rm docker_env
+      - name: Test PyTorch model
+        working-directory: tests/integration
+        run: |
+          ./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models ${{ matrix.arch }} \
+            serve -m test::PyTorch=file:/opt/ml/model/resnet18_serving_all.zip
+          ./test_client.sh image/jpg models/kitten.jpg
+          docker rm -f $(docker ps -aq)
+      - name: Test PyTorch model binary mode
+        working-directory: tests/integration
+        run: |
+          ./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models ${{ matrix.arch }} \
+            serve -m "test::PyTorch=file:/opt/ml/model/resnet18_serving_all.zip?translatorFactory=ai.djl.translate.NoopServingTranslatorFactory&application=undefined"
+          ./test_client.sh tensor/ndlist 1,3,224,224
+          ./test_client.sh tensor/npz 1,3,224,224
           docker rm -f $(docker ps -aq)
-      - name: Test PyTorch Binary mode
+      - name: Test dynamic batch with PyTorch model
         working-directory: tests/integration
         run: |
+          echo -en "SERVING_BATCH_SIZE=2\nSERVING_MAX_BATCH_DELAY=30000" > docker_env
           ./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models ${{ matrix.arch }} \
-            serve -m test::PyTorch=file:///opt/ml/model/resnet18_serving_all.zip?model_name=resnet18
-          ./test_client.sh http://127.0.0.1:8080/predictions/test image/jpg https://resources.djl.ai/images/kitten.jpg
+            serve -m test::PyTorch=file:/opt/ml/model/resnet18_serving_all.zip
+          EXPECT_TIMEOUT=1 ./test_client.sh image/jpg models/kitten.jpg
+          ./test_client.sh image/jpg models/kitten.jpg
           docker rm -f $(docker ps -aq)
-      - name: Test MxNet binary mode
+          rm docker_env
+      - name: Test MXNet model
         working-directory: tests/integration
         run: |
           ./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models ${{ matrix.arch }} \
-            serve -m test::MXNet=file:///opt/ml/model/ssd_resnet50.zip?model_name=ssd_resnet50
-          ./test_client.sh http://127.0.0.1:8080/predictions/test image/jpg https://resources.djl.ai/images/kitten.jpg
+            serve -m test::MXNet=file:/opt/ml/model/ssd_resnet50.zip
+          ./test_client.sh image/jpg models/kitten.jpg
           docker rm -f $(docker ps -aq)
-      - name: Test ONNX binary mode
+      - name: Test ONNX model
         working-directory: tests/integration
         run: |
           ./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models ${{ matrix.arch }} \
-            serve -m test::OnnxRuntime=file:///opt/ml/model/resnet18-v1-7.zip?model_name=resnet18-v1-7
-          ./test_client.sh http://127.0.0.1:8080/predictions/test image/jpg https://resources.djl.ai/images/kitten.jpg
+            serve -m test::OnnxRuntime=file:/opt/ml/model/resnet18-v1-7.zip
+          ./test_client.sh image/jpg models/kitten.jpg
           docker rm -f $(docker ps -aq)
-      - name: Test Tensorflow binary mode
+      - name: Test TensorFlow model binary mode
         working-directory: tests/integration
         run: |
           ./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models ${{ matrix.arch }} \
-            serve -m test::TensorFlow=file:///opt/ml/model/resnet50v1.zip?model_name=resnet50
-          ./test_client.sh http://127.0.0.1:8080/predictions/test tensor/ndlist 1,224,224,3
+            serve -m test::TensorFlow=file:/opt/ml/model/resnet50v1.zip?model_name=resnet50
+          ./test_client.sh tensor/ndlist 1,224,224,3
           docker rm -f $(docker ps -aq)
       - name: On fail step
         if: ${{ failure() }}
@@ -157,19 +187,19 @@ jobs:
           docker pull deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG
           mkdir logs
           ./download_models.sh pytorch-inf1
-      - name: Test Pytorch Binary mode
+      - name: Test PyTorch model
         working-directory: tests/integration
         run: |
           ./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models pytorch-inf1 \
-            serve -m test::PyTorch=file:///opt/ml/model/resnet18_inf1_1_12.tar.gz?model_name=resnet18_inf1_1_12
-          ./test_client.sh http://127.0.0.1:8080/predictions/test image/jpg https://resources.djl.ai/images/kitten.jpg
+            serve -m test::PyTorch=file:/opt/ml/model/resnet18_inf1_1_12.tar.gz?model_name=resnet18_inf1_1_12
+          ./test_client.sh image/jpg models/kitten.jpg
           docker rm -f $(docker ps -aq)
-      - name: Test Pytorch Python mode
+      - name: Test Python model
         working-directory: tests/integration
         run: |
           ./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models pytorch-inf1 \
-            serve -m test::Python=file:///opt/ml/model/resnet18_inf1_1_12.tar.gz
-          ./test_client.sh http://127.0.0.1:8080/predictions/test image/jpg https://resources.djl.ai/images/kitten.jpg
+            serve -m test::Python=file:/opt/ml/model/resnet18_inf1_1_12.tar.gz
+          ./test_client.sh image/jpg models/kitten.jpg
           docker rm -f $(docker ps -aq)
       - name: On fail step
         if: ${{ failure() }}
@@ -216,19 +246,19 @@ jobs:
           docker pull deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG
           mkdir logs
           ./download_models.sh pytorch-cu117
-      - name: Test Pytorch python mode
+      - name: Test Python model
         working-directory: tests/integration
         run: |
           ./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models pytorch-cu117 \
-            serve -m test::Python=file:///opt/ml/model/resnet18_serving_all.zip
-          ./test_client.sh http://127.0.0.1:8080/predictions/test image/jpg https://resources.djl.ai/images/kitten.jpg
+            serve -m test::Python=file:/opt/ml/model/resnet18_serving_all.zip
+          ./test_client.sh image/jpg models/kitten.jpg
           docker rm -f $(docker ps -aq)
-      - name: Test Pytorch Binary mode
+      - name: Test PyTorch model
         working-directory: tests/integration
         run: |
           ./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models pytorch-cu117 \
-            serve -m test::PyTorch=file:///opt/ml/model/resnet18_serving_all.zip?model_name=resnet18
-          ./test_client.sh http://127.0.0.1:8080/predictions/test image/jpg https://resources.djl.ai/images/kitten.jpg
+            serve -m test::PyTorch=file:/opt/ml/model/resnet18_serving_all.zip
+          ./test_client.sh image/jpg models/kitten.jpg
           docker rm -f $(docker ps -aq)
       - name: On fail step
         if: ${{ failure() }}
@@ -274,19 +304,19 @@ jobs:
           docker pull deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG
           mkdir logs
           ./download_models.sh aarch64
-      - name: Test PyTorch Binary mode
+      - name: Test PyTorch model
         working-directory: tests/integration
         run: |
           ./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models aarch64 \
-            serve -m test::PyTorch=file:///opt/ml/model/resnet18_serving_all.zip?model_name=resnet18
-          ./test_client.sh http://127.0.0.1:8080/predictions/test image/jpg https://resources.djl.ai/images/kitten.jpg
+            serve -m test::PyTorch=file:/opt/ml/model/resnet18_serving_all.zip
+          ./test_client.sh image/jpg models/kitten.jpg
           docker rm -f $(docker ps -aq)
-      - name: Test ONNX binary mode
+      - name: Test ONNX model
         working-directory: tests/integration
         run: |
           ./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models aarch64 \
-            serve -m test::OnnxRuntime=file:///opt/ml/model/resnet18-v1-7.zip?model_name=resnet18-v1-7
-          ./test_client.sh http://127.0.0.1:8080/predictions/test image/jpg https://resources.djl.ai/images/kitten.jpg
+            serve -m test::OnnxRuntime=file:/opt/ml/model/resnet18-v1-7.zip
+          ./test_client.sh image/jpg models/kitten.jpg
           docker rm -f $(docker ps -aq)
       - name: Check telemetry collection
         working-directory: tests/integration
diff --git a/tests/integration/.gitignore b/tests/integration/.gitignore
new file mode 100644
index 000000000..3875dea7c
--- /dev/null
+++ b/tests/integration/.gitignore
@@ -0,0 +1,3 @@
+/docker_env
+/logs
+/models
diff --git a/tests/integration/download_models.sh b/tests/integration/download_models.sh
index ba2c85945..b12f1bb9d 100755
--- a/tests/integration/download_models.sh
+++ b/tests/integration/download_models.sh
@@ -1,43 +1,44 @@
 #!/bin/bash
 
+set -e
+
 platform=$1 # expected values are "cpu" "cpu-full" "pytorch-cu117" "pytorch-inf1" "aarch64"
 
 rm -rf models
 mkdir models && cd models
+curl -sf -O https://resources.djl.ai/images/kitten.jpg
 
 # all platform models except aarch
 general_platform_models_urls=(
-"https://resources.djl.ai/test-models/pytorch/resnet18_serving_all.zip"
-"https://resources.djl.ai/test-models/tensorflow/resnet50v1.zip"
-"https://resources.djl.ai/test-models/onnxruntime/resnet18-v1-7.zip"
-"https://resources.djl.ai/test-models/mxnet/ssd_resnet50.zip"
+  "https://resources.djl.ai/test-models/pytorch/resnet18_serving_all.zip"
+  "https://resources.djl.ai/test-models/tensorflow/resnet50v1.zip"
+  "https://resources.djl.ai/test-models/onnxruntime/resnet18-v1-7.zip"
+  "https://resources.djl.ai/test-models/mxnet/ssd_resnet50.zip"
 )
 
 # only pytorch and onnx models
 aarch_models_urls=(
-"https://resources.djl.ai/test-models/pytorch/resnet18_serving_all.zip"
-"https://resources.djl.ai/test-models/onnxruntime/resnet18-v1-7.zip"
+  "https://resources.djl.ai/test-models/pytorch/resnet18_serving_all.zip"
+  "https://resources.djl.ai/test-models/onnxruntime/resnet18-v1-7.zip"
 )
 
 inf_models_urls=(
-"https://resources.djl.ai/test-models/pytorch/resnet18_inf1_1_12.tar.gz"
+  "https://resources.djl.ai/test-models/pytorch/resnet18_inf1_1_12.tar.gz"
 )
 
-download()
-{
+download() {
   urls=("$@")
-  for url in "${urls[@]}"
-  do
+  for url in "${urls[@]}"; do
    filename=${url##*/}
    # does not download the file, if file already exists
    if ! [ -f "${filename}" ]; then
-      curl -O "$url"
+      curl -sf -O "$url"
    fi
  done
 }
 
 case $platform in
-cpu|cpu-full|pytorch-cu117)
+cpu | cpu-full | pytorch-cu117)
  download "${general_platform_models_urls[@]}"
  ;;
 pytorch-inf1)
@@ -48,5 +49,6 @@ aarch64)
  ;;
 *)
  echo "Bad argument. Expecting one of the values: cpu, cpu-full, pytorch-cu117, pytorch-inf1, aarch64"
-  exit 1;;
+  exit 1
+  ;;
 esac
diff --git a/tests/integration/launch_container.sh b/tests/integration/launch_container.sh
index cec9e49b3..0aec6d74f 100755
--- a/tests/integration/launch_container.sh
+++ b/tests/integration/launch_container.sh
@@ -20,6 +20,13 @@ elif [[ "$platform" == *"inf1"* ]]; then # if the platform is inferentia
   host_device="/dev/neuron0"
 fi
 
+if [[ -f ${PWD}/docker_env ]]; then
+  env_file="--env-file ${PWD}/docker_env"
+fi
+
+rm -rf logs
+mkdir logs
+
 set -x
 # start the docker container
 container_id=$(docker run \
@@ -28,8 +35,10 @@ container_id=$(docker run \
   --network="host" \
   -v ${model_path}:/opt/ml/model \
   -v ${PWD}/logs:/opt/djl/logs \
-  -v ~/.aws:/root/.aws \
+  -v ~/.aws:/home/djl/.aws \
+  ${env_file} \
   -e TEST_TELEMETRY_COLLECTION='true' \
+  -u djl \
   ${runtime:+--runtime="${runtime}"} \
   ${shm:+--shm-size="${shm}"} \
   ${host_device:+--device "${host_device}"} \
@@ -53,7 +62,7 @@ while true; do
     echo "DJL serving started"
     break
   fi
-  if [[ "$( docker ps | wc -l )" == "1" ]]; then
+  if [[ "$(docker ps | wc -l)" == "1" ]]; then
    echo "Docker container shut down"
    exit 1
  fi
diff --git a/tests/integration/test_client.sh b/tests/integration/test_client.sh
index 9bdd555e1..863832ce6 100755
--- a/tests/integration/test_client.sh
+++ b/tests/integration/test_client.sh
@@ -1,36 +1,45 @@
 #!/usr/bin/env bash
 
-set -e
+content_type=$1
+content=$2
+url=$3
 
-url=$1
-content_type=$2
-content=$3
+if [[ -z "$url" ]]; then
+  url="http://127.0.0.1:8080/predictions/test"
+fi
 
 echo "Testing $url with content type: $content_type ..."
 
 if [[ "$content_type" == "tensor/ndlist" ]]; then
   djl-bench ndlist-gen -s $content -o test.ndlist
-  curl -f -X POST $url \
-    -T test.ndlist \
-    -H "Content-type: $content_type"
-  rm -rf test.ndlist
+  curl -sf -m 2 -X POST $url -o out.ndlist -T test.ndlist -H "Content-type: $content_type"
+  ret=$?
+  rm -rf test.ndlist out.ndlist
 elif [[ "$content_type" == "tensor/npz" ]]; then
   djl-bench ndlist-gen -s $content -z -o test.npz
-  curl -f -X POST $url \
-    -T test.npz \
-    -H "Content-type: $content_type"
-  rm -rf test.npz
+  curl -sf -m 2 -X POST $url -o out.npz -T test.npz -H "Content-type: $content_type"
+  ret=$?
+  rm -rf test.npz out.npz
 elif [[ "$content_type" == "text/plain" ]]; then
-  curl -f -X POST $url \
-    -d "$content" \
-    -H "Content-type: $content_type"
+  curl -sf -m 2 -X POST $url -d "$content" -H "Content-type: $content_type"
+  ret=$?
 elif [[ "$content_type" == "image/jpg" ]]; then
-  curl $content -o test.jpg
-  curl -f -X POST $url \
-    -T test.jpg \
-    -H "Content-type: $content_type"
-  rm -rf test.jpg
+  curl -sf -m 2 -X POST $url -T $content -H "Content-type: $content_type"
+  ret=$?
 else
   echo "Content type $content_type not supported!"
   exit 1
 fi
+
+if [[ -z "$EXPECT_TIMEOUT" ]]; then
+  if [[ $ret -ne 0 ]]; then
+    echo "Request failed: $ret"
+    exit 1
+  fi
+else
+  if [[ $ret -ne 28 ]]; then
+    echo "Expecting timeout, actual: $ret"
+    exit 1
+  fi
+fi
+
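Note on the revised test_client.sh interface: the positional arguments are now content type, then payload, with the target URL optional in third position (defaulting to the local "test" endpoint), and the EXPECT_TIMEOUT variable flips the assertion from "request succeeds" to "request times out". A few usage sketches, all drawn from the calls in the workflow above (djl-bench must be on the PATH for the tensor cases, since the script shells out to it):

    # POST the image fetched by download_models.sh to the default endpoint
    ./test_client.sh image/jpg models/kitten.jpg

    # generate a tensor payload with djl-bench and POST it to an explicit URL
    ./test_client.sh tensor/ndlist 1,3,224,224 http://127.0.0.1:8080/predictions/test

    # assert that the request times out (curl exit code 28) instead of succeeding
    EXPECT_TIMEOUT=1 ./test_client.sh image/jpg models/kitten.jpg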
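Why the dynamic-batch steps expect a timeout followed by a success: with SERVING_BATCH_SIZE=2 and SERVING_MAX_BATCH_DELAY=30000, the server holds a lone request for up to 30 s waiting to fill a batch of two, so a client with a 2-second timeout (curl -m 2, exit code 28) gives up first; the timed-out request stays queued server-side, and the next request completes the batch, so it returns promptly. A manual reproduction sketch under the same settings as the workflow (the cpu platform argument is illustrative, not prescribed by the diff):

    echo -en "SERVING_BATCH_SIZE=2\nSERVING_MAX_BATCH_DELAY=30000" > docker_env
    ./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models cpu \
      serve -m test::PyTorch=file:/opt/ml/model/resnet18_serving_all.zip

    curl -s -m 2 -X POST http://127.0.0.1:8080/predictions/test \
      -T models/kitten.jpg -H "Content-type: image/jpg" -o /dev/null
    echo "first request: $?"   # expected 28: client gave up while the batch waited

    curl -sf -m 2 -X POST http://127.0.0.1:8080/predictions/test \
      -T models/kitten.jpg -H "Content-type: image/jpg" -o /dev/null
    echo "second request: $?"  # expected 0: this request filled the batch of two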
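The launch_container.sh changes also switch the container to the non-root djl user (-u djl), which is why the AWS credentials mount moves from /root/.aws to /home/djl/.aws, and why the script now recreates the logs directory itself instead of relying on the workflow's mkdir. A quick manual check that the user switch took effect (an illustrative one-liner, not part of the test suite; it assumes the serving container is the only one running):

    # expect "djl" and the mounted credentials under /home/djl/.aws
    docker exec "$(docker ps -q | head -n 1)" sh -c 'whoami && ls ~/.aws'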