Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[serving] Adds a few more integration tests #384

Merged
merged 1 commit into from
Dec 11, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
98 changes: 64 additions & 34 deletions .github/workflows/integration.yml
Original file line number Diff line number Diff line change
Expand Up @@ -74,43 +74,73 @@ jobs:
working-directory: tests/integration
run: |
docker pull deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG
mkdir logs
./download_models.sh ${{ matrix.arch }}
- name: Test Pytorch python mode
- name: Test Python model
if: ${{ matrix.arch != 'cpu' }}
working-directory: tests/integration
run: |
./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models ${{ matrix.arch }} \
serve -m test::Python=file:///opt/ml/model/resnet18_serving_all.zip
./test_client.sh http://127.0.0.1:8080/predictions/test image/jpg https://resources.djl.ai/images/kitten.jpg
serve -m test::Python=file:/opt/ml/model/resnet18_serving_all.zip
./test_client.sh image/jpg models/kitten.jpg
./test_client.sh tensor/ndlist 1,3,224,224
./test_client.sh tensor/npz 1,3,224,224
docker rm -f $(docker ps -aq)
- name: Test dynamic batch with Python model
if: ${{ matrix.arch != 'cpu' }}
working-directory: tests/integration
run: |
echo -en "SERVING_BATCH_SIZE=2\nSERVING_MAX_BATCH_DELAY=30000" > docker_env
./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models ${{ matrix.arch }} \
serve -m test::Python=file:/opt/ml/model/resnet18_serving_all.zip
EXPECT_TIMEOUT=1 ./test_client.sh image/jpg models/kitten.jpg
./test_client.sh image/jpg models/kitten.jpg
docker rm -f $(docker ps -aq)
rm docker_env
- name: Test PyTorch model
working-directory: tests/integration
run: |
./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models ${{ matrix.arch }} \
serve -m test::PyTorch=file:/opt/ml/model/resnet18_serving_all.zip
./test_client.sh image/jpg models/kitten.jpg
docker rm -f $(docker ps -aq)
- name: Test PyTorch model binary mode
working-directory: tests/integration
run: |
./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models ${{ matrix.arch }} \
serve -m "test::PyTorch=file:/opt/ml/model/resnet18_serving_all.zip?translatorFactory=ai.djl.translate.NoopServingTranslatorFactory&application=undefined"
./test_client.sh tensor/ndlist 1,3,224,224
./test_client.sh tensor/npz 1,3,224,224
docker rm -f $(docker ps -aq)
- name: Test PyTorch Binary mode
- name: Test dynamic batch with PyTorch model
working-directory: tests/integration
run: |
echo -en "SERVING_BATCH_SIZE=2\nSERVING_MAX_BATCH_DELAY=30000" > docker_env
./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models ${{ matrix.arch }} \
serve -m test::PyTorch=file:///opt/ml/model/resnet18_serving_all.zip?model_name=resnet18
./test_client.sh http://127.0.0.1:8080/predictions/test image/jpg https://resources.djl.ai/images/kitten.jpg
serve -m test::PyTorch=file:/opt/ml/model/resnet18_serving_all.zip
EXPECT_TIMEOUT=1 ./test_client.sh image/jpg models/kitten.jpg
./test_client.sh image/jpg models/kitten.jpg
docker rm -f $(docker ps -aq)
- name: Test MxNet binary mode
rm docker_env
- name: Test MXNet model
working-directory: tests/integration
run: |
./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models ${{ matrix.arch }} \
serve -m test::MXNet=file:///opt/ml/model/ssd_resnet50.zip?model_name=ssd_resnet50
./test_client.sh http://127.0.0.1:8080/predictions/test image/jpg https://resources.djl.ai/images/kitten.jpg
serve -m test::MXNet=file:/opt/ml/model/ssd_resnet50.zip
./test_client.sh image/jpg models/kitten.jpg
docker rm -f $(docker ps -aq)
- name: Test ONNX binary mode
- name: Test ONNX model
working-directory: tests/integration
run: |
./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models ${{ matrix.arch }} \
serve -m test::OnnxRuntime=file:///opt/ml/model/resnet18-v1-7.zip?model_name=resnet18-v1-7
./test_client.sh http://127.0.0.1:8080/predictions/test image/jpg https://resources.djl.ai/images/kitten.jpg
serve -m test::OnnxRuntime=file:/opt/ml/model/resnet18-v1-7.zip
./test_client.sh image/jpg models/kitten.jpg
docker rm -f $(docker ps -aq)
- name: Test Tensorflow binary mode
- name: Test TensorFlow model binary mode
working-directory: tests/integration
run: |
./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models ${{ matrix.arch }} \
serve -m test::TensorFlow=file:///opt/ml/model/resnet50v1.zip?model_name=resnet50
./test_client.sh http://127.0.0.1:8080/predictions/test tensor/ndlist 1,224,224,3
serve -m test::TensorFlow=file:/opt/ml/model/resnet50v1.zip?model_name=resnet50
./test_client.sh tensor/ndlist 1,224,224,3
docker rm -f $(docker ps -aq)
- name: On fail step
if: ${{ failure() }}
Expand Down Expand Up @@ -157,19 +187,19 @@ jobs:
docker pull deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG
mkdir logs
./download_models.sh pytorch-inf1
- name: Test Pytorch Binary mode
- name: Test PyTorch model
working-directory: tests/integration
run: |
./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models pytorch-inf1 \
serve -m test::PyTorch=file:///opt/ml/model/resnet18_inf1_1_12.tar.gz?model_name=resnet18_inf1_1_12
./test_client.sh http://127.0.0.1:8080/predictions/test image/jpg https://resources.djl.ai/images/kitten.jpg
serve -m test::PyTorch=file:/opt/ml/model/resnet18_inf1_1_12.tar.gz?model_name=resnet18_inf1_1_12
./test_client.sh image/jpg models/kitten.jpg
docker rm -f $(docker ps -aq)
- name: Test Pytorch Python mode
- name: Test Python mode
working-directory: tests/integration
run: |
./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models pytorch-inf1 \
serve -m test::Python=file:///opt/ml/model/resnet18_inf1_1_12.tar.gz
./test_client.sh http://127.0.0.1:8080/predictions/test image/jpg https://resources.djl.ai/images/kitten.jpg
serve -m test::Python=file:/opt/ml/model/resnet18_inf1_1_12.tar.gz
./test_client.sh image/jpg models/kitten.jpg
docker rm -f $(docker ps -aq)
- name: On fail step
if: ${{ failure() }}
Expand Down Expand Up @@ -216,19 +246,19 @@ jobs:
docker pull deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG
mkdir logs
./download_models.sh pytorch-cu117
- name: Test Pytorch python mode
- name: Test Python model
working-directory: tests/integration
run: |
./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models pytorch-cu117 \
serve -m test::Python=file:///opt/ml/model/resnet18_serving_all.zip
./test_client.sh http://127.0.0.1:8080/predictions/test image/jpg https://resources.djl.ai/images/kitten.jpg
serve -m test::Python=file:/opt/ml/model/resnet18_serving_all.zip
./test_client.sh image/jpg models/kitten.jpg
docker rm -f $(docker ps -aq)
- name: Test Pytorch Binary mode
- name: Test PyTorch model
working-directory: tests/integration
run: |
./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models pytorch-cu117 \
serve -m test::PyTorch=file:///opt/ml/model/resnet18_serving_all.zip?model_name=resnet18
./test_client.sh http://127.0.0.1:8080/predictions/test image/jpg https://resources.djl.ai/images/kitten.jpg
serve -m test::PyTorch=file:/opt/ml/model/resnet18_serving_all.zip
./test_client.sh image/jpg models/kitten.jpg
docker rm -f $(docker ps -aq)
- name: On fail step
if: ${{ failure() }}
Expand Down Expand Up @@ -274,19 +304,19 @@ jobs:
docker pull deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG
mkdir logs
./download_models.sh aarch64
- name: Test PyTorch Binary mode
- name: Test PyTorch model
working-directory: tests/integration
run: |
./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models aarch64 \
serve -m test::PyTorch=file:///opt/ml/model/resnet18_serving_all.zip?model_name=resnet18
./test_client.sh http://127.0.0.1:8080/predictions/test image/jpg https://resources.djl.ai/images/kitten.jpg
serve -m test::PyTorch=file:/opt/ml/model/resnet18_serving_all.zip
./test_client.sh image/jpg models/kitten.jpg
docker rm -f $(docker ps -aq)
- name: Test ONNX binary mode
- name: Test ONNX model
working-directory: tests/integration
run: |
./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models aarch64 \
serve -m test::OnnxRuntime=file:///opt/ml/model/resnet18-v1-7.zip?model_name=resnet18-v1-7
./test_client.sh http://127.0.0.1:8080/predictions/test image/jpg https://resources.djl.ai/images/kitten.jpg
serve -m test::OnnxRuntime=file:/opt/ml/model/resnet18-v1-7.zip
./test_client.sh image/jpg models/kitten.jpg
docker rm -f $(docker ps -aq)
- name: Check telemetry collection
working-directory: tests/integration
Expand Down
3 changes: 3 additions & 0 deletions tests/integration/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
/docker_env
/logs
/models
30 changes: 16 additions & 14 deletions tests/integration/download_models.sh
Original file line number Diff line number Diff line change
@@ -1,43 +1,44 @@
#!/bin/bash

set -e

platform=$1 # expected values are "cpu" "cpu-full" "pytorch-cu117" "pytorch-inf1" "aarch64"

rm -rf models
mkdir models && cd models
curl -sf -O https://resources.djl.ai/images/kitten.jpg

# all platform models except aarch
general_platform_models_urls=(
"https://resources.djl.ai/test-models/pytorch/resnet18_serving_all.zip"
"https://resources.djl.ai/test-models/tensorflow/resnet50v1.zip"
"https://resources.djl.ai/test-models/onnxruntime/resnet18-v1-7.zip"
"https://resources.djl.ai/test-models/mxnet/ssd_resnet50.zip"
"https://resources.djl.ai/test-models/pytorch/resnet18_serving_all.zip"
"https://resources.djl.ai/test-models/tensorflow/resnet50v1.zip"
"https://resources.djl.ai/test-models/onnxruntime/resnet18-v1-7.zip"
"https://resources.djl.ai/test-models/mxnet/ssd_resnet50.zip"
)

# only pytorch and onnx models
aarch_models_urls=(
"https://resources.djl.ai/test-models/pytorch/resnet18_serving_all.zip"
"https://resources.djl.ai/test-models/onnxruntime/resnet18-v1-7.zip"
"https://resources.djl.ai/test-models/pytorch/resnet18_serving_all.zip"
"https://resources.djl.ai/test-models/onnxruntime/resnet18-v1-7.zip"
)

inf_models_urls=(
"https://resources.djl.ai/test-models/pytorch/resnet18_inf1_1_12.tar.gz"
"https://resources.djl.ai/test-models/pytorch/resnet18_inf1_1_12.tar.gz"
)

download()
{
download() {
urls=("$@")
for url in "${urls[@]}"
do
for url in "${urls[@]}"; do
filename=${url##*/}
# does not download the file, if file already exists
if ! [ -f "$filename" ]; then
curl -O "$url"
curl -sf -O "$url"
fi
done
}

case $platform in
cpu|cpu-full|pytorch-cu117)
cpu | cpu-full | pytorch-cu117)
download "${general_platform_models_urls[@]}"
;;
pytorch-inf1)
Expand All @@ -48,5 +49,6 @@ aarch64)
;;
*)
echo "Bad argument. Expecting one of the values: cpu, cpu-full, pytorch-cu117, pytorch-inf1, aarch64"
exit 1;;
exit 1
;;
esac
13 changes: 11 additions & 2 deletions tests/integration/launch_container.sh
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,13 @@ elif [[ "$platform" == *"inf1"* ]]; then # if the platform is inferentia
host_device="/dev/neuron0"
fi

if [[ -f ${PWD}/docker_env ]]; then
env_file="--env-file ${PWD}/docker_env"
fi

rm -rf logs
mkdir logs

set -x
# start the docker container
container_id=$(docker run \
Expand All @@ -28,8 +35,10 @@ container_id=$(docker run \
--network="host" \
-v ${model_path}:/opt/ml/model \
-v ${PWD}/logs:/opt/djl/logs \
-v ~/.aws:/root/.aws \
-v ~/.aws:/home/djl/.aws \
${env_file} \
-e TEST_TELEMETRY_COLLECTION='true' \
-u djl \
${runtime:+--runtime="${runtime}"} \
${shm:+--shm-size="${shm}"} \
${host_device:+--device "${host_device}"} \
Expand All @@ -53,7 +62,7 @@ while true; do
echo "DJL serving started"
break
fi
if [[ "$( docker ps | wc -l )" == "1" ]]; then
if [[ "$(docker ps | wc -l)" == "1" ]]; then
echo "Docker container shut down"
exit 1
fi
Expand Down
49 changes: 29 additions & 20 deletions tests/integration/test_client.sh
Original file line number Diff line number Diff line change
@@ -1,36 +1,45 @@
#!/usr/bin/env bash

set -e
content_type=$1
content=$2
url=$3

url=$1
content_type=$2
content=$3
if [[ -z "$url" ]]; then
url="http://127.0.0.1:8080/predictions/test"
fi

echo "Testing $url with content type: $content_type ..."

if [[ "$content_type" == "tensor/ndlist" ]]; then
djl-bench ndlist-gen -s $content -o test.ndlist
curl -f -X POST $url \
-T test.ndlist \
-H "Content-type: $content_type"
rm -rf test.ndlist
curl -sf -m 2 -X POST $url -o out.ndlist -T test.ndlist -H "Content-type: $content_type"
ret=$?
rm -rf test.ndlist out.ndlist
elif [[ "$content_type" == "tensor/npz" ]]; then
djl-bench ndlist-gen -s $content -z -o test.npz
curl -f -X POST $url \
-T test.npz \
-H "Content-type: $content_type"
rm -rf test.npz
curl -sf -m 2 -X POST $url -o out.npz -T test.npz -H "Content-type: $content_type"
ret=$?
rm -rf test.npz out.npz
elif [[ "$content_type" == "text/plain" ]]; then
curl -f -X POST $url \
-d "$content" \
-H "Content-type: $content_type"
curl -sf -m 2 -X POST $url -d "$content" -H "Content-type: $content_type"
ret=$?
elif [[ "$content_type" == "image/jpg" ]]; then
curl $content -o test.jpg
curl -f -X POST $url \
-T test.jpg \
-H "Content-type: $content_type"
rm -rf test.jpg
curl -sf -m 2 -X POST $url -T $content -H "Content-type: $content_type"
ret=$?
else
echo "Content type $content_type not supported!"
exit 1
fi

if [[ -z "$EXPECT_TIMEOUT" ]]; then
if [[ $ret -ne 0 ]]; then
echo "Request failed: $ret"
exit 1
fi
else
if [[ $ret -ne 28 ]]; then
echo "Expecting time out, actual: $ret"
exit 1
fi
fi