diff --git a/.github/workflows/docker-publish-hf.yml b/.github/workflows/docker-publish-hf.yml
deleted file mode 100644
index 958ca696b..000000000
--- a/.github/workflows/docker-publish-hf.yml
+++ /dev/null
@@ -1,85 +0,0 @@
-name: Build and push HuggingFace TGI docker image
-
-on:
-  workflow_dispatch:
-    inputs:
-      tgi-version:
-        description: 'tgi version'
-        required: true
-        default: '0.5.0'
-
-jobs:
-  create-runner:
-    runs-on: [ self-hosted, scheduler ]
-    steps:
-      - name: Create new CPU instance
-        id: create_cpu
-        run: |
-          cd /home/ubuntu/djl_benchmark_script/scripts
-          token=$( curl -X POST -H "Authorization: token ${{ secrets.ACTION_RUNNER_PERSONAL_TOKEN }}" \
-          https://api.github.com/repos/deepjavalibrary/djl-serving/actions/runners/registration-token \
-          --fail \
-          | jq '.token' | tr -d '"' )
-          ./start_instance.sh action_cpu $token djl-serving
-    outputs:
-      cpu_instance_id: ${{ steps.create_cpu.outputs.action_cpu_instance_id }}
-
-  build-and-push-image:
-    runs-on: [ self-hosted, cpu ]
-    timeout-minutes: 150
-    needs: create-runner
-    env:
-      TGI_VERSION: ${{github.event.inputs.tgi-version}}
-    steps:
-      - uses: actions/checkout@v3
-        with:
-          repository: huggingface/text-generation-inference
-          ref: v${{ env.TGI_VERSION }}
-      - uses: actions/checkout@v3
-        with:
-          repository: aws/deep-learning-containers
-          path: deep-learning-containers
-      - name: Setup Docker buildx
-        uses: docker/setup-buildx-action@v2
-        with:
-          install: true
-      - name: Inject slug/short variables
-        uses: rlespinasse/github-slug-action@v4.4.1
-      - name: Configure AWS Credentials
-        uses: aws-actions/configure-aws-credentials@v2
-        with:
-          aws-region: us-east-1
-      - name: Login to Amazon ECR
-        id: login-ecr
-        uses: aws-actions/amazon-ecr-login@v1
-        with:
-          registries: "125045733377"
-      - name: Clean docker env
-        run: |
-          yes | docker system prune -a --volumes
-      - name: Build and push docker image
-        uses: docker/build-push-action@v4
-        env:
-          REGISTRY: ${{ steps.login-ecr.outputs.registry }}
-          REPOSITORY: djl-serving
-        with:
-          context: .
-          file: deep-learning-containers/huggingface/pytorch/tgi/docker/0.5/py3/cu118/Dockerfile.gpu
-          push: true
-          target: sagemaker
-          platforms: 'linux/amd64'
-          provenance: false
-          tags: ${{ env.REGISTRY }}/${{ env.REPOSITORY }}:tgi-${{ env.TGI_VERSION }}
-          cache-from: type=gha
-          cache-to: type=gha,mode=max
-
-  stop-runner:
-    if: always()
-    runs-on: [ self-hosted, scheduler ]
-    needs: [build-and-push-image, create-runner]
-    steps:
-      - name: Stop all instances
-        run: |
-          cd /home/ubuntu/djl_benchmark_script/scripts
-          instance_id=${{ needs.create-runner.outputs.cpu_instance_id }}
-          ./stop_instance.sh $instance_id
\ No newline at end of file
diff --git a/.github/workflows/hf_tgi_integration.yml b/.github/workflows/hf_tgi_integration.yml
deleted file mode 100644
index 7433a7752..000000000
--- a/.github/workflows/hf_tgi_integration.yml
+++ /dev/null
@@ -1,114 +0,0 @@
-name: Huggingface TGI integration tests
-
-on:
-  workflow_dispatch:
-    inputs:
-      tgi-version:
-        description: 'tgi version'
-        required: true
-        default: '0.5.0'
-
-jobs:
-  create-runners:
-    runs-on: [self-hosted, scheduler]
-    steps:
-      - name: Create new G5 instance
-        id: create_gpu
-        run: |
-          cd /home/ubuntu/djl_benchmark_script/scripts
-          token=$( curl -X POST -H "Authorization: token ${{ secrets.ACTION_RUNNER_PERSONAL_TOKEN }}" \
-          https://api.github.com/repos/deepjavalibrary/djl-serving/actions/runners/registration-token \
-          --fail \
-          | jq '.token' | tr -d '"' )
-          ./start_instance.sh action_g5 $token djl-serving
-    outputs:
-      gpu_instance_id: ${{ steps.create_gpu.outputs.action_g5_instance_id }}
-
-  hf-tgi-test:
-    runs-on: [ self-hosted, g5 ]
-    timeout-minutes: 30
-    needs: create-runners
-    env:
-      TGI_VERSION: ${{github.event.inputs.tgi-version}}
-    steps:
-      - uses: actions/checkout@v3
-      - name: Clean env
-        run: |
-          yes | docker system prune -a --volumes
-          sudo rm -rf /home/ubuntu/actions-runner/_work/_tool/Java_Corretto_jdk/
-          echo "wait dpkg lock..."
-          while sudo fuser /var/{lib/{dpkg,apt/lists},cache/apt/archives}/lock >/dev/null 2>&1; do sleep 5; done
-      - name: Configure AWS Credentials
-        uses: aws-actions/configure-aws-credentials@v2
-        with:
-          aws-region: us-east-1
-      - name: Login to Amazon ECR
-        id: login-ecr
-        uses: aws-actions/amazon-ecr-login@v1
-        with:
-          registries: "125045733377"
-      - name: Pull docker
-        env:
-          REGISTRY: ${{ steps.login-ecr.outputs.registry }}
-        run: |
-          docker pull ${REGISTRY}/djl-serving:tgi-${TGI_VERSION}
-      - name: Test bloom-560m
-        env:
-          REGISTRY: ${{ steps.login-ecr.outputs.registry }}
-        run: |
-          HF_MODEL_ID=bigscience/bloom-560m && \
-          SM_NUM_GPUS=4 && \
-          TGI_VERSION=$TGI_VERSION && \
-          docker run --gpus all --shm-size 2g -itd --rm -p 8080:8080 \
-          -e SM_NUM_GPUS=$SM_NUM_GPUS -e HF_MODEL_ID=$HF_MODEL_ID ${REGISTRY}/djl-serving:tgi-${TGI_VERSION}
-          sleep 30
-          ret=$(curl http://localhost:8080/invocations -X POST \
-          -d '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":128}}' \
-          -H 'Content-Type: application/json')
-          [[ $ret != "[{\"generated_text\":\"What is Deep Learning?"* ]] && exit 1
-          docker rm -f $(docker ps -aq)
-      - name: Test gpt-neox-20b
-        env:
-          REGISTRY: ${{ steps.login-ecr.outputs.registry }}
-        run: |
-          HF_MODEL_ID=EleutherAI/gpt-neox-20b && \
-          SM_NUM_GPUS=4 && \
-          TGI_VERSION=$TGI_VERSION && \
-          docker run --gpus all --shm-size 2g -itd --rm -p 8080:8080 \
-          -e SM_NUM_GPUS=$SM_NUM_GPUS -e HF_MODEL_ID=$HF_MODEL_ID ${REGISTRY}/djl-serving:tgi-${TGI_VERSION}
-          sleep 120
-          ret=$(curl http://localhost:8080/invocations -X POST \
-          -d '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":128}}' \
-          -H 'Content-Type: application/json')
-          [[ $ret != "[{\"generated_text\":\"What is Deep Learning?"* ]] && exit 1
-          docker rm -f $(docker ps -aq)
-      - name: Test flan-t5-xxl
-        env:
-          REGISTRY: ${{ steps.login-ecr.outputs.registry }}
-        run: |
-          HF_MODEL_ID=google/flan-t5-xxl && \
-          SM_NUM_GPUS=4 && \
-          TGI_VERSION=$TGI_VERSION && \
-          docker run --gpus all --shm-size 2g -itd --rm -p 8080:8080 \
-          -e SM_NUM_GPUS=$SM_NUM_GPUS -e HF_MODEL_ID=$HF_MODEL_ID ${REGISTRY}/djl-serving:tgi-${TGI_VERSION}
-          sleep 180
-          ret=$(curl http://localhost:8080/invocations -X POST \
-          -d '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":128}}' \
-          -H 'Content-Type: application/json')
-          [[ $ret != "[{\"generated_text\""* ]] && exit 1
-          docker rm -f $(docker ps -aq)
-      - name: On fail step
-        if: ${{ failure() }}
-        run: |
-          docker rm -f $(docker ps -aq) || true
-
-  stop-runners:
-    if: always()
-    runs-on: [ self-hosted, scheduler ]
-    needs: [ create-runners, hf-tgi-test ]
-    steps:
-      - name: Stop all instances
-        run: |
-          cd /home/ubuntu/djl_benchmark_script/scripts
-          instance_id=${{ needs.create-runners.outputs.gpu_instance_id }}
-          ./stop_instance.sh $instance_id
\ No newline at end of file