diff --git a/.github/workflows/docker-publish-hf.yml b/.github/workflows/docker-publish-hf.yml
deleted file mode 100644
index 958ca696b..000000000
--- a/.github/workflows/docker-publish-hf.yml
+++ /dev/null
@@ -1,85 +0,0 @@
-name: Build and push HuggingFace TGI docker image
-
-on:
-  workflow_dispatch:
-    inputs:
-      tgi-version:
-        description: 'tgi version'
-        required: true
-        default: '0.5.0'
-
-jobs:
-  create-runner:
-    runs-on: [ self-hosted, scheduler ]
-    steps:
-      - name: Create new CPU instance
-        id: create_cpu
-        run: |
-          cd /home/ubuntu/djl_benchmark_script/scripts
-          token=$( curl -X POST -H "Authorization: token ${{ secrets.ACTION_RUNNER_PERSONAL_TOKEN }}" \
-          https://api.github.com/repos/deepjavalibrary/djl-serving/actions/runners/registration-token \
-          --fail \
-          | jq '.token' | tr -d '"' )
-          ./start_instance.sh action_cpu $token djl-serving
-    outputs:
-      cpu_instance_id: ${{ steps.create_cpu.outputs.action_cpu_instance_id }}
-
-  build-and-push-image:
-    runs-on: [ self-hosted, cpu ]
-    timeout-minutes: 150
-    needs: create-runner
-    env:
-      TGI_VERSION: ${{github.event.inputs.tgi-version}}
-    steps:
-      - uses: actions/checkout@v3
-        with:
-          repository: huggingface/text-generation-inference
-          ref: v${{ env.TGI_VERSION }}
-      - uses: actions/checkout@v3
-        with:
-          repository: aws/deep-learning-containers
-          path: deep-learning-containers
-      - name: Setup Docker buildx
-        uses: docker/setup-buildx-action@v2
-        with:
-          install: true
-      - name: Inject slug/short variables
-        uses: rlespinasse/github-slug-action@v4.4.1
-      - name: Configure AWS Credentials
-        uses: aws-actions/configure-aws-credentials@v2
-        with:
-          aws-region: us-east-1
-      - name: Login to Amazon ECR
-        id: login-ecr
-        uses: aws-actions/amazon-ecr-login@v1
-        with:
-          registries: "125045733377"
-      - name: Clean docker env
-        run: |
-          yes | docker system prune -a --volumes
-      - name: Build and push docker image
-        uses: docker/build-push-action@v4
-        env:
-          REGISTRY: ${{ steps.login-ecr.outputs.registry }}
-          REPOSITORY: djl-serving
-        with:
-          context: .
-          file: deep-learning-containers/huggingface/pytorch/tgi/docker/0.5/py3/cu118/Dockerfile.gpu
-          push: true
-          target: sagemaker
-          platforms: 'linux/amd64'
-          provenance: false
-          tags: ${{ env.REGISTRY }}/${{ env.REPOSITORY }}:tgi-${{ env.TGI_VERSION }}
-          cache-from: type=gha
-          cache-to: type=gha,mode=max
-
-  stop-runner:
-    if: always()
-    runs-on: [ self-hosted, scheduler ]
-    needs: [build-and-push-image, create-runner]
-    steps:
-      - name: Stop all instances
-        run: |
-          cd /home/ubuntu/djl_benchmark_script/scripts
-          instance_id=${{ needs.create-runner.outputs.cpu_instance_id }}
-          ./stop_instance.sh $instance_id
\ No newline at end of file
diff --git a/.github/workflows/hf_tgi_integration.yml b/.github/workflows/hf_tgi_integration.yml
deleted file mode 100644
index 7433a7752..000000000
--- a/.github/workflows/hf_tgi_integration.yml
+++ /dev/null
@@ -1,114 +0,0 @@
-name: Huggingface TGI integration tests
-
-on:
-  workflow_dispatch:
-    inputs:
-      tgi-version:
-        description: 'tgi version'
-        required: true
-        default: '0.5.0'
-
-jobs:
-  create-runners:
-    runs-on: [self-hosted, scheduler]
-    steps:
-      - name: Create new G5 instance
-        id: create_gpu
-        run: |
-          cd /home/ubuntu/djl_benchmark_script/scripts
-          token=$( curl -X POST -H "Authorization: token ${{ secrets.ACTION_RUNNER_PERSONAL_TOKEN }}" \
-          https://api.github.com/repos/deepjavalibrary/djl-serving/actions/runners/registration-token \
-          --fail \
-          | jq '.token' | tr -d '"' )
-          ./start_instance.sh action_g5 $token djl-serving
-    outputs:
-      gpu_instance_id: ${{ steps.create_gpu.outputs.action_g5_instance_id }}
-
-  hf-tgi-test:
-    runs-on: [ self-hosted, g5 ]
-    timeout-minutes: 30
-    needs: create-runners
-    env:
-      TGI_VERSION: ${{github.event.inputs.tgi-version}}
-    steps:
-      - uses: actions/checkout@v3
-      - name: Clean env
-        run: |
-          yes | docker system prune -a --volumes
-          sudo rm -rf /home/ubuntu/actions-runner/_work/_tool/Java_Corretto_jdk/
-          echo "wait dpkg lock..."
-          while sudo fuser /var/{lib/{dpkg,apt/lists},cache/apt/archives}/lock >/dev/null 2>&1; do sleep 5; done
-      - name: Configure AWS Credentials
-        uses: aws-actions/configure-aws-credentials@v2
-        with:
-          aws-region: us-east-1
-      - name: Login to Amazon ECR
-        id: login-ecr
-        uses: aws-actions/amazon-ecr-login@v1
-        with:
-          registries: "125045733377"
-      - name: Pull docker
-        env:
-          REGISTRY: ${{ steps.login-ecr.outputs.registry }}
-        run: |
-          docker pull ${REGISTRY}/djl-serving:tgi-${TGI_VERSION}
-      - name: Test bloom-560m
-        env:
-          REGISTRY: ${{ steps.login-ecr.outputs.registry }}
-        run: |
-          HF_MODEL_ID=bigscience/bloom-560m && \
-          SM_NUM_GPUS=4 && \
-          TGI_VERSION=$TGI_VERSION && \
-          docker run --gpus all --shm-size 2g -itd --rm -p 8080:8080 \
-          -e SM_NUM_GPUS=$SM_NUM_GPUS -e HF_MODEL_ID=$HF_MODEL_ID ${REGISTRY}/djl-serving:tgi-${TGI_VERSION}
-          sleep 30
-          ret=$(curl http://localhost:8080/invocations -X POST \
-          -d '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":128}}' \
-          -H 'Content-Type: application/json')
-          [[ $ret != "[{\"generated_text\":\"What is Deep Learning?"* ]] && exit 1
-          docker rm -f $(docker ps -aq)
-      - name: Test gpt-neox-20b
-        env:
-          REGISTRY: ${{ steps.login-ecr.outputs.registry }}
-        run: |
-          HF_MODEL_ID=EleutherAI/gpt-neox-20b && \
-          SM_NUM_GPUS=4 && \
-          TGI_VERSION=$TGI_VERSION && \
-          docker run --gpus all --shm-size 2g -itd --rm -p 8080:8080 \
-          -e SM_NUM_GPUS=$SM_NUM_GPUS -e HF_MODEL_ID=$HF_MODEL_ID ${REGISTRY}/djl-serving:tgi-${TGI_VERSION}
-          sleep 120
-          ret=$(curl http://localhost:8080/invocations -X POST \
-          -d '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":128}}' \
-          -H 'Content-Type: application/json')
-          [[ $ret != "[{\"generated_text\":\"What is Deep Learning?"* ]] && exit 1
-          docker rm -f $(docker ps -aq)
-      - name: Test flan-t5-xxl
-        env:
-          REGISTRY: ${{ steps.login-ecr.outputs.registry }}
-        run: |
-          HF_MODEL_ID=google/flan-t5-xxl && \
-          SM_NUM_GPUS=4 && \
-          TGI_VERSION=$TGI_VERSION && \
-          docker run --gpus all --shm-size 2g -itd --rm -p 8080:8080 \
-          -e SM_NUM_GPUS=$SM_NUM_GPUS -e HF_MODEL_ID=$HF_MODEL_ID ${REGISTRY}/djl-serving:tgi-${TGI_VERSION}
-          sleep 180
-          ret=$(curl http://localhost:8080/invocations -X POST \
-          -d '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":128}}' \
-          -H 'Content-Type: application/json')
-          [[ $ret != "[{\"generated_text\""* ]] && exit 1
-          docker rm -f $(docker ps -aq)
-      - name: On fail step
-        if: ${{ failure() }}
-        run: |
-          docker rm -f $(docker ps -aq) || true
-
-  stop-runners:
-    if: always()
-    runs-on: [ self-hosted, scheduler ]
-    needs: [ create-runners, hf-tgi-test ]
-    steps:
-      - name: Stop all instances
-        run: |
-          cd /home/ubuntu/djl_benchmark_script/scripts
-          instance_id=${{ needs.create-runners.outputs.gpu_instance_id }}
-          ./stop_instance.sh $instance_id
\ No newline at end of file