32 changes: 26 additions & 6 deletions .github/workflows/build-container.yaml
@@ -14,23 +14,43 @@ concurrency:
cancel-in-progress: true

jobs:
starlette-cpu:
starlette-pytorch-cpu:
uses: ./.github/workflows/docker-build-action.yaml
with:
context: ./
repository: huggingface
image: hf-endpoints-inference-cpu
dockerfile: dockerfiles/starlette/Dockerfile.cpu
image: hf-endpoints-inference-pytorch-cpu
dockerfile: dockerfiles/starlette/pytorch/Dockerfile.cpu
secrets:
DOCKER_USERNAME: ${{ secrets.DOCKER_USERNAME }}
DOCKER_PASSWORD: ${{ secrets.DOCKER_PASSWORD }}
starlette-gpu:
starlette-pytorch-gpu:
uses: ./.github/workflows/docker-build-action.yaml
with:
context: ./
repository: huggingface
image: hf-endpoints-inference-gpu
dockerfile: dockerfiles/starlette/Dockerfile.gpu
image: hf-endpoints-inference-pytorch-gpu
dockerfile: dockerfiles/starlette/pytorch/Dockerfile.gpu
secrets:
DOCKER_USERNAME: ${{ secrets.DOCKER_USERNAME }}
DOCKER_PASSWORD: ${{ secrets.DOCKER_PASSWORD }}
starlette-tensorflow-cpu:
uses: ./.github/workflows/docker-build-action.yaml
with:
context: ./
repository: huggingface
image: hf-endpoints-inference-tensorflow-cpu
dockerfile: dockerfiles/starlette/tensorflow/Dockerfile.cpu
secrets:
DOCKER_USERNAME: ${{ secrets.DOCKER_USERNAME }}
DOCKER_PASSWORD: ${{ secrets.DOCKER_PASSWORD }}
starlette-tensorflow-gpu:
uses: ./.github/workflows/docker-build-action.yaml
with:
context: ./
repository: huggingface
image: hf-endpoints-inference-tensorflow-gpu
dockerfile: dockerfiles/starlette/tensorflow/Dockerfile.gpu
secrets:
DOCKER_USERNAME: ${{ secrets.DOCKER_USERNAME }}
DOCKER_PASSWORD: ${{ secrets.DOCKER_PASSWORD }}
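The four jobs above differ only in image name and Dockerfile path. As a design note, the same builds could in principle be driven from a single matrix job; this is only a sketch, assuming the reusable `docker-build-action.yaml` accepts these inputs unchanged:

```yaml
# hypothetical consolidation of the four build jobs via a matrix
jobs:
  starlette-build:
    strategy:
      matrix:
        framework: [pytorch, tensorflow]
        device: [cpu, gpu]
    uses: ./.github/workflows/docker-build-action.yaml
    with:
      context: ./
      repository: huggingface
      image: hf-endpoints-inference-${{ matrix.framework }}-${{ matrix.device }}
      dockerfile: dockerfiles/starlette/${{ matrix.framework }}/Dockerfile.${{ matrix.device }}
    secrets:
      DOCKER_USERNAME: ${{ secrets.DOCKER_USERNAME }}
      DOCKER_PASSWORD: ${{ secrets.DOCKER_PASSWORD }}
```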
114 changes: 114 additions & 0 deletions .github/workflows/gpu-integ-test.xxx
@@ -0,0 +1,114 @@
name: GPU - Run Integration Tests

on:
push:
branches:
- main
pull_request:
workflow_dispatch:

concurrency:
group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
cancel-in-progress: true


jobs:
start-runner:
name: Start self-hosted EC2 runner
runs-on: ubuntu-latest
env:
AWS_REGION: us-east-1
EC2_AMI_ID: ami-0dc1c26161f869ed1
EC2_INSTANCE_TYPE: g4dn.xlarge
EC2_SUBNET_ID: subnet-859322b4,subnet-b7533b96,subnet-47cfad21,subnet-a396b2ad,subnet-06576a4b,subnet-df0f6180
EC2_SECURITY_GROUP: sg-0bb210cd3ec725a13
EC2_IAM_ROLE: optimum-ec2-github-actions-role
outputs:
label: ${{ steps.start-ec2-runner.outputs.label }}
ec2-instance-id: ${{ steps.start-ec2-runner.outputs.ec2-instance-id }}
steps:
- name: Configure AWS credentials
uses: aws-actions/configure-aws-credentials@v1
with:
aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
aws-region: ${{ env.AWS_REGION }}
- name: Start EC2 runner
id: start-ec2-runner
uses: philschmid/philschmid-ec2-github-runner@main
with:
mode: start
github-token: ${{ secrets.GH_PERSONAL_ACCESS_TOKEN }}
ec2-image-id: ${{ env.EC2_AMI_ID }}
ec2-instance-type: ${{ env.EC2_INSTANCE_TYPE }}
subnet-id: ${{ env.EC2_SUBNET_ID }}
security-group-id: ${{ env.EC2_SECURITY_GROUP }}
iam-role-name: ${{ env.EC2_IAM_ROLE }}
aws-resource-tags: > # optional, requires additional permissions
[
{"Key": "Name", "Value": "ec2-optimum-github-runner"},
{"Key": "GitHubRepository", "Value": "${{ github.repository }}"}
]
pytorch-integration-test:
needs: start-runner # required to start the main job when the runner is ready
runs-on: ${{ needs.start-runner.outputs.label }} # run the job on the newly created runner
env:
AWS_REGION: us-east-1
steps:
- name: Checkout
uses: actions/checkout@v2
- name: Install Python dependencies
run: pip install -e .[test,dev,torch]
- name: Build Docker
run: docker build -t starlette-transformers:gpu -f dockerfiles/starlette/pytorch/Dockerfile.gpu .
- name: Run Integration Tests
env:
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
AWS_DEFAULT_REGION: us-east-1
run: make integ-test
tensorflow-integration-test:
needs:
- start-runner
- pytorch-integration-test
runs-on: ${{ needs.start-runner.outputs.label }} # run the job on the newly created runner
env:
AWS_REGION: us-east-1
steps:
- name: Checkout
uses: actions/checkout@v2
- name: Install Python dependencies
run: pip install -e .[test,dev,tensorflow]
- name: Build Docker
run: docker build -t starlette-transformers:gpu -f dockerfiles/starlette/tensorflow/Dockerfile.gpu .
- name: Run Integration Tests
env:
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
AWS_DEFAULT_REGION: us-east-1
run: make integ-test

stop-runner:
name: Stop self-hosted EC2 runner
needs:
- start-runner
- pytorch-integration-test
- tensorflow-integration-test
runs-on: ubuntu-latest
env:
AWS_REGION: us-east-1
if: ${{ always() }} # required to stop the runner even if an error happened in the previous jobs
steps:
- name: Configure AWS credentials
uses: aws-actions/configure-aws-credentials@v1
with:
aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
aws-region: ${{ env.AWS_REGION }}
- name: Stop EC2 runner
uses: philschmid/philschmid-ec2-github-runner@main
with:
mode: stop
github-token: ${{ secrets.GH_PERSONAL_ACCESS_TOKEN }}
label: ${{ needs.start-runner.outputs.label }}
ec2-instance-id: ${{ needs.start-runner.outputs.ec2-instance-id }}
@@ -1,4 +1,4 @@
name: Run Integration Tests
name: CPU - Run Integration Tests

on:
push:
@@ -13,7 +13,7 @@ concurrency:


jobs:
test:
pytorch-integration-test:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
@@ -24,7 +24,25 @@ jobs:
- name: Install Python dependencies
run: pip install -e .[test,dev,torch]
- name: Build Docker
run: docker build -t starlette-transformers:cpu -f dockerfiles/starlette/Dockerfile.cpu .
run: docker build -t starlette-transformers:cpu -f dockerfiles/starlette/pytorch/Dockerfile.cpu .
- name: Run Integration Tests
env:
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
AWS_DEFAULT_REGION: us-east-1
run: make integ-test
tensorflow-integration-test:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
- name: Set up Python 3.9
uses: actions/setup-python@v2
with:
python-version: 3.9
- name: Install Python dependencies
run: pip install -e .[test,dev,tensorflow]
- name: Build Docker
run: docker build -t starlette-transformers:cpu -f dockerfiles/starlette/tensorflow/Dockerfile.cpu .
- name: Run Integration Tests
env:
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
File renamed without changes.
@@ -12,7 +12,7 @@ concurrency:
cancel-in-progress: true

jobs:
test:
pytorch-unit-test:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
@@ -21,7 +21,7 @@ jobs:
with:
python-version: 3.9.12
- name: Install Python dependencies
run: pip install -e .[test,dev,torch]
run: pip install -e .[test,dev,torch,st]
- name: Run Unit test_const
run: python -m pytest -s -v ./tests/unit/test_const.py
- name: Run Unit test_handler
@@ -32,5 +32,25 @@ jobs:
run: python -m pytest -s -v ./tests/unit/test_serializer.py
- name: Run Unit test_utils
run: python -m pytest -s -v ./tests/unit/test_utils.py
# - name: Run Unit Tests
# run: make unit test
tensorflow-unit-test:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
- name: Set up Python 3.9
uses: actions/setup-python@v2
with:
python-version: 3.9.12
- name: Install Python dependencies
run: pip install -e .[test,dev,tensorflow]
- name: Run Unit test_const
run: python -m pytest -s -v ./tests/unit/test_const.py
- name: Run Unit test_handler
run: python -m pytest -s -v ./tests/unit/test_handler.py
- name: Run Unit test_sentence_transformers
run: python -m pytest -s -v ./tests/unit/test_sentence_transformers.py
- name: Run Unit test_serializer
run: python -m pytest -s -v ./tests/unit/test_serializer.py
- name: Run Unit test_utils
run: python -m pytest -s -v ./tests/unit/test_utils.py


10 changes: 5 additions & 5 deletions README.md
@@ -17,21 +17,21 @@ mkdir tmp2/
HF_MODEL_ID=hf-internal-testing/tiny-random-distilbert HF_MODEL_DIR=tmp2 HF_TASK=text-classification uvicorn src.huggingface_inference_toolkit.webservice_starlette:app --port 5000
```

```bash
HF_MODEL_ID=RobertoFont/pegasus-large-samsum HF_MODEL_DIR=tmp HF_TASK=text2text-generation uvicorn src.huggingface_inference_toolkit.webservice_starlette:app --port 5000
```

### Container


1. build the preferred container for either CPU or GPU.
1. Build the preferred container for either CPU or GPU, for PyTorch or TensorFlow.

_cpu images_
```bash
docker build -t starlette-transformers:cpu -f dockerfiles/starlette/Dockerfile.cpu .
docker build -t starlette-transformers:cpu -f dockerfiles/starlette/pytorch/Dockerfile.cpu .
docker build -t starlette-transformers:cpu -f dockerfiles/starlette/tensorflow/Dockerfile.cpu .
```

_gpu images_
```bash
docker build -t starlette-transformers:gpu -f dockerfiles/starlette/Dockerfile.gpu .
docker build -t starlette-transformers:gpu -f dockerfiles/starlette/pytorch/Dockerfile.gpu .
docker build -t starlette-transformers:gpu -f dockerfiles/starlette/tensorflow/Dockerfile.gpu .
```

2. Run the container, either providing environment variables that point to the Hub model you want to use or mounting a volume into the container where your model is stored.
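A minimal run sketch (the model id and task are the illustrative values from the local example above; the flags are standard Docker options, not part of this PR):

```bash
docker run -e HF_MODEL_ID=hf-internal-testing/tiny-random-distilbert \
  -e HF_TASK=text-classification \
  -p 5000:5000 starlette-transformers:cpu
```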
16 changes: 16 additions & 0 deletions dockerfiles/starlette/pytorch/Dockerfile.cpu
@@ -0,0 +1,16 @@
FROM huggingface/transformers-inference:4.20.1-pt1.11-cpu

# install starlette framework
COPY starlette_requirements.txt /tmp/requirements.txt
RUN pip install --no-cache-dir -r /tmp/requirements.txt && rm /tmp/requirements.txt
# TODO: think about a better solution -> the base container ships PyTorch 1.11, which is why torchvision is pinned below 0.13
RUN pip install --no-cache-dir sentence_transformers torchvision~="0.12.0"

# copy application
COPY src/huggingface_inference_toolkit huggingface_inference_toolkit
COPY src/huggingface_inference_toolkit/webservice_starlette.py webservice_starlette.py

# run app
ENTRYPOINT ["uvicorn", "webservice_starlette:app", "--host", "0.0.0.0", "--port", "5000"]


14 changes: 14 additions & 0 deletions dockerfiles/starlette/pytorch/Dockerfile.gpu
@@ -0,0 +1,14 @@
FROM huggingface/transformers-inference:4.20.1-pt1.11-cuda11.5

# install starlette framework
COPY starlette_requirements.txt /tmp/requirements.txt
RUN pip install --no-cache-dir -r /tmp/requirements.txt && rm /tmp/requirements.txt
# TODO: think about a better solution -> the base container ships PyTorch 1.11, which is why torchvision is pinned below 0.13
RUN pip install --no-cache-dir sentence_transformers torchvision~="0.12.0"

# copy application
COPY src/huggingface_inference_toolkit huggingface_inference_toolkit
COPY src/huggingface_inference_toolkit/webservice_starlette.py webservice_starlette.py

# run app
ENTRYPOINT ["uvicorn", "webservice_starlette:app", "--host", "0.0.0.0", "--port", "5000"]
@@ -1,4 +1,4 @@
FROM huggingface/transformers-inference:4.20.1-pt1.11-cpu
FROM huggingface/transformers-inference:4.21.1-tf2.9-cpu

# install starlette framework
COPY starlette_requirements.txt /tmp/requirements.txt
@@ -1,4 +1,4 @@
FROM huggingface/transformers-inference:4.20.1-pt1.11-cuda11.5
FROM huggingface/transformers-inference:4.21.1-tf2.9-cuda11.2

# install starlette framework
COPY starlette_requirements.txt /tmp/requirements.txt
6 changes: 3 additions & 3 deletions setup.py
@@ -26,13 +26,13 @@
"librosa",
"pyctcdecode>=0.3.0",
"phonemizer",
# sentence transformers
"sentence_transformers",
"torchvision<=0.12.0",
]

extras = {}

extras["st"] = ["sentence_transformers"]


# Hugging Face specific dependencies
# framework specific dependencies
extras["torch"] = ["torch>=1.8.0", "torchaudio"]
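Since sentence-transformers moves from the core requirements into an `st` extra, it now has to be requested explicitly at install time. A quick sketch of the two install flavors (the unit-test workflow above opts in via `.[test,dev,torch,st]`):

```bash
# core install, without sentence-transformers
pip install -e .

# opt in to sentence-transformers support via the new extra
pip install -e .[st]
```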
2 changes: 1 addition & 1 deletion src/huggingface_inference_toolkit/const.py
@@ -5,7 +5,7 @@
HF_MODEL_DIR = os.environ.get("HF_MODEL_DIR", "/opt/huggingface/model")
HF_MODEL_ID = os.environ.get("HF_MODEL_ID", None)
HF_TASK = os.environ.get("HF_TASK", None)
HF_FRAMEWORK = os.environ.get("HF_FRAMEWORK", "pytorch")
HF_FRAMEWORK = os.environ.get("HF_FRAMEWORK", None)
HF_REVISION = os.environ.get("HF_REVISION", None)
HF_HUB_TOKEN = os.environ.get("HF_HUB_TOKEN", None)
# custom handler consts
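Dropping the `"pytorch"` default means downstream code can no longer assume `HF_FRAMEWORK` is set. A minimal sketch of how a consumer might resolve it, under the assumption (not stated in this PR) that an unset value should fall back to whichever framework is importable:

```python
import importlib.util
import os

HF_FRAMEWORK = os.environ.get("HF_FRAMEWORK", None)

def resolve_framework() -> str:
    # an explicit environment setting always wins
    if HF_FRAMEWORK is not None:
        return HF_FRAMEWORK
    # otherwise, fall back to whichever runtime is installed
    if importlib.util.find_spec("torch") is not None:
        return "pytorch"
    if importlib.util.find_spec("tensorflow") is not None:
        return "tensorflow"
    raise RuntimeError("HF_FRAMEWORK is unset and no supported framework is installed")
```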
13 changes: 12 additions & 1 deletion src/huggingface_inference_toolkit/sentence_transformers_utils.py
@@ -1,4 +1,15 @@
from sentence_transformers import CrossEncoder, SentenceTransformer, util
import importlib.util


_sentence_transformers = importlib.util.find_spec("sentence_transformers") is not None


def is_sentence_transformers_available():
return _sentence_transformers


if is_sentence_transformers_available():
from sentence_transformers import CrossEncoder, SentenceTransformer, util


class SentenceSimilarityPipeline:
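With the import now guarded, callers are expected to check availability before touching any sentence-transformers classes. A minimal usage sketch (the `embed` helper and model id are illustrative, not part of this PR):

```python
from huggingface_inference_toolkit.sentence_transformers_utils import (
    is_sentence_transformers_available,
)

def embed(sentences):
    # fail fast with an actionable message when the optional `st` extra is missing
    if not is_sentence_transformers_available():
        raise ImportError(
            "sentence_transformers is not installed; install it with `pip install -e .[st]`"
        )
    from sentence_transformers import SentenceTransformer
    model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")  # illustrative model id
    return model.encode(sentences)
```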