Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add spaCy to inference API #63

Merged
merged 6 commits into from
Jun 4, 2021
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
"sentence-similarity",
"text-generation",
"text-to-speech",
"token-classification",
}


Expand Down
29 changes: 29 additions & 0 deletions api-inference-community/docker_images/spacy/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# Base image bundles uvicorn + gunicorn on Python 3.8.
FROM tiangolo/uvicorn-gunicorn:python3.8
LABEL maintainer="Omar Sanseviero omar@huggingface.com"

# Add any system dependency here
# RUN apt-get update -y && apt-get install libXXX -y

# Requirements are copied and installed before the app code so the pip
# layer stays cached when only source files change.
COPY ./requirements.txt /app
RUN pip install --no-cache-dir -r requirements.txt
COPY ./prestart.sh /app/


# Most DL models are quite large in terms of memory, using workers is a HUGE
# slowdown because of the fork and GIL with python.
# Using multiple pods seems like a better default strategy.
# Feel free to override if it does not make sense for your library.
ARG max_workers=1
ENV MAX_WORKERS=$max_workers
# NOTE(review): /data is presumably a mounted volume shared across
# restarts so model downloads are cached — confirm in deployment config.
ENV HUGGINGFACE_HUB_CACHE=/data
osanseviero marked this conversation as resolved.
Show resolved Hide resolved

# Necessary on GPU environment docker.
# TIMEOUT env variable is used by nvcr.io/nvidia/pytorch:xx for another purpose
# rendering TIMEOUT defined by uvicorn impossible to use correctly
# We're overriding it to be renamed UVICORN_TIMEOUT
# UVICORN_TIMEOUT is a useful variable for very large models that take more
# than 30s (the default) to load in memory.
# If UVICORN_TIMEOUT is too low, uvicorn will simply never loads as it will
# kill workers all the time before they finish.
RUN sed -i 's/TIMEOUT/UVICORN_TIMEOUT/g' /gunicorn_conf.py
COPY ./app /app/app
Empty file.
73 changes: 73 additions & 0 deletions api-inference-community/docker_images/spacy/app/main.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
import logging
import os
from typing import Dict, Type

from api_inference_community.routes import pipeline_route, status_ok
from app.pipelines import Pipeline, TokenClassificationPipeline
from starlette.applications import Starlette
from starlette.routing import Route


# Task and model id are injected by the deployment environment.
# NOTE(review): `startup_event` and the __main__ block below re-read these
# values from os.environ directly, so this module-level copy is unused
# there — consider consolidating.
TASK = os.getenv("TASK")
MODEL_ID = os.getenv("MODEL_ID")


logger = logging.getLogger(__name__)


# Add the allowed tasks
# Supported tasks are:
# - text-generation
# - text-classification
# - token-classification
# - translation
# - summarization
# - automatic-speech-recognition
# - ...
# For instance
# from app.pipelines import AutomaticSpeechRecognitionPipeline
# ALLOWED_TASKS = {"automatic-speech-recognition": AutomaticSpeechRecognitionPipeline}
# You can check the requirements and expectations of each pipelines in their respective
# directories. Implement directly within the directories.
# Only token classification is wired up for this image; requests for any
# other task are rejected by `get_pipeline`.
ALLOWED_TASKS: Dict[str, Type[Pipeline]] = {
    "token-classification": TokenClassificationPipeline
}


def get_pipeline(task: str, model_id: str) -> Pipeline:
    """Instantiate the pipeline class registered for `task`.

    Raises:
        EnvironmentError: if `task` has no entry in ALLOWED_TASKS.
    """
    pipeline_cls = ALLOWED_TASKS.get(task)
    if pipeline_cls is None:
        raise EnvironmentError(f"{task} is not a valid pipeline for model : {model_id}")
    return pipeline_cls(model_id)


# NOTE(review): both routes match every path; assumes Starlette dispatches
# in declaration order, so non-POST requests hit `status_ok` and POSTs
# fall through to `pipeline_route` — confirm against Starlette routing docs.
routes = [
    Route("/{whatever:path}", status_ok),
    Route("/{whatever:path}", pipeline_route, methods=["POST"]),
]

app = Starlette(routes=routes)
# DEBUG=1 fully relaxes CORS — a development convenience, never for prod.
if os.environ.get("DEBUG", "") == "1":
    from starlette.middleware.cors import CORSMiddleware

    app.add_middleware(
        CORSMiddleware, allow_origins=["*"], allow_headers=["*"], allow_methods=["*"]
    )


@app.on_event("startup")
async def startup_event():
    """Configure uvicorn access logging and load the model once at boot."""
    logger = logging.getLogger("uvicorn.access")
    handler = logging.StreamHandler()
    handler.setFormatter(logging.Formatter("%(asctime)s - %(levelname)s - %(message)s"))
    logger.handlers = [handler]

    # Fail fast: a missing TASK/MODEL_ID (KeyError) or an unsupported task
    # (EnvironmentError from get_pipeline) aborts startup before the server
    # accepts traffic.
    task = os.environ["TASK"]
    model_id = os.environ["MODEL_ID"]
    app.pipeline = get_pipeline(task, model_id)


if __name__ == "__main__":
    # Smoke test: building the pipeline directly verifies that the
    # environment variables and the model load correctly without
    # starting the web server.
    task = os.environ["TASK"]
    model_id = os.environ["MODEL_ID"]

    get_pipeline(task, model_id)
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
from app.pipelines.base import Pipeline, PipelineException # isort:skip

from app.pipelines.audio_source_separation import AudioSourceSeparationPipeline
from app.pipelines.automatic_speech_recognition import (
AutomaticSpeechRecognitionPipeline,
)
from app.pipelines.feature_extraction import FeatureExtractionPipeline
from app.pipelines.image_classification import ImageClassificationPipeline
from app.pipelines.question_answering import QuestionAnsweringPipeline
from app.pipelines.sentence_similarity import SentenceSimilarityPipeline
from app.pipelines.text_to_speech import TextToSpeechPipeline
from app.pipelines.token_classification import TokenClassificationPipeline
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
from typing import Tuple

import numpy as np
from app.pipelines import Pipeline


class AudioSourceSeparationPipeline(Pipeline):
    def __init__(self, model_id: str):
        # Template: preload the model, processors and tokenizer here.
        # This method runs once per worker, so all heavy I/O belongs in it.
        # A `self.sampling_rate` attribute is required so the framework
        # can decode incoming audio at the rate the model expects.
        self.sampling_rate = 16000
        raise NotImplementedError(
            "Please implement AudioSourceSeparationPipeline __init__ function"
        )

    def __call__(self, inputs: np.array) -> Tuple[np.array, int]:
        """Separate the sources contained in a raw audio waveform.

        Args:
            inputs (:obj:`np.array`):
                Raw audio waveform, 16KHz by default. See `app.validation`
                if a different sample rate is required or model-dependent.
        Return:
            A :obj:`np.array` and a :obj:`int`: the output waveform as a
            numpy array together with its sampling rate.
        """
        raise NotImplementedError(
            "Please implement AudioSourceSeparationPipeline __call__ function"
        )
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
from typing import Dict

import numpy as np
from app.pipelines import Pipeline


class AutomaticSpeechRecognitionPipeline(Pipeline):
    def __init__(self, model_id: str):
        # Template: preload the model, processors and tokenizer here.
        # This method runs once per worker, so all heavy I/O belongs in it.
        # A `self.sampling_rate` attribute is required so the framework
        # can decode incoming audio at the rate the model expects.
        self.sampling_rate = 16000
        raise NotImplementedError(
            "Please implement AutomaticSpeechRecognitionPipeline __init__ function"
        )

    def __call__(self, inputs: np.array) -> Dict[str, str]:
        """Transcribe a raw audio waveform to text.

        Args:
            inputs (:obj:`np.array`):
                Raw audio waveform, 16KHz by default. See `app.validation`
                if a different sample rate is required or model-dependent.
        Return:
            A :obj:`dict` shaped like {"text": "XXX"} containing the text
            recognized in the input audio.
        """
        raise NotImplementedError(
            "Please implement AutomaticSpeechRecognitionPipeline __call__ function"
        )
16 changes: 16 additions & 0 deletions api-inference-community/docker_images/spacy/app/pipelines/base.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
from abc import ABC, abstractmethod
from typing import Any


class Pipeline(ABC):
    """Abstract contract every task-specific pipeline must fulfil."""

    @abstractmethod
    def __init__(self, model_id: str):
        # Concrete pipelines preload their model and resources here.
        raise NotImplementedError("Pipelines should implement an __init__ method")

    @abstractmethod
    def __call__(self, inputs: Any) -> Any:
        # Concrete pipelines run inference on `inputs` here.
        raise NotImplementedError("Pipelines should implement a __call__ method")


class PipelineException(Exception):
    """Raised by pipelines when a request cannot be processed."""
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
from typing import List

from app.pipelines import Pipeline


class FeatureExtractionPipeline(Pipeline):
    def __init__(
        self,
        model_id: str,
    ):
        # Template: preload the model, processors and tokenizer here.
        # This method runs once per worker, so all heavy I/O belongs in it.
        raise NotImplementedError(
            "Please implement FeatureExtractionPipeline __init__ function"
        )

    def __call__(self, inputs: str) -> List[float]:
        """Embed a string into a feature vector.

        Args:
            inputs (:obj:`str`):
                the text to extract features from.
        Return:
            A :obj:`list` of floats: the feature vector computed by the model.
        """
        raise NotImplementedError(
            "Please implement FeatureExtractionPipeline __call__ function"
        )
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
from typing import TYPE_CHECKING, Any, Dict, List

from app.pipelines import Pipeline


if TYPE_CHECKING:
from PIL import Image


class ImageClassificationPipeline(Pipeline):
    def __init__(self, model_id: str):
        # Template: preload the model and any processors here.
        # This method runs once per worker, so all heavy I/O belongs in it.
        raise NotImplementedError(
            "Please implement ImageClassificationPipeline __init__ function"
        )

    def __call__(self, inputs: "Image.Image") -> List[Dict[str, Any]]:
        """Classify a raw PIL image.

        Args:
            inputs (:obj:`PIL.Image`):
                The untouched PIL image — no transformation has been
                applied, so perform every required transform here.
        Return:
            A :obj:`list` of dicts shaped like {"label": "XXX", "score": 0.82},
            preferably sorted in decreasing `score` order.
        """
        raise NotImplementedError(
            "Please implement ImageClassificationPipeline __call__ function"
        )
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
from typing import Any, Dict

from app.pipelines import Pipeline


class QuestionAnsweringPipeline(Pipeline):
    def __init__(
        self,
        model_id: str,
    ):
        # Template: preload the model, processors and tokenizer here.
        # This method runs once per worker, so all heavy I/O belongs in it.
        raise NotImplementedError(
            "Please implement QuestionAnsweringPipeline __init__ function"
        )

    def __call__(self, inputs: Dict[str, str]) -> Dict[str, Any]:
        """Extract the answer to a question from a context passage.

        Args:
            inputs (:obj:`dict`):
                a dictionary with two keys: 'question', the question being
                asked, and 'context', the text that contains the answer.
        Return:
            A :obj:`dict` like {"answer": "XXX", "start": 3, "end": 6, "score": 0.82} where:
                - "answer": the span extracted from `context`.
                - "start": start offset of the span; context[start:end] == answer
                - "end": end offset of the span; context[start:end] == answer
                - "score": the model's confidence, between 0 and 1.
        """
        raise NotImplementedError(
            "Please implement QuestionAnsweringPipeline __call__ function"
        )
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
from typing import Dict, List, Union

from app.pipelines import Pipeline


class SentenceSimilarityPipeline(Pipeline):
    def __init__(
        self,
        model_id: str,
    ):
        # Template: preload the model, processors and tokenizer here.
        # This method runs once per worker, so all heavy I/O belongs in it.
        raise NotImplementedError(
            "Please implement SentenceSimilarityPipeline __init__ function"
        )

    def __call__(self, inputs: Dict[str, Union[str, List[str]]]) -> List[float]:
        """Score a source sentence against a list of candidate sentences.

        Args:
            inputs (:obj:`dict`):
                a dictionary with two keys: 'source_sentence', the sentence
                every candidate is compared against, and 'sentences', the
                list of strings to compare it to.
        Return:
            A :obj:`list` of floats: one similarity score per entry of `sentences`.
        """
        raise NotImplementedError(
            "Please implement SentenceSimilarityPipeline __call__ function"
        )
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
from typing import Tuple

import numpy as np
from app.pipelines import Pipeline


class TextToSpeechPipeline(Pipeline):
    def __init__(self, model_id: str):
        # Template: preload the model and vocoder/processors here.
        # This method runs once per worker, so all heavy I/O belongs in it.
        raise NotImplementedError(
            "Please implement TextToSpeechPipeline __init__ function"
        )

    def __call__(self, inputs: str) -> Tuple[np.array, int]:
        """Synthesize speech audio from text.

        Args:
            inputs (:obj:`str`):
                the text to turn into audio.
        Return:
            A :obj:`np.array` and a :obj:`int`: the generated waveform as a
            numpy array together with its sampling rate.
        """
        raise NotImplementedError(
            "Please implement TextToSpeechPipeline __call__ function"
        )
Loading