diff --git a/python_websocket_server/Dockerfile b/python_websocket_server/Dockerfile index 7072991..34fe389 100644 --- a/python_websocket_server/Dockerfile +++ b/python_websocket_server/Dockerfile @@ -1,7 +1,7 @@ FROM tensorflow/tensorflow:1.15.2-py3 -ARG DEEPSPEECH_CONTAINER_DIR=/opt/deepspeech -ARG DEEPSPEECH_VERSION=0.8.2 +ARG STT_CONTAINER_DIR=/opt/stt +ARG STT_MODEL_ID=english/coqui/v1.0.0-huge-vocab # Install OS dependencies RUN apt-get update && \ @@ -9,13 +9,13 @@ RUN apt-get update && \ apt-get clean # Create app directory -RUN mkdir -p ${DEEPSPEECH_CONTAINER_DIR} +RUN mkdir -p ${STT_CONTAINER_DIR} # Get pre-trained model -RUN wget -q "https://github.com/mozilla/DeepSpeech/releases/download/v${DEEPSPEECH_VERSION}/deepspeech-${DEEPSPEECH_VERSION}-models.pbmm" \ - -O ${DEEPSPEECH_CONTAINER_DIR}/model.pbmm -RUN wget -q "https://github.com/mozilla/DeepSpeech/releases/download/v${DEEPSPEECH_VERSION}/deepspeech-${DEEPSPEECH_VERSION}-models.scorer" \ - -O ${DEEPSPEECH_CONTAINER_DIR}/scorer.scorer +RUN wget -q "https://github.com/coqui-ai/STT-models/releases/download/${STT_MODEL_ID}/model.tflite" \ + -O ${STT_CONTAINER_DIR}/model.tflite +RUN wget -q "https://github.com/coqui-ai/STT-models/releases/download/${STT_MODEL_ID}/huge-vocabulary.scorer" \ + -O ${STT_CONTAINER_DIR}/scorer.scorer # Install Python dependencies RUN pip3 install --upgrade pip @@ -24,9 +24,9 @@ COPY requirements.txt /tmp RUN pip3 install -r /tmp/requirements.txt # Copy code and configs -COPY deepspeech_server ${DEEPSPEECH_CONTAINER_DIR}/deepspeech_server -COPY application.conf ${DEEPSPEECH_CONTAINER_DIR} +COPY stt_server ${STT_CONTAINER_DIR}/stt_server +COPY application.conf ${STT_CONTAINER_DIR} -WORKDIR ${DEEPSPEECH_CONTAINER_DIR} +WORKDIR ${STT_CONTAINER_DIR} -ENTRYPOINT python -m deepspeech_server.app +ENTRYPOINT python -m stt_server.app diff --git a/python_websocket_server/README.md b/python_websocket_server/README.md index a35d41f..1215cff 100644 --- a/python_websocket_server/README.md 
+++ b/python_websocket_server/README.md @@ -1,7 +1,7 @@ # Python websocket-based server This directory contains a simple service that receives audio data from clients, and serves the results -of DeepSpeech inference over a websocket. The server code in this project is a modified version of +of STT inference over a websocket. The server code in this project is a modified version of [this GitHub project](https://github.com/zelo/deepspeech-rest-api). Because STT transcriptions can typically be considered "long running tasks", using websockets for client-server @@ -22,7 +22,7 @@ Server configuration is specified in the [`application.conf`](application.conf) Make sure your model and scorer files are present in the same directory as the `application.conf` file. Then execute: ``` -python -m deepspeech_server.app +python -m stt_server.app ``` ### Sending requests to server @@ -65,7 +65,7 @@ Example output: ### Kubernetes The [helm](helm) directory contains an example Helm deployment, that configures an Nginx ingress to expose the -DeepSpeech service. The websocket timeout on the ingress is set to 1 hour. +STT service. The websocket timeout on the ingress is set to 1 hour. 
## Contributing diff --git a/python_websocket_server/application.conf b/python_websocket_server/application.conf index 3d06c4d..feb6aec 100644 --- a/python_websocket_server/application.conf +++ b/python_websocket_server/application.conf @@ -1,4 +1,4 @@ -deepspeech { +stt { model = "model.tflite" scorer = "scorer.scorer" } diff --git a/python_websocket_server/helm/deepspeech-server/.helmignore b/python_websocket_server/helm/stt_server/.helmignore similarity index 100% rename from python_websocket_server/helm/deepspeech-server/.helmignore rename to python_websocket_server/helm/stt_server/.helmignore diff --git a/python_websocket_server/helm/deepspeech-server/Chart.yaml b/python_websocket_server/helm/stt_server/Chart.yaml similarity index 92% rename from python_websocket_server/helm/deepspeech-server/Chart.yaml rename to python_websocket_server/helm/stt_server/Chart.yaml index e90f283..35ef257 100644 --- a/python_websocket_server/helm/deepspeech-server/Chart.yaml +++ b/python_websocket_server/helm/stt_server/Chart.yaml @@ -1,6 +1,6 @@ apiVersion: v2 -name: deepspeech-server -description: A server for DeepSpeech +name: stt-server +description: A server for Coqui STT # A chart can be either an 'application' or a 'library' chart. # diff --git a/python_websocket_server/helm/deepspeech-server/overrides/values.dev.yaml b/python_websocket_server/helm/stt_server/overrides/values.dev.yaml similarity index 95% rename from python_websocket_server/helm/deepspeech-server/overrides/values.dev.yaml rename to python_websocket_server/helm/stt_server/overrides/values.dev.yaml index 190ebc9..923e96a 100644 --- a/python_websocket_server/helm/deepspeech-server/overrides/values.dev.yaml +++ b/python_websocket_server/helm/stt_server/overrides/values.dev.yaml @@ -1,4 +1,4 @@ -# Default values for deepspeech-server. +# Default values for stt-server. # This is a YAML-formatted file. # Declare variables to be passed into your templates. 
@@ -41,7 +41,7 @@ ingress: nginx.ingress.kubernetes.io/proxy-body-size: 20m hosts: - host: "" - paths: ["/deepspeech-server"] + paths: ["/stt-server"] tls: - secretName: "" hosts: diff --git a/python_websocket_server/helm/deepspeech-server/overrides/values.prod.yaml b/python_websocket_server/helm/stt_server/overrides/values.prod.yaml similarity index 95% rename from python_websocket_server/helm/deepspeech-server/overrides/values.prod.yaml rename to python_websocket_server/helm/stt_server/overrides/values.prod.yaml index 211aeec..bcbd9c7 100644 --- a/python_websocket_server/helm/deepspeech-server/overrides/values.prod.yaml +++ b/python_websocket_server/helm/stt_server/overrides/values.prod.yaml @@ -1,4 +1,4 @@ -# Default values for deepspeech-server. +# Default values for stt-server. # This is a YAML-formatted file. # Declare variables to be passed into your templates. @@ -41,7 +41,7 @@ ingress: nginx.ingress.kubernetes.io/proxy-body-size: 20m hosts: - host: "" - paths: ["/deepspeech-server"] + paths: ["/stt-server"] tls: - secretName: "" hosts: diff --git a/python_websocket_server/helm/deepspeech-server/templates/_helpers.tpl b/python_websocket_server/helm/stt_server/templates/_helpers.tpl similarity index 72% rename from python_websocket_server/helm/deepspeech-server/templates/_helpers.tpl rename to python_websocket_server/helm/stt_server/templates/_helpers.tpl index 4fa4e80..aff9572 100644 --- a/python_websocket_server/helm/deepspeech-server/templates/_helpers.tpl +++ b/python_websocket_server/helm/stt_server/templates/_helpers.tpl @@ -1,7 +1,7 @@ {{/* Expand the name of the chart. */}} -{{- define "deepspeech-server.name" -}} +{{- define "stt-server.name" -}} {{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }} {{- end }} @@ -10,7 +10,7 @@ Create a default fully qualified app name. We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). 
If release name contains chart name it will be used as a full name. */}} -{{- define "deepspeech-server.fullname" -}} +{{- define "stt-server.fullname" -}} {{- if .Values.fullnameOverride }} {{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }} {{- else }} @@ -26,16 +26,16 @@ If release name contains chart name it will be used as a full name. {{/* Create chart name and version as used by the chart label. */}} -{{- define "deepspeech-server.chart" -}} +{{- define "stt-server.chart" -}} {{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }} {{- end }} {{/* Common labels */}} -{{- define "deepspeech-server.labels" -}} -helm.sh/chart: {{ include "deepspeech-server.chart" . }} -{{ include "deepspeech-server.selectorLabels" . }} +{{- define "stt-server.labels" -}} +helm.sh/chart: {{ include "stt-server.chart" . }} +{{ include "stt-server.selectorLabels" . }} app.kubernetes.io/version: {{ .Values.image.tag | default .Chart.AppVersion | quote }} app.kubernetes.io/managed-by: {{ .Release.Service }} date: "{{ now | unixEpoch }}" @@ -44,17 +44,17 @@ date: "{{ now | unixEpoch }}" {{/* Selector labels */}} -{{- define "deepspeech-server.selectorLabels" -}} -app.kubernetes.io/name: {{ include "deepspeech-server.name" . }} +{{- define "stt-server.selectorLabels" -}} +app.kubernetes.io/name: {{ include "stt-server.name" . }} app.kubernetes.io/instance: {{ .Release.Name }} {{- end }} {{/* Create the name of the service account to use */}} -{{- define "deepspeech-server.serviceAccountName" -}} +{{- define "stt-server.serviceAccountName" -}} {{- if .Values.serviceAccount.create }} -{{- default (include "deepspeech-server.fullname" .) .Values.serviceAccount.name }} +{{- default (include "stt-server.fullname" .) 
.Values.serviceAccount.name }} {{- else }} {{- default "default" .Values.serviceAccount.name }} {{- end }} diff --git a/python_websocket_server/helm/deepspeech-server/templates/deployment.yaml b/python_websocket_server/helm/stt_server/templates/deployment.yaml similarity index 85% rename from python_websocket_server/helm/deepspeech-server/templates/deployment.yaml rename to python_websocket_server/helm/stt_server/templates/deployment.yaml index 484ea2c..4b2e8ce 100644 --- a/python_websocket_server/helm/deepspeech-server/templates/deployment.yaml +++ b/python_websocket_server/helm/stt_server/templates/deployment.yaml @@ -1,16 +1,16 @@ apiVersion: apps/v1 kind: Deployment metadata: - name: {{ include "deepspeech-server.fullname" . }} + name: {{ include "stt-server.fullname" . }} labels: - {{- include "deepspeech-server.labels" . | nindent 4 }} + {{- include "stt-server.labels" . | nindent 4 }} spec: {{- if not .Values.autoscaling.enabled }} replicas: {{ .Values.replicaCount }} {{- end }} selector: matchLabels: - {{- include "deepspeech-server.selectorLabels" . | nindent 6 }} + {{- include "stt-server.selectorLabels" . | nindent 6 }} template: metadata: {{- with .Values.podAnnotations }} @@ -18,7 +18,7 @@ spec: {{- toYaml . | nindent 8 }} {{- end }} labels: - {{- include "deepspeech-server.selectorLabels" . | nindent 8 }} + {{- include "stt-server.selectorLabels" . 
| nindent 8 }} spec: {{- with .Values.imagePullSecrets }} imagePullSecrets: diff --git a/python_websocket_server/helm/deepspeech-server/templates/ingress.yaml b/python_websocket_server/helm/stt_server/templates/ingress.yaml similarity index 87% rename from python_websocket_server/helm/deepspeech-server/templates/ingress.yaml rename to python_websocket_server/helm/stt_server/templates/ingress.yaml index 984253c..19bc6ab 100644 --- a/python_websocket_server/helm/deepspeech-server/templates/ingress.yaml +++ b/python_websocket_server/helm/stt_server/templates/ingress.yaml @@ -1,12 +1,12 @@ {{- if .Values.ingress.enabled -}} - {{- $fullName := include "deepspeech-server.fullname" . -}} + {{- $fullName := include "stt-server.fullname" . -}} {{- $svcPort := .Values.service.port -}} apiVersion: extensions/v1beta1 kind: Ingress metadata: name: {{ $fullName }} labels: - {{- include "deepspeech-server.labels" . | nindent 4 }} + {{- include "stt-server.labels" . | nindent 4 }} {{- with .Values.ingress.annotations }} annotations: {{- toYaml . | nindent 4 }} diff --git a/python_websocket_server/helm/deepspeech-server/templates/service.yaml b/python_websocket_server/helm/stt_server/templates/service.yaml similarity index 53% rename from python_websocket_server/helm/deepspeech-server/templates/service.yaml rename to python_websocket_server/helm/stt_server/templates/service.yaml index b0f3f55..b717148 100644 --- a/python_websocket_server/helm/deepspeech-server/templates/service.yaml +++ b/python_websocket_server/helm/stt_server/templates/service.yaml @@ -1,9 +1,9 @@ apiVersion: v1 kind: Service metadata: - name: {{ include "deepspeech-server.fullname" . }} + name: {{ include "stt-server.fullname" . }} labels: - {{- include "deepspeech-server.labels" . | nindent 4 }} + {{- include "stt-server.labels" . | nindent 4 }} spec: type: {{ .Values.service.type }} ports: @@ -12,4 +12,4 @@ spec: protocol: TCP name: http selector: - {{- include "deepspeech-server.selectorLabels" . 
| nindent 4 }} + {{- include "stt-server.selectorLabels" . | nindent 4 }} diff --git a/python_websocket_server/helm/deepspeech-server/values.yaml b/python_websocket_server/helm/stt_server/values.yaml similarity index 96% rename from python_websocket_server/helm/deepspeech-server/values.yaml rename to python_websocket_server/helm/stt_server/values.yaml index cafea09..449e97b 100644 --- a/python_websocket_server/helm/deepspeech-server/values.yaml +++ b/python_websocket_server/helm/stt_server/values.yaml @@ -1,4 +1,4 @@ -# Default values for deepspeech-server. +# Default values for stt-server. # This is a YAML-formatted file. # Declare variables to be passed into your templates. diff --git a/python_websocket_server/requirements.txt b/python_websocket_server/requirements.txt index 330b4b0..6c44deb 100644 --- a/python_websocket_server/requirements.txt +++ b/python_websocket_server/requirements.txt @@ -1,5 +1,5 @@ setuptools~=45.2.0 -deepspeech~=0.8.2 +stt~=1.1.0 black>=20.8b1 ffmpeg-python==0.2.0 sanic==20.3.0 diff --git a/python_websocket_server/deepspeech_server/__init__.py b/python_websocket_server/stt_server/__init__.py similarity index 100% rename from python_websocket_server/deepspeech_server/__init__.py rename to python_websocket_server/stt_server/__init__.py diff --git a/python_websocket_server/deepspeech_server/app.py b/python_websocket_server/stt_server/app.py similarity index 78% rename from python_websocket_server/deepspeech_server/app.py rename to python_websocket_server/stt_server/app.py index 2533f61..aa1537e 100644 --- a/python_websocket_server/deepspeech_server/app.py +++ b/python_websocket_server/stt_server/app.py @@ -7,24 +7,24 @@ from sanic import Sanic, response from sanic.log import logger -from deepspeech_server.engine import SpeechToTextEngine -from deepspeech_server.models import Response, Error +from stt_server.engine import SpeechToTextEngine +from stt_server.models import Response, Error -# Load app configs and initialize DeepSpeech model +# 
Load app configs and initialize STT model conf = ConfigFactory.parse_file("application.conf") engine = SpeechToTextEngine( - model_path=Path(conf["deepspeech.model"]).absolute().as_posix(), - scorer_path=Path(conf["deepspeech.scorer"]).absolute().as_posix(), + model_path=Path(conf["stt.model"]).absolute().as_posix(), + scorer_path=Path(conf["stt.scorer"]).absolute().as_posix(), ) # Initialze Sanic and ThreadPoolExecutor executor = ThreadPoolExecutor(max_workers=conf["server.threadpool.count"]) -app = Sanic("deepspeech_server") +app = Sanic("stt_server") @app.route("/", methods=["GET"]) async def healthcheck(_): - return response.text("Welcome to DeepSpeech Server!") + return response.text("Welcome to STT Server!") @app.websocket("/api/v1/stt") diff --git a/python_websocket_server/deepspeech_server/engine.py b/python_websocket_server/stt_server/engine.py similarity index 80% rename from python_websocket_server/deepspeech_server/engine.py rename to python_websocket_server/stt_server/engine.py index bc74856..004f44f 100644 --- a/python_websocket_server/deepspeech_server/engine.py +++ b/python_websocket_server/stt_server/engine.py @@ -3,7 +3,7 @@ import ffmpeg import numpy as np -from deepspeech import Model +from stt import Model def normalize_audio(audio): @@ -27,13 +27,13 @@ def normalize_audio(audio): class SpeechToTextEngine: def __init__(self, model_path, scorer_path): - self.model = Model(model_path=model_path) - self.model.enableExternalScorer(scorer_path=scorer_path) + self.model = Model(model_path) + self.model.enableExternalScorer(scorer_path) def run(self, audio): audio = normalize_audio(audio) audio = BytesIO(audio) with wave.Wave_read(audio) as wav: audio = np.frombuffer(wav.readframes(wav.getnframes()), np.int16) - result = self.model.stt(audio_buffer=audio) + result = self.model.stt(audio) return result diff --git a/python_websocket_server/deepspeech_server/models.py b/python_websocket_server/stt_server/models.py similarity index 100% rename from 
python_websocket_server/deepspeech_server/models.py rename to python_websocket_server/stt_server/models.py