Update python_websocket_server example to Coqui STT 1.1.0

coqui-ai · Jan 28, 2022 · 32121db · 32121db
1 parent 00df423
commit 32121db
Show file tree

Hide file tree

Showing 17 changed files with 53 additions and 53 deletions.
diff --git a/python_websocket_server/Dockerfile b/python_websocket_server/Dockerfile
@@ -1,21 +1,21 @@
 FROM tensorflow/tensorflow:1.15.2-py3
 
-ARG DEEPSPEECH_CONTAINER_DIR=/opt/deepspeech
-ARG DEEPSPEECH_VERSION=0.8.2
+ARG STT_CONTAINER_DIR=/opt/stt
+ARG STT_MODEL_ID=english/coqui/v1.0.0-huge-vocab
 
 # Install OS dependencies
 # Remove the apt package lists in the same layer that created them;
 # deleting them in a later layer would not shrink the image.
 RUN apt-get update && \
     apt-get install --no-install-recommends -y wget ffmpeg && \
     apt-get clean && \
     rm -rf /var/lib/apt/lists/*
 
 # Create app directory
-RUN mkdir -p ${DEEPSPEECH_CONTAINER_DIR}
+RUN mkdir -p ${STT_CONTAINER_DIR}
 
 # Get pre-trained model
-RUN wget -q "https://github.com/mozilla/DeepSpeech/releases/download/v${DEEPSPEECH_VERSION}/deepspeech-${DEEPSPEECH_VERSION}-models.pbmm" \
-         -O ${DEEPSPEECH_CONTAINER_DIR}/model.pbmm
-RUN wget -q "https://github.com/mozilla/DeepSpeech/releases/download/v${DEEPSPEECH_VERSION}/deepspeech-${DEEPSPEECH_VERSION}-models.scorer" \
-         -O ${DEEPSPEECH_CONTAINER_DIR}/scorer.scorer
+# STT_MODEL_ID is used verbatim as the release tag path. The DeepSpeech URLs
+# needed a "v" prefix before the version; keeping it here would request
+# ".../download/venglish/..." and fail the build.
+RUN wget -q "https://github.com/coqui-ai/STT-models/releases/download/${STT_MODEL_ID}/model.tflite" \
+         -O ${STT_CONTAINER_DIR}/model.tflite
+RUN wget -q "https://github.com/coqui-ai/STT-models/releases/download/${STT_MODEL_ID}/huge-vocabulary.scorer" \
+         -O ${STT_CONTAINER_DIR}/scorer.scorer
 
 # Install Python dependencies
 RUN pip3 install --upgrade pip
@@ -24,9 +24,9 @@ COPY requirements.txt /tmp
 RUN pip3 install -r /tmp/requirements.txt
 
 # Copy code and configs
-COPY deepspeech_server ${DEEPSPEECH_CONTAINER_DIR}/deepspeech_server
-COPY application.conf ${DEEPSPEECH_CONTAINER_DIR}
+COPY stt_server ${STT_CONTAINER_DIR}/stt_server
+COPY application.conf ${STT_CONTAINER_DIR}
 
-WORKDIR ${DEEPSPEECH_CONTAINER_DIR}
+WORKDIR ${STT_CONTAINER_DIR}
 
-ENTRYPOINT python -m deepspeech_server.app
+# Exec form: the server runs as PID 1 (no /bin/sh wrapper) and therefore
+# receives SIGTERM from `docker stop` directly.
+ENTRYPOINT ["python", "-m", "stt_server.app"]
diff --git a/python_websocket_server/README.md b/python_websocket_server/README.md
@@ -1,7 +1,7 @@
 # Python websocket-based server
 
 This directory contains a simple service that receives audio data from clients, and serves the results
-of DeepSpeech inference over a websocket. The server code in this project is a modified version of
+of STT inference over a websocket. The server code in this project is a modified version of
 [this GitHub project](https://github.com/zelo/deepspeech-rest-api).
 
 Because STT transcriptions can typically be considered "long running tasks", using websockets for client-server 
@@ -22,7 +22,7 @@ Server configuration is specified in the [`application.conf`](application.conf)
 Make sure your model and scorer files are present in the same directory as the `application.conf` file. Then execute:
 
 ```
-python -m deepspeech_server.app
+python -m stt_server.app
 ```
 
 ### Sending requests to server
@@ -65,7 +65,7 @@ Example output:
 ### Kubernetes
 
 The [helm](helm) directory contains an example Helm deployment that configures an Nginx ingress to expose the 
-DeepSpeech service. The websocket timeout on the ingress is set to 1 hour.
+STT service. The websocket timeout on the ingress is set to 1 hour.
 
 ## Contributing
 

diff --git a/python_websocket_server/application.conf b/python_websocket_server/application.conf
@@ -1,4 +1,4 @@
-deepspeech {
+stt {
   model = "model.tflite"
   scorer = "scorer.scorer"
 }

diff --git a/...server/helm/deepspeech-server/.helmignore → ...socket_server/helm/stt_server/.helmignore b/...server/helm/deepspeech-server/.helmignore → ...socket_server/helm/stt_server/.helmignore
diff --git a/..._server/helm/deepspeech-server/Chart.yaml → ...bsocket_server/helm/stt_server/Chart.yaml b/..._server/helm/deepspeech-server/Chart.yaml → ...bsocket_server/helm/stt_server/Chart.yaml
@@ -1,6 +1,6 @@
 apiVersion: v2
-name: deepspeech-server
-description: A server for DeepSpeech
+name: stt-server
+description: A server for Coqui STT
 
 # A chart can be either an 'application' or a 'library' chart.
 #

diff --git a/...epspeech-server/overrides/values.dev.yaml → ...helm/stt_server/overrides/values.dev.yaml b/...epspeech-server/overrides/values.dev.yaml → ...helm/stt_server/overrides/values.dev.yaml
@@ -1,4 +1,4 @@
-# Default values for deepspeech-server.
+# Default values for stt-server.
 # This is a YAML-formatted file.
 # Declare variables to be passed into your templates.
 
@@ -41,7 +41,7 @@ ingress:
     nginx.ingress.kubernetes.io/proxy-body-size: 20m
   hosts:
     - host: "<host>"
-      paths: ["/deepspeech-server"]
+      paths: ["/stt-server"]
   tls:
     - secretName: "<secret_name>"
       hosts:

diff --git a/...pspeech-server/overrides/values.prod.yaml → ...elm/stt_server/overrides/values.prod.yaml b/...pspeech-server/overrides/values.prod.yaml → ...elm/stt_server/overrides/values.prod.yaml
@@ -1,4 +1,4 @@
-# Default values for deepspeech-server.
+# Default values for stt-server.
 # This is a YAML-formatted file.
 # Declare variables to be passed into your templates.
 
@@ -41,7 +41,7 @@ ingress:
     nginx.ingress.kubernetes.io/proxy-body-size: 20m
   hosts:
     - host: "<host>"
-      paths: ["/deepspeech-server"]
+      paths: ["/stt-server"]
   tls:
     - secretName: "<secret_name>"
       hosts:

diff --git a/.../deepspeech-server/templates/_helpers.tpl → ...er/helm/stt_server/templates/_helpers.tpl b/.../deepspeech-server/templates/_helpers.tpl → ...er/helm/stt_server/templates/_helpers.tpl
@@ -1,7 +1,7 @@
 {{/*
 Expand the name of the chart.
 */}}
-{{- define "deepspeech-server.name" -}}
+{{- define "stt-server.name" -}}
 {{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }}
 {{- end }}
 
@@ -10,7 +10,7 @@ Create a default fully qualified app name.
 We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec).
 If release name contains chart name it will be used as a full name.
 */}}
-{{- define "deepspeech-server.fullname" -}}
+{{- define "stt-server.fullname" -}}
 {{- if .Values.fullnameOverride }}
 {{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }}
 {{- else }}
@@ -26,16 +26,16 @@ If release name contains chart name it will be used as a full name.
 {{/*
 Create chart name and version as used by the chart label.
 */}}
-{{- define "deepspeech-server.chart" -}}
+{{- define "stt-server.chart" -}}
 {{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }}
 {{- end }}
 
 {{/*
 Common labels
 */}}
-{{- define "deepspeech-server.labels" -}}
-helm.sh/chart: {{ include "deepspeech-server.chart" . }}
-{{ include "deepspeech-server.selectorLabels" . }}
+{{- define "stt-server.labels" -}}
+helm.sh/chart: {{ include "stt-server.chart" . }}
+{{ include "stt-server.selectorLabels" . }}
 app.kubernetes.io/version: {{ .Values.image.tag | default .Chart.AppVersion | quote }}
 app.kubernetes.io/managed-by: {{ .Release.Service }}
 date: "{{ now | unixEpoch }}"
@@ -44,17 +44,17 @@ date: "{{ now | unixEpoch }}"
 {{/*
 Selector labels
 */}}
-{{- define "deepspeech-server.selectorLabels" -}}
-app.kubernetes.io/name: {{ include "deepspeech-server.name" . }}
+{{- define "stt-server.selectorLabels" -}}
+app.kubernetes.io/name: {{ include "stt-server.name" . }}
 app.kubernetes.io/instance: {{ .Release.Name }}
 {{- end }}
 
 {{/*
 Create the name of the service account to use
 */}}
-{{- define "deepspeech-server.serviceAccountName" -}}
+{{- define "stt-server.serviceAccountName" -}}
 {{- if .Values.serviceAccount.create }}
-{{- default (include "deepspeech-server.fullname" .) .Values.serviceAccount.name }}
+{{- default (include "stt-server.fullname" .) .Values.serviceAccount.name }}
 {{- else }}
 {{- default "default" .Values.serviceAccount.name }}
 {{- end }}

diff --git a/...epspeech-server/templates/deployment.yaml → ...helm/stt_server/templates/deployment.yaml b/...epspeech-server/templates/deployment.yaml → ...helm/stt_server/templates/deployment.yaml
@@ -1,24 +1,24 @@
 apiVersion: apps/v1
 kind: Deployment
 metadata:
-  name: {{ include "deepspeech-server.fullname" . }}
+  name: {{ include "stt-server.fullname" . }}
   labels:
-    {{- include "deepspeech-server.labels" . | nindent 4 }}
+    {{- include "stt-server.labels" . | nindent 4 }}
 spec:
 {{- if not .Values.autoscaling.enabled }}
   replicas: {{ .Values.replicaCount }}
 {{- end }}
   selector:
     matchLabels:
-      {{- include "deepspeech-server.selectorLabels" . | nindent 6 }}
+      {{- include "stt-server.selectorLabels" . | nindent 6 }}
   template:
     metadata:
     {{- with .Values.podAnnotations }}
       annotations:
         {{- toYaml . | nindent 8 }}
     {{- end }}
       labels:
-        {{- include "deepspeech-server.selectorLabels" . | nindent 8 }}
+        {{- include "stt-server.selectorLabels" . | nindent 8 }}
     spec:
       {{- with .Values.imagePullSecrets }}
       imagePullSecrets:

diff --git a/.../deepspeech-server/templates/ingress.yaml → ...er/helm/stt_server/templates/ingress.yaml b/.../deepspeech-server/templates/ingress.yaml → ...er/helm/stt_server/templates/ingress.yaml
@@ -1,12 +1,12 @@
 {{- if .Values.ingress.enabled -}}
-  {{- $fullName := include "deepspeech-server.fullname" . -}}
+  {{- $fullName := include "stt-server.fullname" . -}}
   {{- $svcPort := .Values.service.port -}}
 apiVersion: extensions/v1beta1
 kind: Ingress
 metadata:
   name: {{ $fullName }}
   labels:
-  {{- include "deepspeech-server.labels" . | nindent 4 }}
+  {{- include "stt-server.labels" . | nindent 4 }}
   {{- with .Values.ingress.annotations }}
   annotations:
   {{- toYaml . | nindent 4 }}

diff --git a/.../deepspeech-server/templates/service.yaml → ...er/helm/stt_server/templates/service.yaml b/.../deepspeech-server/templates/service.yaml → ...er/helm/stt_server/templates/service.yaml
@@ -1,9 +1,9 @@
 apiVersion: v1
 kind: Service
 metadata:
-  name: {{ include "deepspeech-server.fullname" . }}
+  name: {{ include "stt-server.fullname" . }}
   labels:
-    {{- include "deepspeech-server.labels" . | nindent 4 }}
+    {{- include "stt-server.labels" . | nindent 4 }}
 spec:
   type: {{ .Values.service.type }}
   ports:
@@ -12,4 +12,4 @@ spec:
       protocol: TCP
       name: http
   selector:
-    {{- include "deepspeech-server.selectorLabels" . | nindent 4 }}
+    {{- include "stt-server.selectorLabels" . | nindent 4 }}
diff --git a/...server/helm/deepspeech-server/values.yaml → ...socket_server/helm/stt_server/values.yaml b/...server/helm/deepspeech-server/values.yaml → ...socket_server/helm/stt_server/values.yaml
@@ -1,4 +1,4 @@
-# Default values for deepspeech-server.
+# Default values for stt-server.
 # This is a YAML-formatted file.
 # Declare variables to be passed into your templates.
 

diff --git a/python_websocket_server/requirements.txt b/python_websocket_server/requirements.txt
@@ -1,5 +1,5 @@
 setuptools~=45.2.0
-deepspeech~=0.8.2
+stt~=1.1.0
 black>=20.8b1
 ffmpeg-python==0.2.0
 sanic==20.3.0

diff --git a/...cket_server/deepspeech_server/__init__.py → ...n_websocket_server/stt_server/__init__.py b/...cket_server/deepspeech_server/__init__.py → ...n_websocket_server/stt_server/__init__.py
diff --git a/...websocket_server/deepspeech_server/app.py → python_websocket_server/stt_server/app.py b/...websocket_server/deepspeech_server/app.py → python_websocket_server/stt_server/app.py
@@ -7,24 +7,24 @@
 from sanic import Sanic, response
 from sanic.log import logger
 
-from deepspeech_server.engine import SpeechToTextEngine
-from deepspeech_server.models import Response, Error
+from stt_server.engine import SpeechToTextEngine
+from stt_server.models import Response, Error
 
-# Load app configs and initialize DeepSpeech model
+# Load app configs and initialize STT model
 conf = ConfigFactory.parse_file("application.conf")
 engine = SpeechToTextEngine(
-    model_path=Path(conf["deepspeech.model"]).absolute().as_posix(),
-    scorer_path=Path(conf["deepspeech.scorer"]).absolute().as_posix(),
+    model_path=Path(conf["stt.model"]).absolute().as_posix(),
+    scorer_path=Path(conf["stt.scorer"]).absolute().as_posix(),
 )
 
 # Initialize Sanic and ThreadPoolExecutor
 executor = ThreadPoolExecutor(max_workers=conf["server.threadpool.count"])
-app = Sanic("deepspeech_server")
+app = Sanic("stt_server")
 
 
 @app.route("/", methods=["GET"])
 async def healthcheck(_):
-    return response.text("Welcome to DeepSpeech Server!")
+    return response.text("Welcome to STT Server!")
 
 
 @app.websocket("/api/v1/stt")

diff --git a/...socket_server/deepspeech_server/engine.py → python_websocket_server/stt_server/engine.py b/...socket_server/deepspeech_server/engine.py → python_websocket_server/stt_server/engine.py
@@ -3,7 +3,7 @@
 
 import ffmpeg
 import numpy as np
-from deepspeech import Model
+from stt import Model
 
 
 def normalize_audio(audio):
@@ -27,13 +27,13 @@ def normalize_audio(audio):
 
 class SpeechToTextEngine:
     def __init__(self, model_path, scorer_path):
-        self.model = Model(model_path=model_path)
-        self.model.enableExternalScorer(scorer_path=scorer_path)
+        self.model = Model(model_path)
+        self.model.enableExternalScorer(scorer_path)
 
     def run(self, audio):
         audio = normalize_audio(audio)
         audio = BytesIO(audio)
         with wave.Wave_read(audio) as wav:
             audio = np.frombuffer(wav.readframes(wav.getnframes()), np.int16)
-        result = self.model.stt(audio_buffer=audio)
+        result = self.model.stt(audio)
         return result
diff --git a/...socket_server/deepspeech_server/models.py → python_websocket_server/stt_server/models.py b/...socket_server/deepspeech_server/models.py → python_websocket_server/stt_server/models.py