Skip to content

Commit

Permalink
Update python_websocket_server example to Coqui STT 1.1.0
Browse files Browse the repository at this point in the history
  • Loading branch information
reuben committed Jan 28, 2022
1 parent 00df423 commit 32121db
Show file tree
Hide file tree
Showing 17 changed files with 53 additions and 53 deletions.
22 changes: 11 additions & 11 deletions python_websocket_server/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,21 +1,21 @@
FROM tensorflow/tensorflow:1.15.2-py3

ARG DEEPSPEECH_CONTAINER_DIR=/opt/deepspeech
ARG DEEPSPEECH_VERSION=0.8.2
ARG STT_CONTAINER_DIR=/opt/stt
ARG STT_MODEL_ID=english/coqui/v1.0.0-huge-vocab

# Install OS dependencies.
# `update` and `install` share one layer so the package index is never stale,
# and the index is deleted in that same layer so it does not stay in the image
# (`apt-get clean` only empties the .deb archive cache, not the index lists).
RUN apt-get update && \
    apt-get install --no-install-recommends -y ffmpeg wget && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/*

# Create app directory
RUN mkdir -p ${DEEPSPEECH_CONTAINER_DIR}
RUN mkdir -p ${STT_CONTAINER_DIR}

# Get pre-trained model
RUN wget -q "https://github.com/mozilla/DeepSpeech/releases/download/v${DEEPSPEECH_VERSION}/deepspeech-${DEEPSPEECH_VERSION}-models.pbmm" \
-O ${DEEPSPEECH_CONTAINER_DIR}/model.pbmm
RUN wget -q "https://github.com/mozilla/DeepSpeech/releases/download/v${DEEPSPEECH_VERSION}/deepspeech-${DEEPSPEECH_VERSION}-models.scorer" \
-O ${DEEPSPEECH_CONTAINER_DIR}/scorer.scorer
# STT_MODEL_ID is already the complete release tag (it carries its own
# "v<version>" component), so it goes into the URL verbatim. Prefixing it with
# another "v" would request ".../download/venglish/..." which does not exist.
RUN wget -q "https://github.com/coqui-ai/STT-models/releases/download/${STT_MODEL_ID}/model.tflite" \
    -O "${STT_CONTAINER_DIR}/model.tflite"
RUN wget -q "https://github.com/coqui-ai/STT-models/releases/download/${STT_MODEL_ID}/huge-vocabulary.scorer" \
    -O "${STT_CONTAINER_DIR}/scorer.scorer"

# Install Python dependencies
# --no-cache-dir keeps pip's download cache out of the image layer.
RUN pip3 install --no-cache-dir --upgrade pip
Expand All @@ -24,9 +24,9 @@ COPY requirements.txt /tmp
# --no-cache-dir keeps downloaded wheels/sdists out of the image layer.
RUN pip3 install --no-cache-dir -r /tmp/requirements.txt

# Copy code and configs
COPY deepspeech_server ${DEEPSPEECH_CONTAINER_DIR}/deepspeech_server
COPY application.conf ${DEEPSPEECH_CONTAINER_DIR}
COPY stt_server ${STT_CONTAINER_DIR}/stt_server
COPY application.conf ${STT_CONTAINER_DIR}

WORKDIR ${DEEPSPEECH_CONTAINER_DIR}
WORKDIR ${STT_CONTAINER_DIR}

ENTRYPOINT python -m deepspeech_server.app
# Exec (JSON-array) form: python is started directly as PID 1 instead of as a
# child of `/bin/sh -c`, so it receives the SIGTERM sent by `docker stop`.
ENTRYPOINT ["python", "-m", "stt_server.app"]
6 changes: 3 additions & 3 deletions python_websocket_server/README.md
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# Python websocket-based server

This directory contains a simple service that receives audio data from clients, and serves the results
of DeepSpeech inference over a websocket. The server code in this project is a modified version of
of STT inference over a websocket. The server code in this project is a modified version of
[this GitHub project](https://github.com/zelo/deepspeech-rest-api).

Because STT transcriptions can typically be considered "long running tasks", using websockets for client-server
Expand All @@ -22,7 +22,7 @@ Server configuration is specified in the [`application.conf`](application.conf)
Make sure your model and scorer files are present in the same directory as the `application.conf` file. Then execute:

```
python -m deepspeech_server.app
python -m stt_server.app
```

### Sending requests to server
Expand Down Expand Up @@ -65,7 +65,7 @@ Example output:
### Kubernetes

The [helm](helm) directory contains an example Helm deployment that configures an Nginx ingress to expose the
DeepSpeech service. The websocket timeout on the ingress is set to 1 hour.
STT service. The websocket timeout on the ingress is set to 1 hour.

## Contributing

Expand Down
2 changes: 1 addition & 1 deletion python_websocket_server/application.conf
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
deepspeech {
stt {
model = "model.tflite"
scorer = "scorer.scorer"
}
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
apiVersion: v2
name: deepspeech-server
description: A server for DeepSpeech
name: stt-server
description: A server for Coqui STT

# A chart can be either an 'application' or a 'library' chart.
#
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Default values for deepspeech-server.
# Default values for stt-server.
# This is a YAML-formatted file.
# Declare variables to be passed into your templates.

Expand Down Expand Up @@ -41,7 +41,7 @@ ingress:
nginx.ingress.kubernetes.io/proxy-body-size: 20m
hosts:
- host: "<host>"
paths: ["/deepspeech-server"]
paths: ["/stt-server"]
tls:
- secretName: "<secret_name>"
hosts:
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Default values for deepspeech-server.
# Default values for stt-server.
# This is a YAML-formatted file.
# Declare variables to be passed into your templates.

Expand Down Expand Up @@ -41,7 +41,7 @@ ingress:
nginx.ingress.kubernetes.io/proxy-body-size: 20m
hosts:
- host: "<host>"
paths: ["/deepspeech-server"]
paths: ["/stt-server"]
tls:
- secretName: "<secret_name>"
hosts:
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
{{/*
Expand the name of the chart.
*/}}
{{- define "deepspeech-server.name" -}}
{{- define "stt-server.name" -}}
{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }}
{{- end }}

Expand All @@ -10,7 +10,7 @@ Create a default fully qualified app name.
We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec).
If release name contains chart name it will be used as a full name.
*/}}
{{- define "deepspeech-server.fullname" -}}
{{- define "stt-server.fullname" -}}
{{- if .Values.fullnameOverride }}
{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }}
{{- else }}
Expand All @@ -26,16 +26,16 @@ If release name contains chart name it will be used as a full name.
{{/*
Create chart name and version as used by the chart label.
*/}}
{{- define "deepspeech-server.chart" -}}
{{- define "stt-server.chart" -}}
{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }}
{{- end }}

{{/*
Common labels
*/}}
{{- define "deepspeech-server.labels" -}}
helm.sh/chart: {{ include "deepspeech-server.chart" . }}
{{ include "deepspeech-server.selectorLabels" . }}
{{- define "stt-server.labels" -}}
helm.sh/chart: {{ include "stt-server.chart" . }}
{{ include "stt-server.selectorLabels" . }}
app.kubernetes.io/version: {{ .Values.image.tag | default .Chart.AppVersion | quote }}
app.kubernetes.io/managed-by: {{ .Release.Service }}
date: "{{ now | unixEpoch }}"
Expand All @@ -44,17 +44,17 @@ date: "{{ now | unixEpoch }}"
{{/*
Selector labels
*/}}
{{- define "deepspeech-server.selectorLabels" -}}
app.kubernetes.io/name: {{ include "deepspeech-server.name" . }}
{{- define "stt-server.selectorLabels" -}}
app.kubernetes.io/name: {{ include "stt-server.name" . }}
app.kubernetes.io/instance: {{ .Release.Name }}
{{- end }}

{{/*
Create the name of the service account to use
*/}}
{{- define "deepspeech-server.serviceAccountName" -}}
{{- define "stt-server.serviceAccountName" -}}
{{- if .Values.serviceAccount.create }}
{{- default (include "deepspeech-server.fullname" .) .Values.serviceAccount.name }}
{{- default (include "stt-server.fullname" .) .Values.serviceAccount.name }}
{{- else }}
{{- default "default" .Values.serviceAccount.name }}
{{- end }}
Expand Down
Original file line number Diff line number Diff line change
@@ -1,24 +1,24 @@
apiVersion: apps/v1
kind: Deployment
metadata:
name: {{ include "deepspeech-server.fullname" . }}
name: {{ include "stt-server.fullname" . }}
labels:
{{- include "deepspeech-server.labels" . | nindent 4 }}
{{- include "stt-server.labels" . | nindent 4 }}
spec:
{{- if not .Values.autoscaling.enabled }}
replicas: {{ .Values.replicaCount }}
{{- end }}
selector:
matchLabels:
{{- include "deepspeech-server.selectorLabels" . | nindent 6 }}
{{- include "stt-server.selectorLabels" . | nindent 6 }}
template:
metadata:
{{- with .Values.podAnnotations }}
annotations:
{{- toYaml . | nindent 8 }}
{{- end }}
labels:
{{- include "deepspeech-server.selectorLabels" . | nindent 8 }}
{{- include "stt-server.selectorLabels" . | nindent 8 }}
spec:
{{- with .Values.imagePullSecrets }}
imagePullSecrets:
Expand Down
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
{{- if .Values.ingress.enabled -}}
{{- $fullName := include "deepspeech-server.fullname" . -}}
{{- $fullName := include "stt-server.fullname" . -}}
{{- $svcPort := .Values.service.port -}}
apiVersion: extensions/v1beta1
kind: Ingress
metadata:
name: {{ $fullName }}
labels:
{{- include "deepspeech-server.labels" . | nindent 4 }}
{{- include "stt-server.labels" . | nindent 4 }}
{{- with .Values.ingress.annotations }}
annotations:
{{- toYaml . | nindent 4 }}
Expand Down
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
apiVersion: v1
kind: Service
metadata:
name: {{ include "deepspeech-server.fullname" . }}
name: {{ include "stt-server.fullname" . }}
labels:
{{- include "deepspeech-server.labels" . | nindent 4 }}
{{- include "stt-server.labels" . | nindent 4 }}
spec:
type: {{ .Values.service.type }}
ports:
Expand All @@ -12,4 +12,4 @@ spec:
protocol: TCP
name: http
selector:
{{- include "deepspeech-server.selectorLabels" . | nindent 4 }}
{{- include "stt-server.selectorLabels" . | nindent 4 }}
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Default values for deepspeech-server.
# Default values for stt-server.
# This is a YAML-formatted file.
# Declare variables to be passed into your templates.

Expand Down
2 changes: 1 addition & 1 deletion python_websocket_server/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
setuptools~=45.2.0
deepspeech~=0.8.2
stt~=1.1.0
black>=20.8b1
ffmpeg-python==0.2.0
sanic==20.3.0
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,24 +7,24 @@
from sanic import Sanic, response
from sanic.log import logger

from deepspeech_server.engine import SpeechToTextEngine
from deepspeech_server.models import Response, Error
from stt_server.engine import SpeechToTextEngine
from stt_server.models import Response, Error

# Load app configs and initialize DeepSpeech model
# Load app configs and initialize STT model
conf = ConfigFactory.parse_file("application.conf")
engine = SpeechToTextEngine(
model_path=Path(conf["deepspeech.model"]).absolute().as_posix(),
scorer_path=Path(conf["deepspeech.scorer"]).absolute().as_posix(),
model_path=Path(conf["stt.model"]).absolute().as_posix(),
scorer_path=Path(conf["stt.scorer"]).absolute().as_posix(),
)

# Initialize Sanic and ThreadPoolExecutor
executor = ThreadPoolExecutor(max_workers=conf["server.threadpool.count"])
app = Sanic("deepspeech_server")
app = Sanic("stt_server")


@app.route("/", methods=["GET"])
async def healthcheck(_):
return response.text("Welcome to DeepSpeech Server!")
return response.text("Welcome to STT Server!")


@app.websocket("/api/v1/stt")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

import ffmpeg
import numpy as np
from deepspeech import Model
from stt import Model


def normalize_audio(audio):
Expand All @@ -27,13 +27,13 @@ def normalize_audio(audio):

class SpeechToTextEngine:
def __init__(self, model_path, scorer_path):
self.model = Model(model_path=model_path)
self.model.enableExternalScorer(scorer_path=scorer_path)
self.model = Model(model_path)
self.model.enableExternalScorer(scorer_path)

def run(self, audio):
audio = normalize_audio(audio)
audio = BytesIO(audio)
with wave.Wave_read(audio) as wav:
audio = np.frombuffer(wav.readframes(wav.getnframes()), np.int16)
result = self.model.stt(audio_buffer=audio)
result = self.model.stt(audio)
return result

0 comments on commit 32121db

Please sign in to comment.