Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added basic kaldi-gstreamer-server support #122

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions examples/audio_transcribe.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,3 +68,13 @@
print("IBM Speech to Text could not understand audio")
except sr.RequestError as e:
print("Could not request results from IBM Speech to Text service; {0}".format(e))

# transcribe the audio with a kaldi-gstreamer-server instance running on localhost
KALDI_HOST = "localhost:8888"
try:
    # fetch the transcription first, then report it; both steps stay inside
    # the ``try`` so the same exceptions are handled as before
    kaldi_transcription = r.recognize_kaldi(audio, host=KALDI_HOST)
    print("Kaldi thinks you said " + str(kaldi_transcription))
except sr.UnknownValueError:
    # the recognizer signalled that it has no transcription for this audio
    print("Kaldi could not understand audio")
except sr.RequestError as e:
    # the request to the server itself failed
    kaldi_failure = "Could not request results from Kaldi; {0}".format(e)
    print(kaldi_failure)

9 changes: 9 additions & 0 deletions examples/extended_results.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,3 +76,12 @@
print("IBM Speech to Text could not understand audio")
except sr.RequestError as e:
print("Could not request results from IBM Speech to Text service; {0}".format(e))

# query a kaldi-gstreamer-server instance on localhost and show the full response
KALDI_HOST = "localhost:8888"
try:
    # ``show_all=True`` asks for the raw response rather than a single transcription
    kaldi_response = r.recognize_kaldi(audio, host=KALDI_HOST, show_all=True)
    print("Kaldi thinks you said " + str(kaldi_response))
except sr.UnknownValueError:
    # the recognizer signalled that it has no transcription for this audio
    print("Kaldi could not understand audio")
except sr.RequestError as e:
    # the request to the server itself failed
    kaldi_failure = "Could not request results from Kaldi; {0}".format(e)
    print(kaldi_failure)
60 changes: 60 additions & 0 deletions speech_recognition/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -675,6 +675,66 @@ def recognize_google(self, audio_data, key = None, language = "en-US", show_all
return entry["transcript"]
raise UnknownValueError() # no transcriptions available

def recognize_kaldi(self, audio_data, host, show_all = False):
    """
    Performs speech recognition on ``audio_data`` (an ``AudioData`` instance), using the HTTP POST API of a kaldi-gstreamer-server instance.

    ``host`` is the ``"hostname:port"`` string of the server (for example ``"localhost:8888"``); the audio is posted to ``http://<host>/client/dynamic/recognize``.

    Returns the most likely transcription if ``show_all`` is false (the default). Otherwise, returns the raw API response as a JSON dictionary.

    Raises a ``speech_recognition.UnknownValueError`` exception if the response contains no transcription. Raises a ``speech_recognition.RequestError`` exception if the server could not be reached, the request failed, the response could not be parsed, or the server reported an error status.
    """
    assert isinstance(audio_data, AudioData), "`audio_data` must be audio data"
    assert isinstance(host, str), "`host` must be a string"

    # human-readable descriptions for the status codes this method knows about
    response_status = {
        0: "Success.",
        1: "No speech.",
        2: "Aborted.",
        5: "Internal data flow error.",
        9: "Not available.",
    }

    url = "http://{0}/client/dynamic/recognize".format(host)
    # NOTE(review): only the sample rate is declared here; the sample width of the
    # raw data is not - confirm the server's defaults match ``get_raw_data()``
    content_type = "audio/x-raw-int;rate={0}".format(audio_data.sample_rate)
    request = Request(url, data = audio_data.get_raw_data(), headers = {"Content-Type": content_type})

    # obtain audio transcription results
    try:
        response = urlopen(request)
    except HTTPError as e:
        # use getattr to be compatible with Python 2.6
        raise RequestError("recognition request failed: {0}".format(getattr(e, "reason", "status {0}".format(e.code))))
    except URLError as e:
        raise RequestError("recognition connection failed: {0}".format(e.reason))
    try:
        response_text = response.read().decode("utf-8")
    finally:
        # release the connection even if reading or decoding fails
        response.close()

    # a body that is not JSON, or that lacks a numeric ``status``, is reported as a
    # failed request instead of leaking ValueError/KeyError/TypeError to the caller
    try:
        actual_result = json.loads(response_text)
        status = int(actual_result["status"])
    except (ValueError, KeyError, TypeError) as e:
        raise RequestError("recognition response was malformed: {0}".format(e))

    # statuses 0 and 1 are not treated as request failures; any other code is,
    # including codes that are missing from the table above
    if status not in (0, 1):
        raise RequestError("Server returned: {0} - {1} ".format(status, response_status.get(status, "Unknown status.")))

    if show_all:
        return actual_result

    # return the first hypothesis that carries an utterance; a response without
    # a ``hypotheses`` list is handled the same way as an empty one
    for entry in actual_result.get("hypotheses") or []:
        if "utterance" in entry:
            return entry["utterance"]

    raise UnknownValueError() # no transcriptions available

def recognize_wit(self, audio_data, key, show_all = False):
"""
Performs speech recognition on ``audio_data`` (an ``AudioData`` instance), using the Wit.ai API.
Expand Down