Skip to content

Commit

Permalink
Alternate strategy, to just pass back responses.
Browse files Browse the repository at this point in the history
  • Loading branch information
daspecster committed Oct 13, 2016
1 parent a49643f commit ea0cb87
Show file tree
Hide file tree
Showing 14 changed files with 227 additions and 285 deletions.
28 changes: 9 additions & 19 deletions docs/speech-streaming.rst
Original file line number Diff line number Diff line change
@@ -1,33 +1,23 @@
Speech StreamingResponseContainer
=================================
Streaming Speech Response
=========================

.. automodule:: google.cloud.speech.streaming.container
.. automodule:: google.cloud.speech.streaming_response
:members:
:undoc-members:
:show-inheritance:

Speech Streaming Request helpers
================================
Streaming Speech Result
=======================

.. automodule:: google.cloud.speech.streaming.request
.. automodule:: google.cloud.speech.streaming_result
:members:
:undoc-members:
:show-inheritance:

Speech StreamingSpeechResponse
==============================
Streaming Endpointer Type
=========================

.. automodule:: google.cloud.speech.streaming.response
:members:
:undoc-members:
:show-inheritance:



Speech StreamingSpeechResult
============================

.. automodule:: google.cloud.speech.streaming.result
.. automodule:: google.cloud.speech.endpointer_type
:members:
:undoc-members:
:show-inheritance:
26 changes: 10 additions & 16 deletions docs/speech-usage.rst
Original file line number Diff line number Diff line change
Expand Up @@ -51,10 +51,9 @@ See: `Speech Asynchronous Recognize`_
>>> import time
>>> from google.cloud import speech
>>> from google.cloud.speech.encoding import Encoding
>>> client = speech.Client()
>>> sample = client.sample(source_uri='gs://my-bucket/recording.flac',
... encoding=Encoding.LINEAR16,
... encoding=speech.Encoding.LINEAR16,
... sample_rate=44100)
>>> operation = client.async_recognize(sample, max_alternatives=2)
>>> retry_count = 100
Expand Down Expand Up @@ -82,10 +81,9 @@ Great Britain.
.. code-block:: python
>>> from google.cloud import speech
>>> from google.cloud.speech.encoding import Encoding
>>> client = speech.Client()
>>> sample = client.sample(source_uri='gs://my-bucket/recording.flac',
... encoding=Encoding.FLAC,
... encoding=speech.Encoding.FLAC,
... sample_rate=44100)
>>> operation = client.async_recognize(sample, max_alternatives=2)
>>> alternatives = client.sync_recognize(
Expand All @@ -107,10 +105,9 @@ Example of using the profanity filter.
.. code-block:: python
>>> from google.cloud import speech
>>> from google.cloud.speech.encoding import Encoding
>>> client = speech.Client()
>>> sample = client.sample(source_uri='gs://my-bucket/recording.flac',
... encoding=Encoding.FLAC,
... encoding=speech.Encoding.FLAC,
... sample_rate=44100)
>>> alternatives = client.sync_recognize(sample, max_alternatives=1,
... profanity_filter=True)
Expand All @@ -129,10 +126,9 @@ words to the vocabulary of the recognizer.
.. code-block:: python
>>> from google.cloud import speech
>>> from google.cloud.speech.encoding import Encoding
>>> client = speech.Client()
>>> sample = client.sample(source_uri='gs://my-bucket/recording.flac',
... encoding=Encoding.FLAC,
... encoding=speech.Encoding.FLAC,
... sample_rate=44100)
>>> hints = ['hi', 'good afternoon']
>>> alternatives = client.sync_recognize(sample, max_alternatives=2,
Expand Down Expand Up @@ -161,12 +157,11 @@ data to possible text alternatives on the fly.
>>> import io
>>> from google.cloud import speech
>>> from google.cloud.speech.encoding import Encoding
>>> client = speech.Client()
>>> with io.open('./hello.wav', 'rb') as stream:
>>> sample = client.sample(stream=stream, encoding=Encoding.LINEAR16,
... sample = client.sample(stream=stream, encoding=speech.Encoding.LINEAR16,
... sample_rate=16000)
>>> for response in client.stream_recognize(sample):
... for response in client.stream_recognize(sample):
... print(response.transcript)
hello
... print(response.is_final)
Expand All @@ -182,12 +177,11 @@ result(s) are returned.
>>> import io
>>> from google.cloud import speech
>>> from google.cloud.speech.encoding import Encoding
>>> client = speech.Client()
>>> with io.open('./hello.wav', 'rb') as stream:
>>> sample = client.sample(stream=stream, encoding=Encoding.LINEAR16,
>>> sample = client.sample(stream=stream, encoding=speech.Encoding.LINEAR16,
... sample_rate=16000)
>>> for response in client.stream_recognize(sample,
... for response in client.stream_recognize(sample,
... interim_results=True):
... print(response.transcript)
hell
Expand All @@ -211,9 +205,9 @@ See: `Single Utterance`_
.. code-block:: python
>>> with io.open('./hello_pause_goodbye.wav', 'rb') as stream:
>>> sample = client.sample(stream=stream, encoding=Encoding.LINEAR16,
>>> sample = client.sample(stream=stream, encoding=speech.Encoding.LINEAR16,
... sample_rate=16000)
>>> stream_container = client.stream_recognize(sample,
... stream_container = client.stream_recognize(sample,
... single_utterance=True)
>>> print(stream_container.get_full_text())
hello
Expand Down
1 change: 1 addition & 0 deletions speech/google/cloud/speech/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,3 +16,4 @@

from google.cloud.speech.client import Client
from google.cloud.speech.connection import Connection
from google.cloud.speech.encoding import Encoding
183 changes: 173 additions & 10 deletions speech/google/cloud/speech/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,12 +24,17 @@
from google.cloud.speech.connection import Connection
from google.cloud.speech.encoding import Encoding
from google.cloud.speech.operation import Operation
from google.cloud.speech.streaming.request import _make_request_stream
from google.cloud.speech.sample import Sample
from google.cloud.speech.streaming.response import StreamingSpeechResponse
from google.cloud.speech.streaming_response import StreamingSpeechResponse

try:
from google.cloud.gapic.speech.v1beta1.speech_api import SpeechApi
from google.cloud.grpc.speech.v1beta1.cloud_speech_pb2 import (
RecognitionConfig)
from google.cloud.grpc.speech.v1beta1.cloud_speech_pb2 import (
StreamingRecognitionConfig)
from google.cloud.grpc.speech.v1beta1.cloud_speech_pb2 import (
StreamingRecognizeRequest)
except ImportError: # pragma: NO COVER
_HAVE_GAX = False
else:
Expand Down Expand Up @@ -284,16 +289,9 @@ def stream_recognize(self, sample, language_code=None,
with the is_final=false flag). If false or
omitted, only is_final=true result(s) are
returned.
:rtype: :class:`~streaming.StreamingResponseContainer`
:returns: An instance of ``StreamingReponseContainer``.
"""
if not _USE_GAX:
raise EnvironmentError('GRPC is required to use this API.')

if sample.stream.closed:
raise ValueError('Stream is closed.')
raise EnvironmentError('gRPC is required to use this API.')

requests = _make_request_stream(sample, language_code=language_code,
max_alternatives=max_alternatives,
Expand Down Expand Up @@ -379,3 +377,168 @@ def _build_request_data(sample, language_code=None, max_alternatives=None,
}

return data


def _make_request_stream(sample, language_code=None, max_alternatives=None,
                         profanity_filter=None, speech_context=None,
                         single_utterance=None, interim_results=None):
    """Generate stream of requests from sample.

    The first request yielded carries only the streaming configuration;
    every following request carries audio read from ``sample.stream``.
    Short reads are coalesced so that each audio request holds at most
    ``sample.chunk_size`` bytes, and no request with empty audio content
    is ever yielded.

    :type sample: :class:`~google.cloud.speech.sample.Sample`
    :param sample: Instance of ``Sample`` containing audio information.

    :type language_code: str
    :param language_code: (Optional) The language of the supplied audio as
                          BCP-47 language tag. Example: ``'en-GB'``.
                          If omitted, defaults to ``'en-US'``.

    :type max_alternatives: int
    :param max_alternatives: (Optional) Maximum number of recognition
                             hypotheses to be returned. The server may
                             return fewer than maxAlternatives.
                             Valid values are 0-30. A value of 0 or 1
                             will return a maximum of 1. Defaults to 1

    :type profanity_filter: bool
    :param profanity_filter: If True, the server will attempt to filter
                             out profanities, replacing all but the
                             initial character in each filtered word with
                             asterisks, e.g. ``'f***'``. If False or
                             omitted, profanities won't be filtered out.

    :type speech_context: list
    :param speech_context: A list of strings (max 50) containing words and
                           phrases "hints" so that the speech recognition
                           is more likely to recognize them. This can be
                           used to improve the accuracy for specific words
                           and phrases. This can also be used to add new
                           words to the vocabulary of the recognizer.

    :type single_utterance: bool
    :param single_utterance: (Optional) If false or omitted, the recognizer
                             will perform continuous recognition
                             (continuing to process audio even if the user
                             pauses speaking) until the client closes the
                             output stream (gRPC API) or when the maximum
                             time limit has been reached. Multiple
                             SpeechRecognitionResults with the is_final
                             flag set to true may be returned.
                             If true, the recognizer will detect a single
                             spoken utterance. When it detects that the
                             user has paused or stopped speaking, it will
                             return an END_OF_UTTERANCE event and cease
                             recognition. It will return no more than one
                             SpeechRecognitionResult with the is_final flag
                             set to true.

    :type interim_results: bool
    :param interim_results: (Optional) If true, interim results (tentative
                            hypotheses) may be returned as they become
                            available (these interim results are indicated
                            with the is_final=false flag). If false or
                            omitted, only is_final=true result(s) are
                            returned.

    :rtype: generator
    :returns: Generator of ``StreamingRecognizeRequest`` instances: one
              configuration request followed by zero or more audio
              requests.
    """
    config_request = _make_streaming_config(
        sample, language_code=language_code,
        max_alternatives=max_alternatives,
        profanity_filter=profanity_filter, speech_context=speech_context,
        single_utterance=single_utterance, interim_results=interim_results)

    # The config request MUST go first and not contain any audio data.
    yield config_request

    chunk_size = sample.chunk_size
    buff = b''
    while True:
        data = sample.stream.read(chunk_size)
        if not data:
            break
        # Keep each request at (or just under) ``chunk_size`` bytes: flush
        # the pending buffer only when appending this read would overflow
        # it, and only when there is something to flush.
        if buff and len(buff) + len(data) > chunk_size:
            yield StreamingRecognizeRequest(audio_content=buff)
            buff = data
        else:
            # Accumulate short reads instead of dropping them.
            buff += data

    # Flush whatever audio is still pending; skip it when there is none so
    # that an empty audio request is never sent.
    if buff:
        yield StreamingRecognizeRequest(audio_content=buff)


def _make_streaming_config(sample, language_code,
                           max_alternatives, profanity_filter,
                           speech_context, single_utterance,
                           interim_results):
    """Build the initial (configuration-only) streaming request.

    The recognition settings are nested three levels deep: a
    ``RecognitionConfig`` is placed inside a ``StreamingRecognitionConfig``,
    which in turn is set as the ``streaming_config`` of a
    ``StreamingRecognizeRequest``. No audio content is attached.

    :type sample: :class:`~google.cloud.speech.sample.Sample`
    :param sample: Instance of ``Sample`` containing audio information;
                   its ``encoding`` and ``sample_rate`` are copied into
                   the recognition config.

    :type language_code: str
    :param language_code: The language of the supplied audio as a BCP-47
                          language tag. Example: ``'en-GB'``.

    :type max_alternatives: int
    :param max_alternatives: Maximum number of recognition hypotheses to
                             be returned.

    :type profanity_filter: bool
    :param profanity_filter: Whether the server should attempt to filter
                             out profanities.

    :type speech_context: list
    :param speech_context: Words and phrases given as "hints" to the
                           recognizer.

    :type single_utterance: bool
    :param single_utterance: Whether recognition should stop after a
                             single spoken utterance.

    :type interim_results: bool
    :param interim_results: Whether tentative (non-final) results may be
                            returned as they become available.

    :rtype: :class:`~StreamingRecognizeRequest`
    :returns: Instance of ``StreamingRecognizeRequest`` whose
              ``streaming_config`` holds the assembled settings.
    """
    # NOTE(review): ``speech_context`` is forwarded unchanged -- confirm
    # that ``RecognitionConfig`` accepts a plain list for this field.
    recognition_kwargs = {
        'encoding': sample.encoding,
        'sample_rate': sample.sample_rate,
        'language_code': language_code,
        'max_alternatives': max_alternatives,
        'profanity_filter': profanity_filter,
        'speech_context': speech_context,
    }

    return StreamingRecognizeRequest(
        streaming_config=StreamingRecognitionConfig(
            config=RecognitionConfig(**recognition_kwargs),
            single_utterance=single_utterance,
            interim_results=interim_results))
2 changes: 1 addition & 1 deletion speech/google/cloud/speech/sample.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ class Sample(object):

def __init__(self, content=None, source_uri=None, stream=None,
encoding=None, sample_rate=None):
if [content, source_uri, stream].count(None) != 2:
if (content, source_uri, stream).count(None) != 2:
raise ValueError('Supply only one of \'content\', \'source_uri\''
' or stream.')

Expand Down
14 changes: 0 additions & 14 deletions speech/google/cloud/speech/streaming/endpointer_type.py

This file was deleted.

Loading

0 comments on commit ea0cb87

Please sign in to comment.