Skip to content

Commit

Permalink
Add stability information to streaming results. Fixes #2702.
Browse files Browse the repository at this point in the history
  • Loading branch information
daspecster committed Nov 11, 2016
1 parent ced9df4 commit 80bd6f8
Show file tree
Hide file tree
Showing 3 changed files with 86 additions and 14 deletions.
7 changes: 4 additions & 3 deletions google-cloud-speech/google/cloud/speech/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
from google.cloud.speech.connection import Connection
from google.cloud.speech.encoding import Encoding
from google.cloud.speech.operation import Operation
from google.cloud.speech.result import StreamingSpeechResult
from google.cloud.speech.sample import Sample


Expand Down Expand Up @@ -170,7 +171,8 @@ def streaming_recognize(self, sample, language_code=None,
Streaming recognition requests are limited to 1 minute of audio.
See: https://cloud.google.com/speech/limits#content
Yields: list of :class:`~google.cloud.speech.alternative.Alternatives`
Yields: Instance of
:class:`~google.cloud.speech.result.StreamingSpeechResult`
containing results and metadata from the streaming request.
:type sample: :class:`~google.cloud.speech.sample.Sample`
Expand Down Expand Up @@ -242,8 +244,7 @@ def streaming_recognize(self, sample, language_code=None,
for response in responses:
for result in response.results:
if result.is_final or interim_results:
yield [Alternative.from_pb(alternative)
for alternative in result.alternatives]
yield StreamingSpeechResult.from_pb(result)

def sync_recognize(self, sample, language_code=None,
max_alternatives=None, profanity_filter=None,
Expand Down
54 changes: 54 additions & 0 deletions google-cloud-speech/google/cloud/speech/result.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
# Copyright 2016 Google Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Speech result representations."""

from google.cloud.speech.alternative import Alternative


class StreamingSpeechResult(object):
    """Streaming speech result representation.

    Bundles the recognition alternatives of one streaming result together
    with the finality flag and stability score reported alongside them.

    :type alternatives: list
    :param alternatives: List of
                         :class:`~google.cloud.speech.alternative.Alternative`.

    :type is_final: bool
    :param is_final: Boolean indicator of results finality.

    :type stability: float
    :param stability: 0.0-1.0 stability score for the results returned.
    """

    def __init__(self, alternatives, is_final=False, stability=0.0):
        self.alternatives = alternatives
        self.is_final = is_final
        self.stability = stability

    @classmethod
    def from_pb(cls, response):
        """Factory: construct instance of ``StreamingSpeechResult``.

        :type response: :class:`~google.cloud.grpc.speech.v1beta1\
                        .cloud_speech_pb2.StreamingRecognitionResult`
        :param response: Instance of ``StreamingRecognitionResult``
                         protobuf. Despite the parameter name, this is a
                         single result entry (one element of a streaming
                         response's ``results``), not the whole response.

        :rtype: :class:`~google.cloud.speech.result.StreamingSpeechResult`
        :returns: Instance of ``StreamingSpeechResult``.
        """
        # Convert every protobuf alternative into its wrapper class; an
        # interim result may legitimately carry no alternatives at all.
        alternatives = [Alternative.from_pb(alternative)
                        for alternative in response.alternatives]
        return cls(alternatives=alternatives,
                   is_final=response.is_final,
                   stability=response.stability)
39 changes: 28 additions & 11 deletions google-cloud-speech/unit_tests/test_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ def _make_result(alternatives=()):
)


def _make_streaming_result(alternatives=(), is_final=True):
def _make_streaming_result(alternatives=(), is_final=True, stability=1.0):
from google.cloud.grpc.speech.v1beta1 import cloud_speech_pb2

return cloud_speech_pb2.StreamingRecognitionResult(
Expand All @@ -39,6 +39,7 @@ def _make_streaming_result(alternatives=(), is_final=True):
) for alternative in alternatives
],
is_final=is_final,
stability=stability,
)


Expand Down Expand Up @@ -476,6 +477,7 @@ def test_stream_recognize_interim_results(self):

from google.cloud.speech import _gax
from google.cloud.speech.encoding import Encoding
from google.cloud.speech.client import StreamingSpeechResult

stream = BytesIO(b'Some audio data...')
credentials = _Credentials()
Expand All @@ -491,11 +493,13 @@ def test_stream_recognize_interim_results(self):
'confidence': 0.0123456,
}]
first_response = _make_streaming_response(
_make_streaming_result([], is_final=False))
_make_streaming_result([], is_final=False, stability=0.122435))
second_response = _make_streaming_response(
_make_streaming_result(alternatives, is_final=False))
_make_streaming_result(alternatives, is_final=False,
stability=0.1432343))
last_response = _make_streaming_response(
_make_streaming_result(alternatives, is_final=True))
_make_streaming_result(alternatives, is_final=True,
stability=0.9834534))
responses = [first_response, second_response, last_response]

channel_args = []
Expand All @@ -521,15 +525,28 @@ def speech_api(channel=None):

results = list(client.streaming_recognize(sample,
interim_results=True))
self.assertEqual(results[0], [])
self.assertEqual(results[1][0].transcript,

self.assertEqual(len(results), 3)
self.assertIsInstance(results[0], StreamingSpeechResult)
self.assertEqual(results[0].alternatives, [])
self.assertFalse(results[0].is_final)
self.assertEqual(results[0].stability, 0.122435)
self.assertEqual(results[1].stability, 0.1432343)
self.assertFalse(results[1].is_final)
self.assertEqual(results[1].alternatives[0].transcript,
alternatives[0]['transcript'])
self.assertEqual(results[1][0].confidence,
self.assertEqual(results[1].alternatives[0].confidence,
alternatives[0]['confidence'])
self.assertEqual(results[1][1].transcript,
self.assertEqual(results[1].alternatives[1].transcript,
alternatives[1]['transcript'])
self.assertEqual(results[1][1].confidence,
self.assertEqual(results[1].alternatives[1].confidence,
alternatives[1]['confidence'])
self.assertTrue(results[2].is_final)
self.assertEqual(results[2].stability, 0.9834534)
self.assertEqual(results[2].alternatives[0].transcript,
alternatives[0]['transcript'])
self.assertEqual(results[2].alternatives[0].confidence,
alternatives[0]['confidence'])

def test_stream_recognize(self):
from io import BytesIO
Expand Down Expand Up @@ -582,9 +599,9 @@ def speech_api(channel=None):

results = list(client.streaming_recognize(sample))
self.assertEqual(len(results), 1)
self.assertEqual(results[0][0].transcript,
self.assertEqual(results[0].alternatives[0].transcript,
alternatives[0]['transcript'])
self.assertEqual(results[0][0].confidence,
self.assertEqual(results[0].alternatives[0].confidence,
alternatives[0]['confidence'])

def test_stream_recognize_no_results(self):
Expand Down

0 comments on commit 80bd6f8

Please sign in to comment.