Add stability information to streaming results. Fixes #2702.

googleapis · Nov 11, 2016 · 80bd6f8 · 80bd6f8
1 parent ced9df4
commit 80bd6f8
Show file tree

Hide file tree

Showing 3 changed files with 86 additions and 14 deletions.
diff --git a/google-cloud-speech/google/cloud/speech/client.py b/google-cloud-speech/google/cloud/speech/client.py
@@ -27,6 +27,7 @@
 from google.cloud.speech.connection import Connection
 from google.cloud.speech.encoding import Encoding
 from google.cloud.speech.operation import Operation
+from google.cloud.speech.result import StreamingSpeechResult
 from google.cloud.speech.sample import Sample
 
 
@@ -170,7 +171,8 @@ def streaming_recognize(self, sample, language_code=None,
             Streaming recognition requests are limited to 1 minute of audio.
             See: https://cloud.google.com/speech/limits#content
 
-        Yields: list of :class:`~google.cloud.speech.alternative.Alternatives`
+        Yields: Instance of
+                :class:`~google.cloud.speech.result.StreamingSpeechResult`
                 containing results and metadata from the streaming request.
 
         :type sample: :class:`~google.cloud.speech.sample.Sample`
@@ -242,8 +244,7 @@ def streaming_recognize(self, sample, language_code=None,
         for response in responses:
             for result in response.results:
                 if result.is_final or interim_results:
-                    yield [Alternative.from_pb(alternative)
-                           for alternative in result.alternatives]
+                    yield StreamingSpeechResult.from_pb(result)
 
     def sync_recognize(self, sample, language_code=None,
                        max_alternatives=None, profanity_filter=None,

diff --git a/google-cloud-speech/google/cloud/speech/result.py b/google-cloud-speech/google/cloud/speech/result.py
@@ -0,0 +1,54 @@
+# Copyright 2016 Google Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Speech result representations."""
+
+from google.cloud.speech.alternative import Alternative
+
+
+class StreamingSpeechResult(object):
+    """Streaming speech result representation.
+
+    :type alternatives: list
+    :param alternatives: List of
+                         :class:`~google.cloud.speech.alternative.Alternative`.
+
+    :type is_final: bool
+    :param is_final: Boolean indicator of results finality.
+
+    :type stability: float
+    :param stability: 0.0-1.0 stability score for the results returned.
+    """
+    def __init__(self, alternatives, is_final=False, stability=0.0):
+        self.alternatives = alternatives
+        self.is_final = is_final
+        self.stability = stability
+
+    @classmethod
+    def from_pb(cls, response):
+        """Factory: construct instance of ``StreamingSpeechResult``.
+
+        :type response: :class:`~google.cloud.grpc.speech.v1beta1\
+                               .cloud_speech_pb2.StreamingRecognitionResult`
+        :param response: Instance of ``StreamingRecognitionResult`` protobuf.
+
+        :rtype: :class:`~google.cloud.speech.result.StreamingSpeechResult`
+        :returns: Instance of ``StreamingSpeechResult``.
+        """
+        alternatives = [Alternative.from_pb(alternative)
+                        for alternative in response.alternatives]
+        is_final = response.is_final
+        stability = response.stability
+        return cls(alternatives=alternatives, is_final=is_final,
+                   stability=stability)
diff --git a/google-cloud-speech/unit_tests/test_client.py b/google-cloud-speech/unit_tests/test_client.py
@@ -28,7 +28,7 @@ def _make_result(alternatives=()):
     )
 
 
-def _make_streaming_result(alternatives=(), is_final=True):
+def _make_streaming_result(alternatives=(), is_final=True, stability=1.0):
     from google.cloud.grpc.speech.v1beta1 import cloud_speech_pb2
 
     return cloud_speech_pb2.StreamingRecognitionResult(
@@ -39,6 +39,7 @@ def _make_streaming_result(alternatives=(), is_final=True):
             ) for alternative in alternatives
         ],
         is_final=is_final,
+        stability=stability,
     )
 
 
@@ -476,6 +477,7 @@ def test_stream_recognize_interim_results(self):
 
         from google.cloud.speech import _gax
         from google.cloud.speech.encoding import Encoding
+        from google.cloud.speech.client import StreamingSpeechResult
 
         stream = BytesIO(b'Some audio data...')
         credentials = _Credentials()
@@ -491,11 +493,13 @@ def test_stream_recognize_interim_results(self):
             'confidence': 0.0123456,
         }]
         first_response = _make_streaming_response(
-            _make_streaming_result([], is_final=False))
+            _make_streaming_result([], is_final=False, stability=0.122435))
         second_response = _make_streaming_response(
-            _make_streaming_result(alternatives, is_final=False))
+            _make_streaming_result(alternatives, is_final=False,
+                                   stability=0.1432343))
         last_response = _make_streaming_response(
-            _make_streaming_result(alternatives, is_final=True))
+            _make_streaming_result(alternatives, is_final=True,
+                                   stability=0.9834534))
         responses = [first_response, second_response, last_response]
 
         channel_args = []
@@ -521,15 +525,28 @@ def speech_api(channel=None):
 
         results = list(client.streaming_recognize(sample,
                                                   interim_results=True))
-        self.assertEqual(results[0], [])
-        self.assertEqual(results[1][0].transcript,
+
+        self.assertEqual(len(results), 3)
+        self.assertIsInstance(results[0], StreamingSpeechResult)
+        self.assertEqual(results[0].alternatives, [])
+        self.assertFalse(results[0].is_final)
+        self.assertEqual(results[0].stability, 0.122435)
+        self.assertEqual(results[1].stability, 0.1432343)
+        self.assertFalse(results[1].is_final)
+        self.assertEqual(results[1].alternatives[0].transcript,
                          alternatives[0]['transcript'])
-        self.assertEqual(results[1][0].confidence,
+        self.assertEqual(results[1].alternatives[0].confidence,
                          alternatives[0]['confidence'])
-        self.assertEqual(results[1][1].transcript,
+        self.assertEqual(results[1].alternatives[1].transcript,
                          alternatives[1]['transcript'])
-        self.assertEqual(results[1][1].confidence,
+        self.assertEqual(results[1].alternatives[1].confidence,
                          alternatives[1]['confidence'])
+        self.assertTrue(results[2].is_final)
+        self.assertEqual(results[2].stability, 0.9834534)
+        self.assertEqual(results[2].alternatives[0].transcript,
+                         alternatives[0]['transcript'])
+        self.assertEqual(results[2].alternatives[0].confidence,
+                         alternatives[0]['confidence'])
 
     def test_stream_recognize(self):
         from io import BytesIO
@@ -582,9 +599,9 @@ def speech_api(channel=None):
 
         results = list(client.streaming_recognize(sample))
         self.assertEqual(len(results), 1)
-        self.assertEqual(results[0][0].transcript,
+        self.assertEqual(results[0].alternatives[0].transcript,
                          alternatives[0]['transcript'])
-        self.assertEqual(results[0][0].confidence,
+        self.assertEqual(results[0].alternatives[0].confidence,
                          alternatives[0]['confidence'])
 
     def test_stream_recognize_no_results(self):