Skip to content
This repository has been archived by the owner on Feb 9, 2023. It is now read-only.

Commit

Permalink
Update to Cloud Speech v1 (#206)
Browse files Browse the repository at this point in the history
The Cloud Speech v1beta1 endpoint has been shut down, so updating to the v1
API is now required for the Cloud Speech demo to work.

See #205.
  • Loading branch information
drigz committed Dec 22, 2017
1 parent d3f5a05 commit 660ad58
Show file tree
Hide file tree
Showing 2 changed files with 34 additions and 27 deletions.
4 changes: 2 additions & 2 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
google-assistant-grpc==0.0.2
grpc-google-cloud-speech-v1beta1==0.14.0
google-auth-oauthlib==0.1.0
google-cloud-speech==0.30.0
google-auth-oauthlib==0.2.0
57 changes: 32 additions & 25 deletions src/aiy/_apis/_speech.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,14 +18,23 @@
import collections
import logging
import os
import sys
import tempfile
import wave

import google.auth
import google.auth.exceptions
import google.auth.transport.grpc
import google.auth.transport.requests
from google.cloud.grpc.speech.v1beta1 import cloud_speech_pb2 as cloud_speech
try:
from google.cloud import speech
from google.cloud.speech import enums
from google.cloud.speech import types
except ImportError:
print("Failed to import google.cloud.speech. Try:")
print(" env/bin/pip install -r requirements.txt")
sys.exit(1)

from google.rpc import code_pb2 as error_code
from google.assistant.embedded.v1alpha1 import embedded_assistant_pb2
import grpc
Expand Down Expand Up @@ -135,7 +144,7 @@ def _get_speech_context(self):
"""Return a SpeechContext instance to bias recognition towards certain
phrases.
"""
return cloud_speech.SpeechContext(
return types.SpeechContext(
phrases=self._phrases,
)

Expand Down Expand Up @@ -289,53 +298,51 @@ def __init__(self, credentials_file):

self.language_code = aiy.i18n.get_language_code()

if not hasattr(cloud_speech, 'StreamingRecognizeRequest'):
raise ValueError("cloud_speech_pb2.py doesn't have StreamingRecognizeRequest.")

self._transcript = None

def reset(self):
super().reset()
self._transcript = None

def _make_service(self, channel):
return cloud_speech.SpeechStub(channel)
return speech.SpeechClient()

def _create_config_request(self):
recognition_config = cloud_speech.RecognitionConfig(
# There are a bunch of config options you can specify. See
# https://goo.gl/KPZn97 for the full list.
encoding='LINEAR16', # raw 16-bit signed LE samples
sample_rate=AUDIO_SAMPLE_RATE_HZ,
recognition_config = types.RecognitionConfig(
encoding=enums.RecognitionConfig.AudioEncoding.LINEAR16,
sample_rate_hertz=AUDIO_SAMPLE_RATE_HZ,
# For a list of supported languages see:
# https://cloud.google.com/speech/docs/languages.
language_code=self.language_code, # a BCP-47 language tag
speech_context=self._get_speech_context(),
speech_contexts=[self._get_speech_context()],
)
streaming_config = cloud_speech.StreamingRecognitionConfig(
streaming_config = types.StreamingRecognitionConfig(
config=recognition_config,
single_utterance=True, # TODO(rodrigoq): find a way to handle pauses
single_utterance=True,
)

return cloud_speech.StreamingRecognizeRequest(
streaming_config=streaming_config)
# TODO(rodrigoq): we're actually returning a Config, not a Request, as
# the v1 API takes the Config and wraps it up in a Request, but we still
# want to share code with the Assistant API. Can we clean this up?
return streaming_config

def _create_audio_request(self, data):
return cloud_speech.StreamingRecognizeRequest(audio_content=data)
return types.StreamingRecognizeRequest(audio_content=data)

def _create_response_stream(self, service, request_stream, deadline):
return service.StreamingRecognize(request_stream, deadline)
def _create_response_stream(self, client, request_stream, deadline):
config = next(request_stream)
return client.streaming_recognize(config, request_stream)

def _stop_sending_audio(self, resp):
"""Check the endpointer type to see if an utterance has ended."""

if resp.endpointer_type:
endpointer_type = cloud_speech.StreamingRecognizeResponse.EndpointerType.Name(
resp.endpointer_type)
logger.info('endpointer_type: %s', endpointer_type)
if resp.speech_event_type:
speech_event_type = types.StreamingRecognizeResponse.SpeechEventType.Name(
resp.speech_event_type)
logger.info('endpointer_type: %s', speech_event_type)

END_OF_AUDIO = cloud_speech.StreamingRecognizeResponse.EndpointerType.Value('END_OF_AUDIO')
return resp.endpointer_type == END_OF_AUDIO
END_OF_SINGLE_UTTERANCE = types.StreamingRecognizeResponse.SpeechEventType.Value('END_OF_SINGLE_UTTERANCE')
return resp.speech_event_type == END_OF_SINGLE_UTTERANCE

def _handle_response(self, resp):
"""Store the last transcript we received."""
Expand Down

0 comments on commit 660ad58

Please sign in to comment.