# Google Cloud Speechのサンプル

まず、microphone_sample を実行して、日本語の音声を file.wav に録音しておくこと。

cf. https://cloud.google.com/speech-to-text/docs/streaming-recognize#speech-streaming-recognize-python

In [1]:
import os
# Path to the credentials JSON file, expected in the current working
# directory, exported through the GOOGLE_APPLICATION_CREDENTIALS
# environment variable.
GOOGLE_APPLICATION_CREDENTIALS = "/".join((os.getcwd(), "gcv-credentials.json"))
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = GOOGLE_APPLICATION_CREDENTIALS

In [2]:
import io
from google.cloud import speech
from google.cloud.speech import enums
from google.cloud.speech import types

In [3]:
def transcribe_streaming(stream_file, sample_rate_hertz=44100,
                         language_code='ja-JP', chunk_size=16384):
    """Stream an audio file to Cloud Speech and print the transcription.

    The file is read in full and then sent to the streaming recognition API
    as a sequence of ``chunk_size``-byte requests. For every response the
    number of results is printed, followed by each result's ``is_final``
    flag and stability, and the confidence and transcript of each of its
    alternatives.

    Args:
        stream_file: Path of the audio file to transcribe. The recognition
            config declares LINEAR16 encoding, so the file is assumed to
            hold 16-bit linear PCM audio.
        sample_rate_hertz: Sample rate declared to the API. It must match
            the recording; defaults to 44100.
        language_code: Language tag of the speech, defaults to 'ja-JP'.
        chunk_size: Maximum number of audio bytes per streaming request.

    Raises:
        ValueError: If ``chunk_size`` is not positive.
    """
    if chunk_size <= 0:
        raise ValueError('chunk_size must be a positive number of bytes')

    client = speech.SpeechClient()

    with io.open(stream_file, 'rb') as audio_file:
        content = audio_file.read()

    # Send the audio as a series of small requests instead of one message
    # holding the whole file, which is how a streaming source would feed it.
    requests = (
        types.StreamingRecognizeRequest(
            audio_content=content[start:start + chunk_size])
        for start in range(0, len(content), chunk_size))

    config = types.RecognitionConfig(
        encoding=enums.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=sample_rate_hertz,
        language_code=language_code)
    streaming_config = types.StreamingRecognitionConfig(config=config)

    # streaming_recognize returns a generator of responses.
    responses = client.streaming_recognize(streaming_config, requests)

    for response in responses:
        # Once the transcription has settled, the first result will contain
        # the is_final result. The other results will be for subsequent
        # portions of the audio.
        print(len(response.results))
        for result in response.results:
            print('Finished: {}'.format(result.is_final))
            print('Stability: {}'.format(result.stability))
            # The alternatives are ordered from most likely to least.
            for alternative in result.alternatives:
                print('Confidence: {}'.format(alternative.confidence))
                print(u'Transcript: {}'.format(alternative.transcript))

In [4]:
# Run the streaming transcription on file.wav; the results are printed below.
transcribe_streaming("file.wav")

1
Finished: True
Stability: 0.0
Confidence: 0.9167365431785583
Transcript: 今日はあいうえお
