In [None]:
import os
from google.cloud import speech

# Set the GOOGLE_APPLICATION_CREDENTIALS environment variable to a
# service-account key file. The path is relative, so it is resolved against
# the process's current working directory.
os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = 'client_service_key.json'
# Module-level Speech client. NOTE(review): the speech_to_text definitions in
# this notebook construct their own client and do not read this one.
speech_client = speech.SpeechClient()

In [None]:
def hello_gcs(event, context):
    """Triggered by a change to a Cloud Storage bucket.

    Prints the changed object's name and the raw event payload. When the
    object name ends with ".wav", the object is transcribed and its sentiment
    is analyzed via process_audio_file.

    Args:
        event (dict): Event payload. Must contain 'name'. When it also
            contains a non-empty 'bucket', that bucket is used to build the
            object's gs:// URI.
        context (google.cloud.functions.Context): Metadata for the event.
            Not read by this function.

    Returns:
        The (transcription, magnitude, score) result of process_audio_file
        for ".wav" objects, otherwise None.
    """
    file = event
    print(f"Processing file: {file['name']}.")
    print(event)

    if not file['name'].endswith(".wav"):
        return None

    # Prefer the bucket named in the event so the function keeps working when
    # it is attached to a different bucket; fall back to the bucket that was
    # previously hard-coded here when the payload does not name one.
    bucket = file.get('bucket') or "heroxr-development.appspot.com"
    media_uri = f"gs://{bucket}/{file['name']}"

    # Hand the results back instead of discarding them so callers (and the
    # notebook) can inspect what was computed.
    return process_audio_file(media_uri)

In [None]:
def process_audio_file(media_uri):
    """Transcribes a .wav file from the bucket and scores the transcript's sentiment.

    Args:
        media_uri (str): path to file within bucket

    Returns:
        tuple: (transcription, magnitude, score) - the text produced by
        speech_to_text followed by the two values from analyze_sentiment.
    """
    text = speech_to_text(media_uri)
    # analyze_sentiment yields exactly two values: magnitude first, then score.
    sentiment_magnitude, sentiment_score = analyze_sentiment(text)
    return text, sentiment_magnitude, sentiment_score

In [1]:
def analyze_sentiment(transcription):
    """Runs document-level sentiment analysis on a transcript.

    Args:
        transcription (str): the transcription to analyze

    Returns:
        tuple: (magnitude, score) read from the response's document_sentiment.
    """
    from google.cloud import language_v1

    language_client = language_v1.LanguageServiceClient()

    # The transcript is submitted as a single plain-text document.
    plain_text = language_v1.Document.Type.PLAIN_TEXT
    document = language_v1.Document(content=transcription, type_=plain_text)

    response = language_client.analyze_sentiment(request={'document': document})
    sentiment = response.document_sentiment

    return sentiment.magnitude, sentiment.score

In [None]:
def speech_to_text(media_uri, sample_rate_hertz=44100, audio_channel_count=2,
                   language_code='en-US', timeout=90):
    """Converts .wav files to text

    Args:
        media_uri (str): path to file within bucket
        sample_rate_hertz (int): sampling rate passed to the recognition
            config; change to match the sampling rate of the unity files.
        audio_channel_count (int | None): channel count passed to the
            recognition config; left out of the config entirely when None.
        language_code (str): language code passed to the recognition config.
        timeout (float): seconds to wait for the long-running operation's
            result.

    Returns:
        str: the first alternative of every result, joined with single
        spaces (no trailing space); "" when there are no results.
    """
    from google.cloud import speech

    speech_client = speech.SpeechClient()
    long_audio = speech.RecognitionAudio(uri=media_uri)

    config_kwargs = {
        'sample_rate_hertz': sample_rate_hertz,
        'enable_automatic_punctuation': True,
        'language_code': language_code,
    }
    # Only send a channel count when the caller supplied one.
    if audio_channel_count is not None:
        config_kwargs['audio_channel_count'] = audio_channel_count
    config = speech.RecognitionConfig(**config_kwargs)

    # We use long_running_recognize because media files might be over 1 minute in length
    operation = speech_client.long_running_recognize(
        config=config,
        audio=long_audio
    )
    response = operation.result(timeout=timeout)

    # Join the top alternative of each segment with single spaces. Results
    # that carry no alternatives are skipped instead of raising IndexError.
    return ' '.join(
        result.alternatives[0].transcript
        for result in response.results
        if result.alternatives
    )

In [2]:
import os

# Set GOOGLE_APPLICATION_CREDENTIALS to a service-account key file (relative
# path, resolved against the current working directory).
# NOTE(review): the notebook's first cell performs the same assignment;
# presumably repeated here so this cell can run on its own - confirm.
os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = 'client_service_key.json'


def speech_to_text(media_uri, sample_rate_hertz=48000, audio_channel_count=None,
                   language_code='en-US', timeout=90):
    """Converts .wav files to text

    Args:
        media_uri (str): path to file within bucket
        sample_rate_hertz (int): sampling rate passed to the recognition
            config; change to match the sampling rate of the unity files.
        audio_channel_count (int | None): channel count passed to the
            recognition config; left out of the config entirely when None
            (the default).
        language_code (str): language code passed to the recognition config.
        timeout (float): seconds to wait for the long-running operation's
            result.

    Returns:
        str: the first alternative of every result, joined with single
        spaces (no trailing space); "" when there are no results.
    """
    from google.cloud import speech

    speech_client = speech.SpeechClient()
    long_audio = speech.RecognitionAudio(uri=media_uri)

    config_kwargs = {
        'sample_rate_hertz': sample_rate_hertz,
        'enable_automatic_punctuation': True,
        'language_code': language_code,
    }
    # Only send a channel count when the caller supplied one.
    if audio_channel_count is not None:
        config_kwargs['audio_channel_count'] = audio_channel_count
    config = speech.RecognitionConfig(**config_kwargs)

    # We use long_running_recognize because media files might be over 1 minute in length
    operation = speech_client.long_running_recognize(
        config=config,
        audio=long_audio
    )
    response = operation.result(timeout=timeout)

    # Join the top alternative of each segment with single spaces. Results
    # that carry no alternatives are skipped instead of raising IndexError.
    return ' '.join(
        result.alternatives[0].transcript
        for result in response.results
        if result.alternatives
    )

In [None]:
# Manual check of speech_to_text: transcribe one sample recording addressed by
# its gs:// URI and print the resulting text.
media_uri = "gs://hero-speech-to-text-media-files/testing_converted.wav"
transcription = speech_to_text(media_uri=media_uri)
print(transcription)

In [None]:
# Manual check of analyze_sentiment on a fixed transcript, so the sentiment step
# can be exercised without first running speech_to_text.
# NOTE(review): the text looks like raw speech-to-text output (odd punctuation
# and word breaks); it is input data and is intentionally left as-is.
transcription = "In 2006 Geoffrey, Hinton at all published a paper showing how to train, a deep neural network. If pull up recognizing handwritten, digits with the state-of-the-art Precision. They branded this technique deep learning. A deep neural network is a very simplified model of our cerebral. Cortex composed, of a stack of layers of artificial neurons. Training, a deep neural, net was widely considered impossible at the time. And most researchers had abandoned, the idea in the late 1990s. This paper revived the interest of the scientific community. And before long, many new papers, demonstrated that deep learning was not only possible, but capable of mind-blowing achievements that no other machine learning techniques, could hope to match This enthusiasm soon extended too many other areas of machine learning a decade or so, later, machine learning has conquered the industry. It is at the heart of much of the Magic. In today's high-tech products, ranking, your web, search results powering your smartphone speech, recognition, recommending videos and beating the world champion at the game of go before, you know, it will be driving your car."
magnitude, score = analyze_sentiment(transcription)
print(f"magnitude: {magnitude}, score: {score}")