In [None]:
# Copyright 2022 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Cloud Speech Transcription and Content Classification Tutorial

This notebook takes you through how to transcribe and analyze sample audio files. It uses the Speech-to-Text API to convert audio to text and the Natural Language API,
which provides insights for unstructured text data. After getting started with transcription, we will assign a content category to a sample audio file.

<a href="https://console.cloud.google.com/vertex-ai/workbench/deploy-notebook?download_url=https://raw.githubusercontent.com/asrivas/speech-workshop/main/index.ipynb">
Open in Vertex AI Workbench
</a>

## Part 1: Getting Started with the Speech-to-Text API

In [None]:
# Enable the Speech-to-Text API in this Google Cloud project.
# The leading `!` runs the line as a shell command, so the gcloud CLI must be
# available in the notebook environment.
!gcloud services enable speech.googleapis.com

In [None]:
from google.cloud import speech_v1 as speech


# Single Speech-to-Text client created once at module level; the transcription
# functions in this notebook read it as a global.
speech_client = speech.SpeechClient()

def speech_to_text_sync(audio):
    """
    Run synchronous speech recognition on one audio input.

    Args:
      audio: The audio to transcribe, passed straight through to
        `speech_client.recognize` (this notebook supplies a dict with a
        `uri` key).

    Returns:
      The response object returned by `speech_client.recognize`.
    """
    # Only the language is set; no other recognition option is specified.
    recognition_config = {"language_code": "en-US"}
    return speech_client.recognize(config=recognition_config, audio=audio)

def print_output(response):
    """Print the top transcript and its confidence for each result of a Recognize response.

    Args:
      response: A Recognize response object whose `results` each carry a
        list of `alternatives`.
    """
    separator = "-" * 80

    # Results cover consecutive portions of the audio, so walking them in
    # order prints the transcript of the entire audio file.
    for segment in response.results:
        # Only the first alternative of each result is reported.
        top = segment.alternatives[0]
        text, score = top.transcript, top.confidence
        print(separator)
        print(f"Transcript: {text}")
        print(f"Confidence: {score:.0%}")

### Run a sample audio file

The sample audio file is stored in a public Google Cloud Storage bucket. To listen to the audio yourself, navigate to the [preview URL](https://storage.googleapis.com/cloud-samples-data/speech/brooklyn_bridge.flac).

In [None]:
# Transcribe the short public sample clip with the synchronous helper and
# print the top transcript and confidence of every result.
audio = {"uri": "gs://cloud-samples-data/speech/brooklyn_bridge.flac"}
response = speech_to_text_sync(audio)
print_output(response)

## Part 2: Asynchronous Transcription with Google Cloud Storage

In this section we will transcribe a longer audio file. If your file is longer than 60 seconds, you must use the asynchronous endpoint, and the audio must be stored in Google Cloud Storage (GCS).

Our test file is the first 2 minutes of [episode 328](https://www.gcppodcast.com/post/episode-328-database-migration-service-w-shachar-guz-inna-weiner-gabe-weiss/) of the Google Cloud Podcast discussing the [Database Migration Service](https://cloud.google.com/database-migration).
The sample audio file is stored in a public Google Cloud Storage bucket. To listen to the audio yourself, navigate to the [preview URL](https://storage.googleapis.com/cloud-samples-data/speech/sample-podcasts/GCPEpisode328-DatabaseMigrationService-2min%20sample.flac).

In [None]:
def speech_to_text_async(audio, timeout=180):
    """
    Transcribes the audio input, which must be stored in GCS, using the
    asynchronous (long-running) recognition endpoint.

    Args:
      audio: The location of the audio file (this notebook supplies a dict
        with a `uri` key pointing at a `gs://` object).
      timeout: Maximum number of seconds to wait for the long-running
        operation to finish. Defaults to 180, the value that was previously
        hard-coded.

    Returns:
      The response produced by the completed long-running operation.

    Raises:
      Whatever `operation.result` raises when the operation fails or does
      not complete within `timeout` seconds.
    """
    config = dict(
        language_code="en-US",
        audio_channel_count=2,
        enable_automatic_punctuation=True,
        model="latest_long",
    )
    request = speech.types.LongRunningRecognizeRequest(audio=audio, config=config)
    operation = speech_client.long_running_recognize(request)
    # Announce the wait only once the operation has actually been submitted,
    # so a failed submission does not print a misleading message.
    print("Waiting for operation to complete...\n")
    return operation.result(timeout=timeout)  # timeout is in seconds

def get_full_transcription(response):
    """Join the top transcript of every result into a single string.

    Args:
      response: A recognition response whose `results` each carry a list of
        `alternatives`.

    Returns:
      The first alternative's transcript of each result, in order, each
      followed by a newline. An empty string when there are no results.
    """
    # Results cover consecutive portions of the audio; keeping their order
    # yields the transcript of the entire audio file.
    return "".join(
        f"{segment.alternatives[0].transcript}\n" for segment in response.results
    )

### Run a sample audio file

The transcription may take a couple of minutes. While it is running, you can enable the [Natural Language API](https://console.cloud.google.com/marketplace/product/google/language.googleapis.com) for the next section.

In [None]:
# Transcribe the two-minute podcast sample with the asynchronous helper, then
# merge the per-result transcripts into `full_text` for later cells.
audio = {
    "uri": "gs://cloud-samples-data/speech/sample-podcasts/GCPEpisode328-DatabaseMigrationService-2min sample.flac"
}
response = speech_to_text_async(audio)
full_text = get_full_transcription(response)
print(f"Transcript:\n {full_text}")

## Part 3: Analyze Results with the Cloud Natural Language API

In this section we will use the output text to classify the content using the Cloud Natural Language Content Classification [feature](https://cloud.google.com/natural-language/docs/classifying-text).

In [None]:
# Enable the Natural Language API in this Google Cloud project.
# The leading `!` runs the line as a shell command, so the gcloud CLI must be
# available in the notebook environment.
!gcloud services enable language.googleapis.com

In [None]:
from google.cloud import language_v1


# Single Natural Language client created once at module level; `classify_text`
# reads it as a global.
language_client = language_v1.LanguageServiceClient()

def classify_text(text_content):
    """
    Classify a string with the Natural Language API and print each category.

    Args:
      text_content: The text content to analyze.

    Returns:
      None. The name and confidence of every returned category are printed.
    """
    # The document is sent as plain text (available types: PLAIN_TEXT, HTML).
    # The language is optional; if not specified it is automatically
    # detected. Supported languages:
    # https://cloud.google.com/natural-language/docs/languages
    document = {
        "content": text_content,
        "type_": language_v1.Document.Type.PLAIN_TEXT,
        "language": "en",
    }

    # Request the V2 content-categories taxonomy of the V2 classification model.
    v2_model = language_v1.ClassificationModelOptions.V2Model
    request = {
        "document": document,
        "classification_model_options": {
            "v2_model": {
                "content_categories_version": v2_model.ContentCategoriesVersion.V2
            }
        },
    }
    response = language_client.classify_text(request=request)

    # One pair of lines per classified category returned from the API.
    for category in response.categories:
        # Name of the category representing the document, taken from the
        # predefined taxonomy:
        # https://cloud.google.com/natural-language/docs/categories
        print("Category name: {}".format(category.name))
        # How certain the classifier is that this category represents the
        # provided text.
        print("Confidence: {}".format(category.confidence))

### Categorize the sample text

This prints the categories assigned to the transcript; the possible values come from this [list](https://cloud.google.com/natural-language/docs/categories#categories_version_2).

In [None]:
# Classify the podcast transcript; `full_text` is assigned by the Part 2 cell,
# which must have been run first.
classify_text(full_text)