In [None]:
# 
# This notebook requires the following environment variables (.env file):
# 

# SPEECH_API_VERSION=2024-11-15
# SPEECH_API_KEY=TODO
# SPEECH_API_REGION=eastasia
# SPEECH_API_LOCALE=en-US
# SPEECH_TO_TEXT_INPUT_CONTAINER_SAS_URI=TODO
# SPEECH_TO_TEXT_OUTPUT_CONTAINER_SAS_URI=TODO


In [None]:
#
# This script is derived from the following sample:
# https://github.com/Azure-Samples/cognitive-services-speech-sdk/blob/master/samples/batch/python/python-client/main.py
#

import os
import time
import swagger_client
from dotenv import load_dotenv

def transcribe_from_container(uri, properties):
    """
    Build the transcription request covering every file in the container at `uri`.

    The request carries the module-level NAME, DESCRIPTION and LOCALE settings together
    with the supplied `properties`; no custom model is set, so the base model for the
    locale is used. The `swagger_client.Transcription` definition is returned to the
    caller, who is responsible for submitting it.
    """
    return swagger_client.Transcription(
        properties=properties,
        content_container_url=uri,
        locale=LOCALE,
        description=DESCRIPTION,
        display_name=NAME,
    )

#
# Main
#

# Load settings from the .env file into the process environment.
load_dotenv()

# Fail fast with a readable message when a required setting is missing or empty.
# Without this check a missing value becomes None and only surfaces later as an
# opaque error (for example a host of "https://None.api.cognitive.microsoft.com").
_REQUIRED_ENV_VARS = (
    "SPEECH_API_VERSION",
    "SPEECH_API_KEY",
    "SPEECH_API_REGION",
    "SPEECH_API_LOCALE",
    "SPEECH_TO_TEXT_INPUT_CONTAINER_SAS_URI",
)
_missing_env_vars = [name for name in _REQUIRED_ENV_VARS if not os.getenv(name)]
if _missing_env_vars:
    raise RuntimeError(
        "Missing required environment variables (set them in the .env file): "
        + ", ".join(_missing_env_vars)
    )

# Version of the Speech to text REST API passed to every request.
API_VERSION = os.getenv("SPEECH_API_VERSION")

# Credentials and region of the Speech resource.
SUBSCRIPTION_KEY = os.getenv("SPEECH_API_KEY")
SERVICE_REGION = os.getenv("SPEECH_API_REGION")

# Locale of the audio to transcribe (e.g. en-US).
LOCALE = os.getenv("SPEECH_API_LOCALE")

# Display name and description attached to the transcription job.
NAME = "Simple transcription"
DESCRIPTION = "Simple transcription"

# Provide the uri of a container with audio files for transcribing all of them
# with a single request. At least 'read' and 'list' (rl) permissions are required.
RECORDINGS_CONTAINER_URI = os.getenv("SPEECH_TO_TEXT_INPUT_CONTAINER_SAS_URI")

# Container that receives the transcription results. Optional: an unset or empty
# value is normalized to None so that "no destination" is represented one way only.
TEXT_CONTAINER_URI = os.getenv("SPEECH_TO_TEXT_OUTPUT_CONTAINER_SAS_URI") or None

# Set model information when doing transcription with custom models
MODEL_REFERENCE = None  # guid of a custom model

print("Starting transcription process...")

# Configure API key authorization: the subscription key is stored under the
# "Ocp-Apim-Subscription-Key" entry of the client configuration.
configuration = swagger_client.Configuration()
configuration.api_key["Ocp-Apim-Subscription-Key"] = SUBSCRIPTION_KEY
# The service host is built from the region read from SPEECH_API_REGION.
configuration.host = f"https://{SERVICE_REGION}.api.cognitive.microsoft.com/speechtotext"

# Create the client object from the configuration above (carries the key and host).
client = swagger_client.ApiClient(configuration)

# Create an instance of the transcription API class; it is used further down to
# submit the transcription and to poll its status.
api = swagger_client.CustomSpeechTranscriptionsApi(api_client=client)

# Specify transcription properties by passing a dict to the properties parameter. See
# https://learn.microsoft.com/azure/cognitive-services/speech-service/batch-transcription-create?pivots=rest-api#request-configuration-options # noqa: E501
# for supported parameters.
# NOTE(review): time_to_live_hours=6 presumably limits how long the service keeps the
# transcription before deleting it -- confirm against the API reference.
properties = swagger_client.TranscriptionProperties(time_to_live_hours=6)
properties.word_level_timestamps_enabled = True
properties.display_form_word_level_timestamps_enabled = True
properties.punctuation_mode = "DictatedAndAutomatic"
properties.profanity_filter_mode = "Masked"
# Output container taken from SPEECH_TO_TEXT_OUTPUT_CONTAINER_SAS_URI; the status loop
# below checks this value to decide which success message applies.
properties.destination_container_url = TEXT_CONTAINER_URI

# Uncomment the following line to enable and configure speaker separation.
# properties.diarization = swagger_client.DiarizationProperties(max_speakers=2, enabled=True)

# Build the request for all recordings in the input container and submit it.
transcription_definition = transcribe_from_container(RECORDINGS_CONTAINER_URI, properties)

created_transcription, status, headers = api.transcriptions_submit_with_http_info(
    transcription=transcription_definition, api_version=API_VERSION)

# Get the transcription id from the location URI: it is the last path segment,
# with any query string stripped.
transcription_id = headers["location"].split("/")[-1].split("?")[0]

# Log information about the created transcription. If you should ask for support, please
# include this information.
print(f"Created new transcription with id '{transcription_id}' in region {SERVICE_REGION}")

print("Checking status.")

# Poll until the service reports a terminal status.
completed = False

while not completed:
    # wait for 5 seconds before refreshing the transcription status
    time.sleep(5)

    transcription = api.transcriptions_get(transcription_id, api_version=API_VERSION)
    print(f"Transcriptions status: {transcription.status}")

    completed = transcription.status in ("Failed", "Succeeded")

# Report the outcome once, after the loop, so every terminal state produces a message.
if transcription.status == "Succeeded":
    if properties.destination_container_url is not None:
        print("Transcription succeeded. Results are located in your Azure Blob Storage.")
    else:
        # Previously this case finished silently, leaving the user without any feedback.
        print(
            "Transcription succeeded. No destination container was configured; "
            "retrieve the results through the transcription files API."
        )
else:
    # The error details may be absent; fall back to a generic message instead of
    # raising AttributeError while reporting the failure.
    error = getattr(transcription.properties, "error", None)
    message = getattr(error, "message", None) or "no error details were returned by the service"
    print(f"Transcription failed: {message}")