In [None]:
!pip install -r requirements.txt
!pip install ./python-client
!brew install swagger-codegen

In [None]:
%%bash
# Provisions the Azure resources used by this notebook, downloads the podcast
# audio, converts it to 16 kHz mono PCM16 WAV and uploads the audio to Blob storage.
# Requires the Azure CLI (logged in via `az login`), curl and ffmpeg on PATH.
RESOURCE_GROUP_NAME=rg-anujbh-azure-diarization
COG_SERVICES_RESOURCE_NAME=azure-diarization-batch
STORAGE_ACCOUNT_NAME=anujbhdiarizationstorage
STORAGE_CONTAINER_NAME=anujbhdiarizationstoragecontainer

# Create Azure Resource Group
az group create -l westus2 -n "$RESOURCE_GROUP_NAME"

# Create Cognitive Services Resource
az cognitiveservices account create -n "$COG_SERVICES_RESOURCE_NAME" -g "$RESOURCE_GROUP_NAME" --kind CognitiveServices --sku S0 -l westus2 --yes
# NOTE: this prints the account keys into the cell output; clear the output before sharing the notebook.
az cognitiveservices account keys list --name "$COG_SERVICES_RESOURCE_NAME" --resource-group "$RESOURCE_GROUP_NAME"

# Create storage resource/container
az storage account create -n "$STORAGE_ACCOUNT_NAME" -g "$RESOURCE_GROUP_NAME" -l westus --sku Standard_LRS

# TODO: Authorize AD user to blob storage container
# (the `--auth-mode login` commands below need a data-plane role such as the one shown here)

# az ad signed-in-user show --query objectId -o tsv | az role assignment create \
#     --role "Storage Blob Data Contributor" \
#     --assignee @- \
#     --scope "/subscriptions/<subscription>/resourceGroups/<resource-group>/providers/Microsoft.Storage/storageAccounts/<storage-account>"

# The container must exist before blobs can be uploaded into it.
az storage container create --account-name "$STORAGE_ACCOUNT_NAME" --name "$STORAGE_CONTAINER_NAME" --auth-mode login

# Download podcast file
# The URL is single-quoted: unquoted, every `&` would end the command and background it,
# and bash does not allow spaces around `=` in an assignment.
PODCAST_URL='https://hwcdn.libsyn.com/p/5/5/0/55025959326733f5/EP.181_-_Charlie_Songhurst_FINAL.mp3?c_id=77619212&forcedn=attachment&cs_id=77619212&expiration=1604960032&hwt=5ba8f48c51041fb4eb8549e182f759cb'

# --fail: do not save an HTTP error page as podcast.mp3; --location: follow redirects.
curl --fail --location "$PODCAST_URL" --output podcast.mp3

# Convert to PCM16 Mono Wav
# NOTE(review): `-t 300` limits the output to the first 300 seconds, so this is the
# 5-minute clip rather than the full episode - drop `-t 300` if the full file is wanted.
ffmpeg -t 300 -i podcast.mp3 -acodec pcm_s16le -ac 1 -ar 16000 podcast_mono.wav

# (Optional) Slice audio to first 5 minutes (for testing)
# ffmpeg -t 300 -i podcast.mp3 -acodec pcm_s16le -ac 1 -ar 16000 podcast_mono_short.wav

# Upload file to Blob storage
# NOTE(review): this uploads the original MP3, not the converted WAV - confirm which blob the transcription should use.
az storage blob upload --account-name "$STORAGE_ACCOUNT_NAME" --container-name "$STORAGE_CONTAINER_NAME" --name podcast.mp3 --file podcast.mp3 --auth-mode login

# List blobs from container
az storage blob list --account-name "$STORAGE_ACCOUNT_NAME" --container-name "$STORAGE_CONTAINER_NAME" --output table --auth-mode login

# TODO: Generate swagger codegen client programmatically
# curl 'https://westus.dev.cognitive.microsoft.com/docs/services/speech-to-text-api-v3-0/export?DocumentFormat=Swagger&ApiName=Speech%20to%20Text%20API%20v3.0' --output api.json
# swagger-gen api.json



In [None]:

import datetime
import logging
import sys
import time

import azure.cognitiveservices.speech as speechsdk
import requests
import swagger_client as cris_client
from dotmap import DotMap

# Notebook-wide settings, wrapped in a DotMap so later cells can use attribute
# access (config.service_region, config.blob_uri, ...).
# NOTE: the blob URLs below embed SAS tokens (`sig=`) with fixed expiry dates (`se=`);
# regenerate them for your own storage account and avoid committing live tokens.
config = DotMap({
    # Speech / Cognitive Services key - fill in before running the cells below.
    "subscription_key": "",
    "service_region": "westus2",
    "project_name": "ANUJBH Podcast Transcription",
    "project_description": "Azure Speech Podcast Transcription and Diarization",
    "locale": "en-US",
    # TODO: Replace the blob_uri with your own publicly accessible MONO PCM16 WAV or MP3 audio file
    "blob_uri": "https://anujbhdiarizationstorage.blob.core.windows.net/anujbhdiarizationstoragecontainer/podcast_mono_300.wav?sp=rl&st=2020-11-09T23:57:42Z&se=2020-11-10T23:57:42Z&sv=2019-12-12&sr=b&sig=LDK3eOayQ7ov9%2Fnj4dJX0QYdyc1gZWaUpVb1kjL4c0M%3D",
    # TODO: This is the full podcast. It can take up to 30-45 min
    "blob_uri_long": """https://anujbhdiarizationstorage.blob.core.windows.net/anujbhdiarizationstoragecontainer/podcast_mono.wav?sp=rl&st=2020-11-10T03:44:36Z&se=2021-11-11T03:44:00Z&sv=2019-12-12&sr=b&sig=b1OeEw%2FpXwQmta75NHG7KBGiBxKCW%2FKEuAntUBOyijk%3D""",
})


In [None]:
# Point the generated REST client at the regional v3.0 speech-to-text endpoint
# and authenticate it with the subscription key from `config`.
configuration = cris_client.Configuration()
configuration.host = f"https://{config.service_region}.api.cognitive.microsoft.com/speechtotext/v3.0"
configuration.api_key["Ocp-Apim-Subscription-Key"] = config.subscription_key

client = cris_client.ApiClient(configuration)
api = cris_client.DefaultApi(api_client=client)

# Options passed through to the service in the transcription's `properties` field.
properties = {"wordLevelTimestampsEnabled": True, "diarizationEnabled": True}

# Describe the job: a single audio URL, labelled with the project name/description.
transcription_definition = cris_client.Transcription(
    properties=properties,
    content_urls=[config.blob_uri],
    locale=config.locale,
    description=config.project_description,
    display_name=config.project_name,
)

# Submit the job; `headers` is read further down to extract the transcription id.
created_transcription, status, headers = api.create_transcription_with_http_info(
    transcription=transcription_definition
)


def _paginate(api, paginated_object):
    """
    The autogenerated client does not support pagination. This function returns a generator over
    all items of the array that the paginated object `paginated_object` is part of.
    """
    yield from paginated_object.values
    typename = type(paginated_object).__name__
    auth_settings = ["apiKeyHeader", "apiKeyQuery"]
    while paginated_object.next_link:
        link = paginated_object.next_link[len(api.api_client.configuration.host):]
        paginated_object, status, headers = api.api_client.call_api(link, "GET",
            response_type=typename, auth_settings=auth_settings)

        if status == 200:
            yield from paginated_object.values
        else:
            raise Exception(f"could not receive paginated data: status {status}")



# The last path segment of the "location" header returned by the create call
# is the id of the new transcription.
transcription_id = headers["location"].split("/")[-1]
print(f"Created new transcription with id '{transcription_id}' in region {config.service_region}\n")
print(f"Transcribing file at URI: {config.blob_uri}")

completed = False
# The module is imported as `import datetime`, so the class is datetime.datetime.
start_time = datetime.datetime.now()

# Poll until the service reports a terminal state ("Succeeded" or "Failed").
while not completed:
    # wait for 5 seconds before refreshing the transcription status
    time.sleep(5)

    transcription = api.get_transcription(transcription_id)

    elapsed_time = datetime.datetime.now() - start_time
    print(f"Transcriptions status: {transcription.status} - Elapsed Time: {elapsed_time.total_seconds()/60} minutes")

    if transcription.status in ("Failed", "Succeeded"):
        completed = True

    if transcription.status == "Succeeded":
        pag_files = api.get_transcription_files(transcription_id)
        for file_data in _paginate(api, pag_files):
            # Only files of kind "Transcription" are downloaded; everything else is skipped.
            if file_data.kind != "Transcription":
                continue

            audiofilename = file_data.name
            results_url = file_data.links.content_url
            results = requests.get(results_url)

            # Save file to disk for evaluation.
            # The timestamp is built outside the f-string: reusing double quotes inside a
            # double-quoted f-string is a syntax error before Python 3.12.
            timestamp = datetime.datetime.now().strftime("%H_%M_%S__%m_%d_%Y")
            output_path = f"output{timestamp}.json"
            with open(output_path, "wb") as output_file:
                output_file.write(results.content)

            print(f"Processed results for: {audiofilename}")
            print(f"Saved output to {output_path}")

            print(f"Total Transcription Time: { elapsed_time.total_seconds()/60 } minutes")

            # print(f"Results for {audiofilename}:\n{results.content.decode('utf-8')}")
    elif transcription.status == "Failed":
        print(f"Transcription failed: {transcription.properties.error.message}")


In [None]:
# TODO: this cell originally contained only an unfinished `with open` statement, which is a
# syntax error and stops "Run All". Presumably it was meant to read back the saved
# transcription output file - confirm the intent and complete it.

# Notes

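The Azure Speech Python SDK does not support batch transcription. As a workaround, this notebook calls the Speech-to-Text REST API v3.0 through a Swagger-generated Python client (`swagger_client`), following the official sample linked below.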


https://github.com/Azure-Samples/cognitive-services-speech-sdk/blob/master/samples/batch/python/python-client/main.py
