## Setup & Import Library

In [1]:
import requests
import os
import time
import json
from dotenv import load_dotenv
from azure.storage.blob import BlobServiceClient
import textgrids  # uses praat-textgrids package (not textgrid package)
import pandas as pd

from utils import extract_text_from_tg, remove_intents, get_metrics, visualize_alignment



In [2]:
load_dotenv()
AZURE_SPEECH_KEY = os.getenv("SPEECHSDK_API_KEY")
AZURE_SERVICE_REGION = os.getenv("SPEECHSDK_REGION")
DEFAULT_MODEL_ID = '10e98dd4-3d36-4296-b383-3508d63b1e0b'
WHISPER_LARGE_V2_MODEL_ID = 'ad38cffe-d981-4cf2-a062-4108c3c2b48f'


CONTENT_CONTAINER_SAS_URL = 'https://nuscapstonewhisper.blob.core.windows.net/primock57audio?sp=rl&st=2024-09-29T12:14:30Z&se=2024-11-30T20:14:30Z&spr=https&sv=2022-11-02&sr=c&sig=n%2F%2FBIw12rtGnRJFL8aKK0YWv5Pi2ktLy%2Fg8Xi8ieuZA%3D'
DESTINATION_CONTAINER_SAS_URL = 'https://nuscapstonewhisper.blob.core.windows.net/primock57write?sp=racwdl&st=2024-09-29T12:15:25Z&se=2024-11-30T20:15:25Z&spr=https&sv=2022-11-02&sr=c&sig=L%2F8JPzlU9KVT1XdVAZspqifbCfhvC9g%2FvaPC2YNWCO4%3D'

url = f"https://{AZURE_SERVICE_REGION}.api.cognitive.microsoft.com/speechtotext/v3.2/transcriptions"

## Transcription

Using the batch transcription API

### Setup Transcription Payload

In [3]:
payload = f'''
{{
    "contentContainerUrl": "{CONTENT_CONTAINER_SAS_URL}",
    "properties": {{
      "diarizationEnabled": true,
      "displayFormWordLevelTimestampsEnabled": true,
      "wordLevelTimestampsEnabled": false,
      "punctuationMode": "DictatedAndAutomatic",
      "destinationContainerUrl": "{DESTINATION_CONTAINER_SAS_URL}",
    }},
    "model": {{
      "self": "https://southeastasia.api.cognitive.microsoft.com/speechtotext/v3.2/models/base/{WHISPER_LARGE_V2_MODEL_ID}",  
    }},
    "locale": "en-US",
    "displayName": "Transcription 1"
}}
'''

headers = {
  'Ocp-Apim-Subscription-Key': f'{AZURE_SPEECH_KEY}',
  'Content-Type': 'application/json'
}

post_response = requests.request("POST", url, headers=headers, data=payload)

print(post_response.text)   # response from API about the payload sent

{
  "self": "https://southeastasia.api.cognitive.microsoft.com/speechtotext/v3.2/transcriptions/c21dfe23-65d2-46d9-8c26-4fe2a2d088f9",
  "model": {
    "self": "https://southeastasia.api.cognitive.microsoft.com/speechtotext/v3.2/models/base/ad38cffe-d981-4cf2-a062-4108c3c2b48f"
  },
  "links": {
    "files": "https://southeastasia.api.cognitive.microsoft.com/speechtotext/v3.2/transcriptions/c21dfe23-65d2-46d9-8c26-4fe2a2d088f9/files"
  },
  "properties": {
    "diarizationEnabled": true,
    "wordLevelTimestampsEnabled": false,
    "displayFormWordLevelTimestampsEnabled": true,
    "channels": [
      0,
      1
    ],
    "punctuationMode": "DictatedAndAutomatic",
    "profanityFilterMode": "Masked",
    "destinationContainerUrl": "https://nuscapstonewhisper.blob.core.windows.net/primock57write?sp=racwdl&st=2024-09-29T12:15:25Z&se=2024-11-30T20:15:25Z&spr=https&sv=2022-11-02&sr=c&sig=L%2F8JPzlU9KVT1XdVAZspqifbCfhvC9g%2FvaPC2YNWCO4%3D"
  },
  "lastActionDateTime": "2024-09-29T12:51:42Z

In [4]:
post_response_json = post_response.json()

get_run = post_response_json['self']
get_files = post_response_json['links']['files']

print(get_run)
print(get_files)

https://southeastasia.api.cognitive.microsoft.com/speechtotext/v3.2/transcriptions/c21dfe23-65d2-46d9-8c26-4fe2a2d088f9
https://southeastasia.api.cognitive.microsoft.com/speechtotext/v3.2/transcriptions/c21dfe23-65d2-46d9-8c26-4fe2a2d088f9/files


### Check Transcription Status

In [5]:
running_status = None
wait_string = ''

while running_status not in ['Succeeded','Failed']:

    response = requests.request("GET", get_run, headers=headers)

    running_status = response.json()['status']

    if running_status not in ['Succeeded','Failed']:
        wait_string += '.'
        print(f'{wait_string}{running_status}',end ="\r" )
        time.sleep(10)

print(f'{wait_string}{running_status}')    
try:
    print(response.json()['properties']['error']['message'])
except:
    pass

..............................................Succeeded


### Evaluation 

* Read from destination container for transcripted files
* Evaluate using jiwer
* if want to see json content do `print(json.dumps(json_data, indent=2))`

In [6]:
destination_container_name = "primock57write"   
transcript_container_name = "primock57transcript"
connection_string = os.getenv("AZURE_STORAGE_CONNECTION_STRING")
blob_service_client = BlobServiceClient.from_connection_string(connection_string)

# use the contentUrl to access the transcripted file if destinationContainerUrl is not provided in payload
get_response = requests.request("GET", get_files, headers=headers)
get_response_json = get_response.json()
values = get_response_json["values"]

transcript_container_client = blob_service_client.get_container_client(transcript_container_name)
all_transcripts = list(transcript_container_client.list_blobs())

metrics_df = pd.DataFrame(columns=['filename', 'mer', 'wil', 'wip', 'wer'])

for i in range(len(values)):
    if values[i]['kind'] == 'Transcription': 
      transcribed_url = values[i]['links']['contentUrl']
      transcribed_file_name = transcribed_url.split(f"{destination_container_name}/")[-1]
      transcribed_blob_client = blob_service_client.get_blob_client(container=destination_container_name, blob=transcribed_file_name)
      print('\n**************************************************')
      print(transcribed_file_name)
      transcribed_data = transcribed_blob_client.download_blob().content_as_text()
      json_data = json.loads(transcribed_data)
      hypothesis_text = json_data['combinedRecognizedPhrases'][0]['display']   # use 'lexical' for raw text


      transcript_file = all_transcripts[i]
      transcript_blob_client = blob_service_client.get_blob_client(container=transcript_container_name, blob=transcript_file.name)

      print(transcript_file.name)
      transcript_data = transcript_blob_client.download_blob().content_as_bytes()

      tg = textgrids.TextGrid()
      tg.parse(transcript_data)
      
      reference_text = extract_text_from_tg(tg)
      cleaned_reference_text = remove_intents(reference_text)

      mer, wil, wip, wer = get_metrics(cleaned_reference_text, hypothesis_text)
      new_row = pd.DataFrame({
          'filename': [transcribed_file_name],
          'mer': [mer],
          'wil': [wil],
          'wip': [wip],
          'wer': [wer]
      })
      metrics_df = pd.concat([metrics_df, new_row], ignore_index=True)
      
      visualize_alignment(cleaned_reference_text, hypothesis_text)

metrics_df.to_excel('whisper-batch.xlsx', index=False)


**************************************************
c21dfe23-65d2-46d9-8c26-4fe2a2d088f9/primock57audio/day1_consultation01_doctor.wav.json
day1_consultation01_doctor.TextGrid


  metrics_df = pd.concat([metrics_df, new_row], ignore_index=True)


sentence 1
REF: hello hi    um should we start yeah okay hello how um good morning sir how can i help you this morning * ** sorry to hear that um and and when you say  diarrhea what would you mean by  diarrhea do you mean you are going to the toilet more often or are your stools more loose okay and how many times a day are you going let us say   in the last couple of days six seven times a day and you   mention it is mainly watery have you noticed any other things like blood in your stools okay and you mentioned you have had some pain in your tummy as well whereabouts is the pain exactly one side and what side is that left side okay  and can you describe the pain to me okay and is the pain is that is it there all the time or does it come and go come and go does the pain move anywhere else for example towards your back okay fine and you mentioned you have been feeling quite weak and shaky as well what do you mean by shaky do you mean you have been having uh have you been feeling feveris