# Exploring Azure Text Analytics for Chapter Titles

## Setting up Speech SDK and Speech Recognizer

In [1]:
import os
import time
from dotenv import load_dotenv
import azure.cognitiveservices.speech as speechsdk

In [2]:
# Speech Service credentials: load the .env file first, then read the
# subscription key and region from the process environment.
load_dotenv()
AZURE_SPEECH_KEY = os.environ.get("SPEECHSDK_API_KEY")
AZURE_SERVICE_REGION = os.environ.get("SPEECHSDK_REGION")

In [4]:
import re
import textgrid

def extract_text_from_textgrid(file_path, tier_name=None):
    """Concatenate the non-blank labels of one tier of a TextGrid file.

    Args:
        file_path: Path of the TextGrid file to load.
        tier_name: Name of the tier to read. When omitted (or empty), the
            first tier of the file is used.

    Returns:
        The marks of all non-blank entries of the tier, in tier order,
        joined with single spaces.

    Raises:
        ValueError: If ``tier_name`` is given but no tier with that name is
            found in the file.
    """
    tg = textgrid.TextGrid.fromFile(file_path)

    if tier_name:
        tier = tg.getFirst(tier_name)
        if tier is None:
            # NOTE(review): getFirst appears to return None for an unknown
            # name; fail here with a clear message instead of letting the
            # loop below raise "NoneType is not iterable".
            raise ValueError(
                f"No tier named {tier_name!r} found in TextGrid file {file_path!r}"
            )
    else:
        # No tier requested: fall back to the first tier in the file
        tier = tg[0]

    # Keep only entries whose mark contains something other than whitespace.
    # The mark itself is kept unstripped, as before.
    marks = [
        interval.mark
        for interval in tier
        if interval.mark and interval.mark.strip()
    ]
    return " ".join(marks)

def remove_intents(text):
    """
    Strip angle-bracket annotation tags (e.g. <UNSURE>, <UNIN/>) from the text.

    Everything from a '<' up to the next '>' is dropped, including the
    brackets. Runs of whitespace left behind are then collapsed to a single
    space and the result is trimmed at both ends.
    """
    tag_re = re.compile(r'<[^>]*>')
    whitespace_re = re.compile(r'\s+')

    without_tags = tag_re.sub('', text)
    return whitespace_re.sub(' ', without_tags).strip()

In [5]:
def initialize_speech_service(audio_file_path):
    """Build a speech recognizer that reads its audio from a file.

    Args:
        audio_file_path: Path of the audio file passed to the SDK's
            ``AudioConfig``.

    Returns:
        A ``speechsdk.SpeechRecognizer`` built from the notebook-level
        ``AZURE_SPEECH_KEY`` / ``AZURE_SERVICE_REGION`` and the given file.

    Raises:
        ValueError: If the key or region has not been set.
    """
    # Fail early with an actionable message; os.getenv silently yields None
    # when the variables are absent from the environment / .env file.
    if not AZURE_SPEECH_KEY or not AZURE_SERVICE_REGION:
        raise ValueError(
            "Azure Speech credentials are missing: set SPEECHSDK_API_KEY and "
            "SPEECHSDK_REGION in the environment or in the .env file."
        )

    speech_config = speechsdk.SpeechConfig(subscription=AZURE_SPEECH_KEY, region=AZURE_SERVICE_REGION)
    audio_config = speechsdk.audio.AudioConfig(filename=audio_file_path)
    return speechsdk.SpeechRecognizer(speech_config=speech_config, audio_config=audio_config)

def transcribe_continuous_audio_file(recognizer):
    """Run continuous recognition until the session ends and return the transcript.

    Handlers are attached to the recognizer's ``recognized``, ``canceled``
    and ``session_stopped`` signals, recognition is started, and the function
    polls until one of the terminating handlers fires (or the user
    interrupts). Recognition is always stopped before returning.

    Args:
        recognizer: A speech recognizer exposing the ``recognized``,
            ``canceled`` and ``session_stopped`` signals plus
            ``start_continuous_recognition`` / ``stop_continuous_recognition``.

    Returns:
        All non-empty recognized segments joined with single spaces.
    """
    recognized_speech = []

    # Flipped by the terminating callbacks; polled by the wait loop below
    done = False

    def handle_recognized(evt):
        text = evt.result.text
        if not text:
            # Empty results would only add stray spaces to the joined transcript
            return
        print(f"Recognized: {text}")
        recognized_speech.append(text)

    def handle_canceled(evt):
        nonlocal done
        # The cancellation reason and error text live on cancellation_details;
        # evt.result.reason is a ResultReason and never equals
        # CancellationReason.Error, so the old check could not report errors.
        details = evt.cancellation_details
        print(f"Recognition canceled: {details.reason}")
        if details.reason == speechsdk.CancellationReason.Error:
            print(f"Error details: {details.error_details}")
        done = True

    def handle_session_stopped(evt):
        nonlocal done
        done = True

    # Attach handlers for results and for both ways a session can end
    recognizer.recognized.connect(handle_recognized)
    recognizer.canceled.connect(handle_canceled)
    recognizer.session_stopped.connect(handle_session_stopped)

    # Continuous recognition delivers results asynchronously through the handlers
    recognizer.start_continuous_recognition()
    print("Transcribing...")

    try:
        while not done:
            time.sleep(.5)
    except KeyboardInterrupt:
        print("Transcription stopped by user.")
    finally:
        # Stop from the calling thread rather than from inside an SDK callback
        recognizer.stop_continuous_recognition()

    return " ".join(recognized_speech)

In [6]:
# Build a recognizer for the consultation recording, then transcribe it in full
recognizer = initialize_speech_service(
    '../data/primock/day1_consultation01_doctor.wav'
)
hypothesis_text = transcribe_continuous_audio_file(recognizer)

Transcribing...
Recognized: Hello.
Recognized: Hi. Yeah. OK. Hello. Good morning. So how can I help you this morning?
Recognized: Yeah, I'm sorry to hear that. And when you say diarrhoea, what do you mean by diarrhea? Do you mean you're going to the toilet more often or are your stools more loose?
Recognized: OK. And how many times a day are you going, let's say over the last couple of days?
Recognized: 6-7 times a day and you mentioned this mainly water tree. Have you noticed any other things like blood in your stools?
Recognized: OK. And you mentioned you've had some pain in your tummy as well. Whereabouts is the pain exactly?
Recognized: One side. And what side is that?
Recognized: That's right. OK. And can you describe the pain to me?
Recognized: OK. And there's a pain. Is that is it there all the time or does it come and go?
Recognized: Does the pain move anywhere else because on between your back?
Recognized: OK, fine. And you mentioned you've been feeling quite weak and shaky as

In [7]:
# Display the full transcript returned by the transcription step
hypothesis_text

"Hello. Hi. Yeah. OK. Hello. Good morning. So how can I help you this morning? Yeah, I'm sorry to hear that. And when you say diarrhoea, what do you mean by diarrhea? Do you mean you're going to the toilet more often or are your stools more loose? OK. And how many times a day are you going, let's say over the last couple of days? 6-7 times a day and you mentioned this mainly water tree. Have you noticed any other things like blood in your stools? OK. And you mentioned you've had some pain in your tummy as well. Whereabouts is the pain exactly? One side. And what side is that? That's right. OK. And can you describe the pain to me? OK. And there's a pain. Is that is it there all the time or does it come and go? Does the pain move anywhere else because on between your back? OK, fine. And you mentioned you've been feeling quite weak and shaky as well. What do you mean by shaky? Do you mean you've been having, have you been feeling feverish, for example? Measure your temperature then. OK OK

## Set up Azure OpenAI

In [78]:
# Read the Azure OpenAI key and endpoint from the environment (populated from
# .env by load_dotenv() earlier in the notebook) instead of hard-coding them,
# in the same way the Speech Service credentials are loaded.
openai_api_key = os.getenv("OPENAI_API_KEY")  # API key of your Azure OpenAI resource
openai_api_base = os.getenv("OPENAI_API_ENDPOINT")  # Your resource's URL

In [53]:
from openai import AzureOpenAI

# Create the Azure OpenAI client used by the chapter-title function below;
# it authenticates with the key and endpoint defined in the previous cell.
client = AzureOpenAI(
    api_version="2024-02-01", # Make sure to use the correct API version
    api_key=openai_api_key,
    azure_endpoint=openai_api_base,
)

### Generate Chapter Title by Passing Text to GPT-35-Turbo

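GPT-35-Turbo was chosen for generating chapter titles because it is the cheapest GPT model and is sufficient for this task. Note that the code cell below currently calls a deployment named `gpt-4o-mini`; replace it with the name of the deployment available in your Azure OpenAI resource.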

In [61]:
def generate_chapter_title_nb(text, model="gpt-4o-mini", max_tokens=20, temperature=0.7):
    """Ask the Azure OpenAI chat model for a concise chapter title for ``text``.

    Uses the notebook-level ``client``.

    Args:
        text: The passage (e.g. a transcript) to summarise as a title.
        model: Name of the Azure OpenAI deployment to call, e.g.
            'gpt-35-turbo' or 'gpt-4o-mini'.
        max_tokens: Upper bound on generated tokens; kept small so titles
            stay short.
        temperature: Sampling temperature controlling creativity.

    Returns:
        The generated title with surrounding whitespace removed.

    Raises:
        ValueError: If the response contains no text content.
    """
    # The prompt asks for a brief summary suitable as a chapter title
    messages = [
        {
            "role": "system",
            "content": "You are an assistant that generates concise chapter titles based on text."
        },
        {
            "role": "user",
            "content": f"Generate a concise chapter title for the following text:\n\n{text}. Don't include a heading such as 'Chapter Title:'."
        }
    ]

    completion = client.chat.completions.create(
        model=model,
        messages=messages,
        max_tokens=max_tokens,
        temperature=temperature,
        n=1,  # A single candidate title is enough
    )

    choice = completion.choices[0]
    content = choice.message.content
    if content is None:
        # content is optional in the response; report why instead of failing
        # with an AttributeError on .strip()
        raise ValueError(
            f"The model returned no title text (finish_reason={choice.finish_reason!r})"
        )
    return content.strip()

# Generate a chapter title for the speech transcript (`hypothesis_text`)
# with the notebook-local function and print it
chapter_title = generate_chapter_title_nb(hypothesis_text)
print(f"Generated Chapter Title: {chapter_title}")

Generated Chapter Title: Gastroenteritis Consultation


## Using Function from chaptertitle.py

In [55]:
import sys

# Add the parent directory to the import path so the `chaptertitle` module
# imported below can be found (presumably chaptertitle.py lives one level up)
sys.path.append(os.path.abspath('..'))
from chaptertitle import generate_chapter_title

In [56]:
# Short sample passage used as a second, non-transcript input for title generation
document_text = """
In this chapter, we discuss the importance of data privacy in modern digital systems. 
We explore various encryption techniques and their roles in securing user data.
"""

In [63]:
# Call the imported `generate_chapter_title` on the speech transcript,
# passing the API key and endpoint explicitly
chapter_title_fn_test = generate_chapter_title(hypothesis_text, openai_api_key, openai_api_base)
print(f"Test Generate Chapter Title Function: {chapter_title_fn_test}")

Test Generate Chapter Title Function: "Diagnosis and Treatment for Gastroenteritis"


In [64]:
# Call the imported `generate_chapter_title` on the sample passage in `document_text`
chapter_title_fn_test2 = generate_chapter_title(document_text, openai_api_key, openai_api_base)
print(f"Test Generate Chapter Title Function: {chapter_title_fn_test2}")

Test Generate Chapter Title Function: "Data Privacy and Encryption in Modern Digital Systems"
