In [21]:
from pprint import pprint
import pandas as pd
import ipywidgets as widgets
import torch
import whisper
from IPython.display import clear_output, display

from unified_desktop import RESOURCES_DIR
from unified_desktop.pipelines import UDSpeechRecognizer
from unified_desktop.pipelines import UDIntentClassification

%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


## Automatic Speech Recognition (ASR)

In [None]:
def update_asr_obj(change):
    global asrObj
    clear_output()
    display(model_dropdown, device_dropdown)
    asrObj = UDSpeechRecognizer(name=model_dropdown.value, device=device_dropdown.value)
    print(f"Loaded model: {model_dropdown.value} on device: {device_dropdown.value}")


# Dropdown for OpenAI Whisper models
model_dropdown = widgets.Dropdown(
    options=whisper.available_models(),
    value="tiny.en",
    description="ModelName:",
)


cuda_options = [torch.device("cuda", idx) for idx in range(torch.cuda.device_count())]
device_dropdown = widgets.Dropdown(
    options=["cpu"] + cuda_options,
    value="cpu",
    description="Device:",
)

# Attach the update function to the dropdown
model_dropdown.observe(update_asr_obj, names="value")
device_dropdown.observe(update_asr_obj, names="value")

# Display the widgets and initialize asrObj
display(model_dropdown, device_dropdown)
asrObj = UDSpeechRecognizer(name=model_dropdown.value, device=device_dropdown.value)

In [None]:
# Audio file to transcribe
audio_file = (
    RESOURCES_DIR
    / "call-center-sample-en_US"
    / "en_US_7a4f56d7-9aca-4ed5-96b9-9c9c36b8a3ac.wav"
)
# Use fp16 if on CUDA, else fp32
fp16 = device_dropdown.value in cuda_options

# Transcribe the audio file
transcribed_text = asrObj(audio_file, verbose=True, fp16=fp16)
pprint(transcribed_text)

# Intent Detection

In [22]:
# Input text for intent detection
excel_file = RESOURCES_DIR/"input_text"/"textqueries.xlsx"

# Specify the row number (k) you want to select (replace with the desired row number)
k = 2  # Example: selecting row 2

# Read the Excel file into a DataFrame
df = pd.read_excel(excel_file)

# Check if the specified row number (k) is valid
if 0 <= k < len(df):
    # Select the specified row
    selected_row = df.iloc[k]

    # Convert the selected row to a string
    input_text = selected_row.to_string(index=False)

    # Display the selected row as a string
    print(f"Row {k + 1} as a string:")
    print(input_text)
        
top_k = 5

# Instantiate the intent detection model
intent_detector = UDIntentClassification()

# Perform intent detection
intent_results = intent_detector(input_text)

# Print the top-5 predictions and their probabilities for the intent detection
print("Top-5 Intent Predictions:")
print("------------------------------------------------------")
print("| Intent                    | Probability")
print("------------------------------------------------------")
for intent, probability in intent_results[:top_k]:
    print(f"| {intent:<25} | {probability:.4f}")
print("------------------------------------------------------")

Row 3 as a string:
      - I do not know how to cancel an order I ...
Top-5 Intent Predictions:
------------------------------------------------------
| Intent                    | Probability
------------------------------------------------------
| cancel_order              | 0.9936
| track_order               | 0.0006
| delete_account            | 0.0006
| change_order              | 0.0005
| change_shipping_address   | 0.0004
------------------------------------------------------
