In [None]:
import random
from pprint import pprint

import ipywidgets as widgets
import pandas as pd
import torch
import whisper
from IPython.display import clear_output, display

from unified_desktop import RESOURCES_DIR
from unified_desktop.pipelines import (
    UDIntentClassification,
    UDKeyExtraction,
    UDSpeechRecognizer,
)

# Let pandas show up to 100 characters per column before truncating text
pd.options.display.max_colwidth = 100
# Load IPython's autoreload extension; mode 2 re-imports modules before each
# cell executes, so edits to project code are picked up without a restart
%load_ext autoreload
%autoreload 2

In [None]:
# Device selector: "cpu" plus one torch.device for every CUDA device torch reports

cuda_options = [
    torch.device("cuda", gpu_index) for gpu_index in range(torch.cuda.device_count())
]
device_dropdown = widgets.Dropdown(
    description="Device:",
    options=["cpu", *cuda_options],
    value="cpu",
)
display(device_dropdown)

## Automatic Speech Recognition (ASR)

In [None]:
def update_asr_obj(change):
    """Observer callback: rebuild the global ``asrObj`` for the current selections.

    Clears the cell output, shows the model and device dropdowns again,
    constructs a new ``UDSpeechRecognizer`` and prints what was loaded.

    Args:
        change: Change notification supplied by the widget; not used.
    """
    global asrObj

    # Redraw both selectors after wiping the previous output
    clear_output()
    display(model_dropdown_ASR, device_dropdown)

    chosen_model, chosen_device = model_dropdown_ASR.value, device_dropdown.value
    asrObj = UDSpeechRecognizer(device=chosen_device, name=chosen_model)
    print(
        f"Loaded model: {model_dropdown_ASR.value} on device: {device_dropdown.value}"
    )


# Model selector, populated with the model names whisper reports as available
model_dropdown_ASR = widgets.Dropdown(
    description="ModelName:",
    options=whisper.available_models(),
    value="tiny.en",
)

# Rebuild the recognizer whenever the model or the device selection changes
model_dropdown_ASR.observe(update_asr_obj, names="value")
device_dropdown.observe(update_asr_obj, names="value")

# Show the model selector, then create the initial asrObj from the current selections
display(model_dropdown_ASR)
asrObj = UDSpeechRecognizer(
    device=device_dropdown.value,
    name=model_dropdown_ASR.value,
)

In [None]:
# Sample recording to transcribe, located under the package resources directory
audio_file = RESOURCES_DIR / "call-center-sample-en_US" / "en_US_7a4f56d7-9aca-4ed5-96b9-9c9c36b8a3ac.wav"

# Request fp16 only when the selected device is one of the CUDA devices; otherwise fp32
fp16 = device_dropdown.value in cuda_options

# Run the recognizer on the file and pretty-print whatever it returns
transcribed_text = asrObj(audio_file, fp16=fp16, verbose=True)
pprint(transcribed_text)

## Intent Detection

In [None]:
# Transformer checkpoints that have been tested and work well for intent
# detection. Extend this list as further models are validated.
# NOTE: the keyword-extraction cell further down rebinds this same name.

available_models = [
    "vineetsharma/customer-support-intent-albert",
]


def update_intent_obj(change):
    """Observer callback: rebuild the global ``intentObj`` for the current selections.

    Clears the cell output, shows the intent-model and device dropdowns again,
    constructs a new ``UDIntentClassification`` and prints what was loaded.

    Args:
        change: Change notification supplied by the widget; not used.
    """
    global intentObj
    # Take the model list from the intent dropdown itself instead of the
    # module-level ``available_models``. That name is rebound by the
    # keyword-extraction cell, and a global is only looked up when this
    # callback fires, so the classifier would otherwise receive the keyword
    # models once that cell has run.
    intent_models = list(model_dropdown_intent.options)

    clear_output()
    display(model_dropdown_intent, device_dropdown)
    intentObj = UDIntentClassification(
        intent_models, name=model_dropdown_intent.value, device=device_dropdown.value
    )
    print(
        f"Loaded model: {model_dropdown_intent.value} on device: {device_dropdown.value}"
    )


# Dropdown for intent-detection transformer models. More models will be added
model_dropdown_intent = widgets.Dropdown(
    options=available_models,
    value="vineetsharma/customer-support-intent-albert",
    description="ModelName:",
)

# Rebuild the classifier whenever the model or the device selection changes
model_dropdown_intent.observe(update_intent_obj, names="value")
device_dropdown.observe(update_intent_obj, names="value")

# Display the model dropdown (the widget itself, not the callback function)
display(model_dropdown_intent)

# Initialize intentObj from the current selections.
# NOTE(review): update_intent_obj additionally passes a model list as the first
# positional argument; confirm against UDIntentClassification's signature which
# call form is intended and align the two.
intentObj = UDIntentClassification(
    name=model_dropdown_intent.value,
    device=device_dropdown.value,
)

In [None]:
# Input text for intent detection
excel_file = RESOURCES_DIR / "input_text" / "textqueries.xlsx"

# Read the Excel file into a DataFrame
df = pd.read_excel(excel_file)
num_rows = df.shape[0]
if num_rows == 0:
    # Fail with a clear message instead of an opaque "empty range" error below
    raise ValueError(f"No rows to sample from in {excel_file}")

# Pick one row at random (no seed is set, so the choice differs between runs)
random_row_index = random.randrange(num_rows)
selected_row = df.iloc[random_row_index]

# Convert the selected row to a string. to_string() goes through pandas' display
# formatting, which honours display.max_colwidth, so lift that limit here to
# pass the full query text to the model rather than a "..."-truncated one.
with pd.option_context("display.max_colwidth", None):
    intent_input = selected_row.to_string(index=False)

# Show the query that will be classified
print("input query:")
print(intent_input, "\n")

# Number of top predictions to request
top_k = 2

# Perform intent detection
intent_results = intentObj(intent_input, top_k)

# Print the top-k predictions and their scores; the header is built with the
# same column width as the rows so the two always line up
table_rule = "------------------------------------------------------"
print(f"Top-{top_k} Intent Predictions:")
print(table_rule)
print(f"| {'Intent':<25} | Probability")
print(table_rule)
for prediction in intent_results:
    print(f"| {prediction['label']:<25} | {prediction['score']:.4f}")
print(table_rule)

## Keyword Detection

In [None]:
# Transformer checkpoints that have been tested and work well for keyword
# extraction. Extend this list as further models are validated.

available_models = [
    "yanekyuk/bert-uncased-keyword-extractor",
]


def update_key_obj(change):
    """Observer callback: rebuild the global ``KeyObj`` for the current selections.

    Clears the cell output, shows the keyword-model and device dropdowns again,
    constructs a new ``UDKeyExtraction`` and prints what was loaded.

    Args:
        change: Change notification supplied by the widget; not used.
    """
    global KeyObj
    # Take the model list from the keyword dropdown itself instead of the
    # module-level ``available_models``. The intent-detection cell binds that
    # same name, and a global is only looked up when this callback fires, so
    # re-running that cell would otherwise swap the list underneath this callback.
    keyword_models = list(model_dropdown_key.options)

    clear_output()
    display(model_dropdown_key, device_dropdown)
    KeyObj = UDKeyExtraction(
        keyword_models,
        name=model_dropdown_key.value,
        device=device_dropdown.value,
    )
    print(
        f"Loaded model: {model_dropdown_key.value} on device: {device_dropdown.value}"
    )


# Model selector for keyword extraction. More models will be added
model_dropdown_key = widgets.Dropdown(
    description="ModelName:",
    options=available_models,
    value="yanekyuk/bert-uncased-keyword-extractor",
)

# Rebuild the extractor whenever the model or the device selection changes
model_dropdown_key.observe(update_key_obj, names="value")
device_dropdown.observe(update_key_obj, names="value")

# Show the model selector, then create the initial KeyObj from the current selections
display(model_dropdown_key)
KeyObj = UDKeyExtraction(
    device=device_dropdown.value,
    name=model_dropdown_key.value,
)

In [None]:
# Input text for keywords extraction
content_input = "Google is being investigated by the UK antitrust watchdog for its dominance in the 'ad tech stack,' the set of services that facilitate the sale of online advertising space between advertisers and sellers. Google has strong positions at various levels of the ad tech stack and charges fees to both publishers and advertisers. A step back: UK Competition and Markets Authority has also been investigating whether Google and Meta colluded over ads, probing into the advertising agreement between the two companies, codenamed Jedi Blue."

# Show the text the keywords will be extracted from
print("input query:")
print(content_input, "\n")

# Perform keywords extraction
key_results = KeyObj(content_input)

# Print one table row per result; the header is built with the same column
# widths as the rows so the separators line up
table_rule = "------------------------------------------------------"
print("keywords extraction:")
print(table_rule)
print(f"| {'index':<5} | {'keyword':<23} | Probability")
print(table_rule)

# Each result is unpacked as an (index, word, score) triple
for index, word, score in key_results:
    print(f"| {index:<5} | {word:<23} | {score:.4f}")

print(table_rule)