In [2]:
import torch
from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline
import numpy as np

In [4]:
# Model setup: pick the compute device and precision, load the Whisper
# checkpoint, and wrap it in a speech-recognition pipeline.

# Use the first CUDA GPU with half precision when CUDA is available;
# otherwise stay on the CPU with full precision.
if torch.cuda.is_available():
    device, torch_dtype = "cuda:0", torch.float16
else:
    device, torch_dtype = "cpu", torch.float32

model_id = "openai/whisper-large-v3"

# Load the checkpoint in the chosen dtype, then move it to the chosen device.
model = AutoModelForSpeechSeq2Seq.from_pretrained(
    model_id,
    use_safetensors=True,
    torch_dtype=torch_dtype,
)
model = model.to(device)

# The processor supplies both the tokenizer and the feature extractor
# that the pipeline needs.
processor = AutoProcessor.from_pretrained(model_id)

# `pipe` is the callable used by the rest of the notebook to run
# speech recognition.
pipe = pipeline(
    task="automatic-speech-recognition",
    model=model,
    tokenizer=processor.tokenizer,
    feature_extractor=processor.feature_extractor,
    device=device,
    torch_dtype=torch_dtype,
    # Decoding / batching options
    chunk_length_s=30,
    batch_size=16,
    max_new_tokens=128,
    return_timestamps=True,
)

# Transcribe audio with the ASR pipeline (command matching is not implemented yet)
def detect_command(audio_sample):
    """Run speech recognition on ``audio_sample`` and return the transcript.

    The argument is handed unchanged to the module-level ``pipe`` callable,
    and the ``"text"`` entry of the result it returns is given back to the
    caller.

    Note: despite the name, no command matching happens here. A follow-up
    step could inspect the returned text (for example, a case-insensitive
    check that it contains a specific command phrase) and report a boolean
    instead.
    """
    return pipe(audio_sample)["text"]

# File name of the example recording. No audio is read at this point; the
# string itself is later passed to detect_command as its input.
audio_sample = "audio_aaico_challenge.wav"


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Command detected:  Engine 3, this is Bravo Team. We've got heavy smoke and flames visible from the Charlie side. We're going for a defensive attack, over. Galactic temperature. This is Bravo Team. Visibility's near zero, lots of heat. We're gonna hit Charlie hard from the exterior, then make entry from a primary search, over. Galactic battery. Galactic Battery. Keep an eye on that roof. Looks sketchy. Engine 3, better charge the hose. Over. Acknowledged. We've got a 2.5 inch line ready for a blitz attack. We'll knock down the bulk of the fire before we go in. Galactic Oxygen. Pass is active and we're staying on the comms. Going in for the attack now. Stay sharp out there. Over. Bravo going in. Over.


In [5]:
# Run speech recognition on the example recording. detect_command returns the
# pipeline's "text" result, so what gets printed here is that text rather
# than a yes/no command match.
command_detected = detect_command(audio_sample)
print("Command detected:", command_detected)

Command detected:  Engine 3, this is Bravo Team. We've got heavy smoke and flames visible from the Charlie side. We're going for a defensive attack, over. Galactic temperature. This is Bravo Team. Visibility's near zero, lots of heat. We're gonna hit Charlie hard from the exterior, then make entry from a primary search, over. Galactic battery. Galactic Battery. Keep an eye on that roof. Looks sketchy. Engine 3, better charge the hose. Over. Acknowledged. We've got a 2.5 inch line ready for a blitz attack. We'll knock down the bulk of the fire before we go in. Galactic Oxygen. Pass is active and we're staying on the comms. Going in for the attack now. Stay sharp out there. Over. Bravo going in. Over.
