<a href="https://colab.research.google.com/github/USAFADFCS/final-project-jon-and-ari/blob/main/Ari_Jon_AI_Model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

  Save to Google Drive

In [None]:
!pip install -U transformers


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m25.2[0m[39;49m -> [0m[32;49m25.3[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


### Step 1: Setup – Import Libraries and Load the Model

In [None]:

import os, torch, torchaudio
import numpy as np
from transformers import pipeline
from asr_whisper import transcribe
from vad import simple_vad_chunks
from transformers import AutoProcessor
from asr_whisper import asr, MEDICAL_LEXICON  # your pipeline + vocab

# --- Model selection -------------------------------------------------------
MODEL_ID = "openai/whisper-tiny.en"

# Use the first CUDA device with half precision when a GPU is present;
# otherwise stay on the CPU in full precision.
if torch.cuda.is_available():
    DEVICE, DTYPE = 0, torch.float16
else:
    DEVICE, DTYPE = "cpu", torch.float32

# --- Shared speech-recognition pipeline ------------------------------------
# Built once here and reused by the cells below.
asr = pipeline(
    task="automatic-speech-recognition",
    model=MODEL_ID,
    device=DEVICE,
    torch_dtype=DTYPE,
    return_timestamps=True,
    chunk_length_s=30,   # window length (seconds) for long recordings
    stride_length_s=5,   # overlap (seconds) between neighbouring windows
)

# --- Triage vocabulary used to prime decoding ------------------------------
MEDICAL_LEXICON = (
    "tourniquet, hemorrhage, capillary refill, obey commands, airway, "
    "respirations, pulse, radial pulse, naloxone, unresponsive, shock"
)

def transcribe(path: str) -> dict:
    """Transcribe one audio file with the shared ``asr`` pipeline.

    Decoding is primed with ``MEDICAL_LEXICON`` so that triage vocabulary is
    favoured by the decoder.

    Args:
        path: Path of the audio file handed to the pipeline.

    Returns:
        The dict produced by the pipeline (requested with
        ``return_timestamps=True``). Other cells in this notebook read its
        ``"text"`` entry and, when present, its ``"chunks"`` entry.
    """
    # The ASR pipeline call does not accept a `prompt=` keyword (it raises
    # TypeError). Whisper prompts must reach `generate` as token ids through
    # `generate_kwargs["prompt_ids"]`.
    prompt_ids = asr.tokenizer.get_prompt_ids(MEDICAL_LEXICON, return_tensors="pt")
    prompt_ids = prompt_ids.to(asr.model.device)

    return asr(
        path,
        generate_kwargs={
            # "task"/"language" are deliberately not set: English-only
            # checkpoints such as whisper-tiny.en reject them, and
            # transcription is already the default behaviour.
            "prompt_ids": prompt_ids,  # primes decoding with triage vocabulary
            "temperature": 0.0,
            "num_beams": 5,
        },
        return_timestamps=True,
    )


Device set to use cpu


In [None]:
def simple_vad_chunks(wav_path, min_speech_len=0.6, sample_rate=16000):
    """Run torchaudio's VAD over a recording and return paths to transcribe.

    The audio is loaded, downmixed to mono, resampled to ``sample_rate`` and
    passed through ``torchaudio.transforms.Vad``. If the result is shorter
    than ``min_speech_len`` seconds the original file is returned unchanged;
    otherwise the processed audio is written to ``voiced.wav`` in the system
    temp directory (overwritten on every call).

    Args:
        wav_path: Path of the input audio file.
        min_speech_len: Minimum length, in seconds, the VAD output must have
            before it is used instead of the original file.
        sample_rate: Rate (Hz) the audio is resampled to before VAD and at
            which the output file is written. Defaults to 16000.

    Returns:
        A one-element list holding either ``wav_path`` (fallback) or the path
        of the written temp file.
    """
    import tempfile  # local import so this cell does not depend on new file-level imports

    wav, sr = torchaudio.load(wav_path)

    # Downmix multi-channel audio to mono. Without this, squeeze(0) below is a
    # no-op for stereo input and the later unsqueeze(0) produces a 3-D tensor
    # that torchaudio.save cannot write.
    if wav.dim() > 1 and wav.size(0) > 1:
        wav = wav.mean(dim=0, keepdim=True)

    wav = torchaudio.functional.resample(wav, sr, sample_rate)
    vad = torchaudio.transforms.Vad(sample_rate=sample_rate)
    voiced = vad(wav.squeeze(0))

    # Fallback: if VAD was overly aggressive, just return the original path.
    if voiced.numel() < sample_rate * min_speech_len:
        return [wav_path]

    # For brevity, write one voiced file; in production, slice windows.
    # tempfile.gettempdir() resolves to /tmp on typical Linux/Colab hosts and
    # to the platform equivalent elsewhere.
    out = os.path.join(tempfile.gettempdir(), "voiced.wav")
    torchaudio.save(out, voiced.unsqueeze(0), sample_rate)
    return [out]

In [None]:
def transcribe_with_vad(path):
    """Transcribe ``path`` after voice-activity detection.

    Each path returned by ``simple_vad_chunks`` is passed to ``transcribe``
    and the results are merged.

    Args:
        path: Path of the audio file to process.

    Returns:
        A dict with ``"text"`` (the chunk transcripts joined by single
        spaces) and ``"segments"`` (the concatenated ``"chunks"`` lists of
        the individual results).
    """
    texts = []
    segments = []
    for chunk in simple_vad_chunks(path):
        result = transcribe(chunk)
        # Collect and join instead of `+= (" " + text).strip()`: that form
        # strips the separator it just added, gluing consecutive chunks
        # together without a space.
        text = (result.get("text") or "").strip()
        if text:
            texts.append(text)
        segments.extend(result.get("chunks") or [])
    return {"text": " ".join(texts), "segments": segments}

In [None]:
TRIAGE_SCHEMA = {
  "patient_id": str,
  "entities": {
    "bleeding_severe": bool,
    "can_walk": bool | None,
    "obeys_commands": bool | None,
    "resp_rate": int | None,
    "cap_refill_sec": float | None,
    "mental_status": str | None
  },
  "evidence": list,              # text snippets / timestamps
  "triage_candidate": str,       # Immediate | Delayed | Minimal | Expectant | Unknown
  "uncertainty": float,
  "next_question": str | None
}

In [None]:
def salt_rules(e, sensors=None):
    """Map extracted entities (plus optional sensor data) to a triage category.

    Rules are evaluated in order and the first match wins:

    1. ``can_walk`` is ``True``                    -> ``"Minimal"``
    2. severe bleeding (entity or sensor)          -> ``"Immediate"``
    3. no respiratory rate available               -> ``"Unknown"``
    4. respiratory rate of 0                       -> ``"Expectant"``
    5. does not obey commands, or rate >= 30       -> ``"Immediate"``
    6. otherwise                                   -> ``"Delayed"``

    Args:
        e: Dict of extracted entities. Keys read: ``"bleeding_severe"``,
            ``"resp_rate"``, ``"obeys_commands"``, ``"can_walk"``. ``None``
            is treated as an empty dict.
        sensors: Optional dict of sensor readings. Keys read:
            ``"bleeding_detected"``, ``"resp_rate"``.

    Returns:
        One of ``"Minimal"``, ``"Immediate"``, ``"Unknown"``, ``"Expectant"``
        or ``"Delayed"``.
    """
    e = e or {}
    s = sensors or {}

    severe_bleed = e.get("bleeding_severe") or s.get("bleeding_detected")

    # Fall back to the sensor only when the entity value is missing. Using
    # `or` here would discard a reported rate of 0 (falsy), turning a
    # non-breathing patient into "Unknown" or into whatever the sensor says.
    resp = e.get("resp_rate")
    if resp is None:
        resp = s.get("resp_rate")

    obeys = e.get("obeys_commands")
    can_walk = e.get("can_walk")

    # NOTE(review): walking is checked before severe bleeding, so an
    # ambulatory patient with a severe bleed is classed "Minimal" -- confirm
    # this ordering is intended.
    if can_walk is True:
        return "Minimal"
    if severe_bleed:
        return "Immediate"
    if resp is None:
        return "Unknown"
    if resp == 0:
        return "Expectant"
    if obeys is False or resp >= 30:
        return "Immediate"
    return "Delayed"


In [None]:
# Smoke test: report how the pipeline is configured, then transcribe one
# sample file with the medical-vocabulary prompt.
print("Device:", asr.device)
print("Torch dtype:", next(asr.model.parameters()).dtype)
print("Task:", asr.task)

MODEL_ID = asr.model.name_or_path
processor = AutoProcessor.from_pretrained(MODEL_ID)

# Build prompt ids as a long tensor on the model's device. torch.as_tensor
# accepts lists, tuples, NumPy arrays and tensors alike, so no per-type
# branching is needed.
prompt_ids = torch.as_tensor(
    processor.get_prompt_ids(text=MEDICAL_LEXICON),
    dtype=torch.long,
    device=asr.model.device,
)

AUDIO = "EnglishTriageTest 1.mp3"
# Fail early with a clear message instead of an error from inside the pipeline.
if not os.path.isfile(AUDIO):
    raise FileNotFoundError(
        f"Audio file {AUDIO!r} not found in {os.getcwd()!r}; upload it or fix the path."
    )

# Clear stale forced decoder ids. The captured output shows the deprecation
# warning still firing when only model.generation_config is cleared, so
# presumably another config object carries them too; reset every candidate
# that exposes the attribute (the pipeline-level one may not exist on all
# transformers versions, hence getattr).
for cfg in (
    asr.model.generation_config,
    asr.model.config,
    getattr(asr, "generation_config", None),
):
    if getattr(cfg, "forced_decoder_ids", None) is not None:
        cfg.forced_decoder_ids = None

r = asr(
    AUDIO,
    generate_kwargs={
        # "language"/"task" stay unset: English-only checkpoints reject them.
        "prompt_ids": prompt_ids,
        "temperature": 0.0,
        "num_beams": 5,
        "do_sample": False,
    },
    return_timestamps=True,
)

print("Output keys:", list(r.keys()))
print("Text (first 120):", r.get("text", "")[:120])
print("Num segments:", len(r.get("chunks", [])))


Device: cpu
Torch dtype: torch.float32
Task: automatic-speech-recognition


Using custom `forced_decoder_ids` from the (generation) config. This is deprecated in favor of the `task` and `language` flags/config options.


: 