In [1]:
import os
import sys
import time
import queue
import numpy as np
import sounddevice as sd
import tensorflow as tf
import librosa

# =========================
#         SETTINGS
# =========================
# Path of the Keras model file that main() loads with tf.keras.models.load_model.
MODEL_PATH        = r"/path/to/drone_resnet_final_form.keras"  # <- change
# Sample rate in Hz; used both for the input stream and for MFCC extraction.
SAMPLE_RATE       = 16000
WIN_SECONDS       = 1.0       # inference window length (seconds)
OVERLAP_SECONDS   = 0.3       # seconds of audio carried over from the previous window
STEP_SECONDS      = WIN_SECONDS - OVERLAP_SECONDS  # 0.7 s new audio each step

# MFCC framing parameters passed to librosa.feature.mfcc in mfcc_13x40().
N_MFCC            = 13        # number of MFCC coefficients (rows of the tile)
FRAMES_PER_SEC    = 40        # target number of time frames (columns of the tile)
HOP_LENGTH        = SAMPLE_RATE // FRAMES_PER_SEC   # 16000/40 = 400 samples (25 ms)
WIN_LENGTH        = 400                             # 25 ms analysis window at 16 kHz
# FFT size; larger than WIN_LENGTH. With center=False a 16000-sample window
# yields 1 + (16000 - 1024) // 400 = 38 frames, so mfcc_13x40() zero-pads the
# last two columns. NOTE(review): confirm training used the same framing.
N_FFT             = 1024

DRONE_CLASS_INDEX = 1    # softmax index of "drone" (change to 0 if needed)
PRINT_THRESHOLD   = 0.50 # p(drone) at or above this prints "DRONE", else "NO-DRONE"
# When True, every MFCC tile is written as a .npy file into SAVE_DIR.
SAVE_MFCC_NPY     = False
SAVE_DIR          = "./realtime_mfcc"

# =========================
#     DEVICE SELECTION
# =========================
def find_umik_index():
    """Return the index of the first UMIK-1 input device, or None.

    A device matches when it reports at least one input channel and its
    name contains "umik" (case-insensitive).

    Returns:
        The position of the matching device in ``sd.query_devices()``,
        or ``None`` when no device matches or the device list cannot be
        queried.
    """
    try:
        devices = sd.query_devices()
    except Exception as exc:
        # Best-effort lookup: keep returning None on failure, but report why,
        # so a broken audio backend is not mistaken for "mic not plugged in".
        print(f"[WARN] Could not query audio devices: {exc}", flush=True)
        return None

    for index, device in enumerate(devices):
        # `or ""` guards against a device entry whose name is None.
        name = device.get("name") or ""
        has_input = device.get("max_input_channels", 0) > 0
        if has_input and "umik" in name.lower():
            return index
    return None

# =========================
#   FEATURE EXTRACTION
# =========================
def mfcc_13x40(signal, sr=SAMPLE_RATE):
    """Compute a fixed-size MFCC tile from a mono audio signal.

    The signal is flattened to 1-D float32, converted to MFCCs with the
    module-level framing constants (N_MFCC, N_FFT, HOP_LENGTH, WIN_LENGTH,
    center=False), then zero-padded or truncated along time to exactly
    FRAMES_PER_SEC frames.

    Args:
        signal: Array-like audio samples; any shape, flattened internally.
        sr: Sample rate in Hz passed to librosa. The framing constants are
            not rescaled when ``sr`` differs from SAMPLE_RATE.

    Returns:
        float32 array of shape (N_MFCC, FRAMES_PER_SEC, 1).
    """
    y = np.asarray(signal, dtype=np.float32).flatten()

    # Uncentered analysis needs at least N_FFT samples; librosa raises
    # otherwise. Zero-pad short (or empty) input so one frame is always
    # produced instead of failing.
    if y.size < N_FFT:
        y = np.pad(y, (0, N_FFT - y.size), mode="constant")

    mfcc = librosa.feature.mfcc(
        y=y, sr=sr, n_mfcc=N_MFCC,
        n_fft=N_FFT, hop_length=HOP_LENGTH, win_length=WIN_LENGTH,
        center=False
    )  # (N_MFCC, n_frames); 38 frames for a 16000-sample input

    # Force exactly FRAMES_PER_SEC time frames: zero-pad on the right when
    # short, drop trailing frames when long.
    n_frames = mfcc.shape[1]
    if n_frames < FRAMES_PER_SEC:
        missing = FRAMES_PER_SEC - n_frames
        mfcc = np.pad(mfcc, ((0, 0), (0, missing)), mode="constant")
    else:
        mfcc = mfcc[:, :FRAMES_PER_SEC]

    # Append a trailing channel axis.
    return mfcc[..., np.newaxis].astype(np.float32)

# =========================
#        MAIN LOOP
# =========================
def main():
    """Run real-time drone detection from a UMIK-1 microphone until Ctrl+C.

    Steps:
      1. Locate the UMIK-1 input device; exit with status 1 if absent.
      2. Load the Keras model from MODEL_PATH.
      3. Open an input stream that delivers one step of new audio per
         callback, and stitch each chunk behind the tail of the previous
         window to form overlapping inference windows.
      4. For every window, extract the MFCC tile, run the model, and print
         p(drone) with a DRONE / NO-DRONE label.

    Errors inside a single iteration are printed and the loop continues.

    Raises:
        SystemExit: if no UMIK-1 is found or the window/overlap settings
            leave no new audio per step.
    """
    # 1) Require UMIK-1
    device_index = find_umik_index()
    if device_index is None:
        print("[ERROR] UMIK-1 microphone not found. Aborting. "
              "Connect the UMIK-1 and try again.")
        sys.exit(1)

    print(f"[INFO] UMIK-1 found at input device index: {device_index}")

    # 2) Load model
    print(f"[INFO] Loading model: {MODEL_PATH}")
    model = tf.keras.models.load_model(MODEL_PATH)

    # 3) Prepare overlap buffers.
    # round() avoids losing a sample to float truncation, and deriving the
    # step from the other two guarantees tail + fresh == one full window.
    win_len = int(round(WIN_SECONDS * SAMPLE_RATE))       # 16000
    ovl_len = int(round(OVERLAP_SECONDS * SAMPLE_RATE))   # 4800  (0.3 s)
    step_len = win_len - ovl_len                          # 11200 (0.7 s)
    if ovl_len < 0 or step_len <= 0:
        print("[ERROR] OVERLAP_SECONDS must be >= 0 and smaller than "
              "WIN_SECONDS.")
        sys.exit(1)

    # tail starts as zeros for the very first window
    tail = np.zeros(ovl_len, dtype=np.float32)

    # Request exactly one step of audio per callback; it is stitched behind
    # the overlap tail in the loop below.
    blocksize = step_len
    q = queue.Queue()

    # A timed get keeps Ctrl+C responsive: an untimed blocking get is an
    # uninterruptible wait on some platforms (notably Windows).
    poll_timeout = 0.5

    if SAVE_MFCC_NPY:
        os.makedirs(SAVE_DIR, exist_ok=True)

    def audio_callback(indata, frames, time_info, status):
        """Queue a copy of each captured block for the main loop."""
        if status:
            # non-fatal: just log driver messages
            print(f"[AUDIO WARN] {status}", flush=True)
        # Copy, because the callback's buffer is only valid during the call.
        q.put(indata.copy())

    # 4) Start stream
    print(f"[INFO] Starting stream @ {SAMPLE_RATE} Hz, device={device_index}, "
          f"step={STEP_SECONDS:.2f}s ({blocksize} samples), overlap={OVERLAP_SECONDS:.2f}s")
    with sd.InputStream(
        samplerate=SAMPLE_RATE,
        channels=1,
        dtype="float32",
        callback=audio_callback,
        blocksize=blocksize,
        device=device_index
    ):
        print("[INFO] Press Ctrl+C to stop.\n")
        counter = 0
        while True:
            try:
                # Take the oldest queued chunk (FIFO), shape [step_len, 1].
                # If inference is slower than real time, chunks queue up
                # and the output lags behind the microphone.
                try:
                    fresh = q.get(timeout=poll_timeout)
                except queue.Empty:
                    continue
                fresh = fresh[:, 0]  # mono

                # Compose one window: [previous tail] + [new chunk]
                window = np.concatenate([tail, fresh], axis=0)

                # Next tail is the last ovl_len samples of this window.
                # Slice from an explicit start index: window[-0:] would
                # return the whole window when the overlap is zero.
                tail = window[len(window) - ovl_len:].copy()

                # Extract MFCC tile
                tile = mfcc_13x40(window, sr=SAMPLE_RATE)
                if SAVE_MFCC_NPY:
                    np.save(os.path.join(SAVE_DIR, f"mfcc_{int(time.time())}_{counter:06d}.npy"), tile)

                # Inference on a batch of one
                probs = model.predict(tile[np.newaxis, ...], verbose=0)[0]
                p_drone = float(probs[DRONE_CLASS_INDEX])

                # Pretty print
                label = "DRONE" if p_drone >= PRINT_THRESHOLD else "NO-DRONE"
                ts = time.strftime("%H:%M:%S")
                print(f"[{ts}] p(drone)={p_drone:0.3f}  -> {label}")

                counter += 1

            except KeyboardInterrupt:
                print("\n[INFO] Stopped by user.")
                break
            except Exception as e:
                # Keep running even if a single iteration fails
                print(f"[ERROR] {e}", flush=True)

# Run the detector only when executed as a script, not when imported.
if __name__ == "__main__":
    main()


ModuleNotFoundError: No module named 'sounddevice'