# Notebook cell In [None]:
import os
import sys
import time
import queue
import numpy as np
import sounddevice as sd
import tensorflow as tf
import librosa

# =========================
#           SETTINGS
# =========================
# Path of the Keras model file that main() loads; edit before running.
MODEL_PATH        = r"/path/to/drone_resnet_final_form.keras"  # <-- change
# Sample rate (Hz) every captured step is resampled to before feature extraction.
TARGET_SR         = 16000
# Each analysis window is WIN_SECONDS long and reuses the last OVERLAP_SECONDS
# of the previous window, so one prediction is made per STEP_SECONDS of new audio.
WIN_SECONDS       = 1.0
OVERLAP_SECONDS   = 0.3
STEP_SECONDS      = WIN_SECONDS - OVERLAP_SECONDS  # 0.7 s

# MFCC config (13x40 from 1 s @ 16 kHz)
# N_MFCC is the number of coefficients (tile rows); FRAMES_PER_SEC is the number
# of time columns the tile is padded/cropped to in mfcc_13x40().
N_MFCC            = 13
FRAMES_PER_SEC    = 40
HOP_LENGTH        = TARGET_SR // FRAMES_PER_SEC   # 16000/40 = 400
# Passed to librosa.feature.mfcc as win_length and n_fft respectively.
WIN_LENGTH        = 400
N_FFT             = 1024

# Classification output index and threshold
# main() reads probs[DRONE_CLASS_INDEX] as p(drone) and prints "DRONE" when it
# is >= PRINT_THRESHOLD, otherwise "NO-DRONE".
DRONE_CLASS_INDEX = 1      # set to 0 if your softmax order is [drone, no_drone]
PRINT_THRESHOLD   = 0.50

# ====== MIC CALIBRATION (apply to audio BEFORE MFCC) ======
# From your MATLAB reference:
# Mul_fac = ((10^6.1925)/10^0.0475)*(20e-6)
# NOTE(review): 20e-6 looks like the 20 uPa reference pressure and the exponents
# like microphone-specific sensitivity terms -- confirm against the MATLAB source.
MUL_FAC = ((10.0**6.1925) / (10.0**0.0475)) * (20e-6)
# When True, main() multiplies each 1 s window by MUL_FAC before computing MFCCs.
APPLY_CALIBRATION = True   # keep True to feed calibrated pressure into MFCC
# ==========================================================

# When SAVE_MFCC_NPY is True, main() writes every MFCC tile as a .npy file
# into SAVE_DIR (created if missing).
SAVE_MFCC_NPY     = False
SAVE_DIR          = "./realtime_mfcc"

# =========================
#     DEVICE SELECTION
# =========================
def find_umik_index():
    """Return the input device index of the UMIK-1, or None if not found.

    Scans the devices reported by ``sd.query_devices()`` and returns the index
    of the first one that has at least one input channel and whose name
    contains "umik" (case-insensitive).

    The lookup is best-effort: if the device query itself fails, the failure
    is reported on stdout and None is returned, so the caller's
    "not found" handling still applies but the underlying cause is visible.
    """
    try:
        for index, device in enumerate(sd.query_devices()):
            has_input = device.get("max_input_channels", 0) > 0
            if has_input and "umik" in device.get("name", "").lower():
                return index
    except Exception as exc:
        # Previously swallowed silently, which made a broken audio backend
        # indistinguishable from an unplugged microphone.
        print(f"[WARN] Could not query audio devices: {exc}", flush=True)
    return None

# =========================
#   FEATURE EXTRACTION
# =========================
def mfcc_13x40(signal_16k):
    """Compute the MFCC tile for one window of 16 kHz audio.

    signal_16k: array-like of samples at TARGET_SR; it is flattened to 1-D
    float32 and zero-padded or truncated to exactly TARGET_SR samples.

    Returns a float32 array of shape (N_MFCC, FRAMES_PER_SEC, 1), i.e.
    (13, 40, 1) with the current settings.
    """
    samples = np.asarray(signal_16k, dtype=np.float32).flatten()

    # Force the input to exactly one second of audio.
    shortfall = TARGET_SR - samples.size
    if shortfall > 0:
        samples = np.pad(samples, (0, shortfall), mode="constant")
    elif shortfall < 0:
        samples = samples[:TARGET_SR]

    coeffs = librosa.feature.mfcc(
        y=samples,
        sr=TARGET_SR,
        n_mfcc=N_MFCC,
        n_fft=N_FFT,
        hop_length=HOP_LENGTH,
        win_length=WIN_LENGTH,
        center=False,
    )

    # Fix the time axis at FRAMES_PER_SEC columns: zero-pad on the right when
    # librosa returns fewer frames, crop otherwise.
    # NOTE(review): with center=False and these settings librosa presumably
    # yields fewer than 40 frames, so the last columns are zeros -- confirm
    # this matches how the training features were built.
    frame_count = coeffs.shape[1]
    if frame_count < FRAMES_PER_SEC:
        right_pad = FRAMES_PER_SEC - frame_count
        coeffs = np.pad(coeffs, ((0, 0), (0, right_pad)), mode="constant")
    else:
        coeffs = coeffs[:, :FRAMES_PER_SEC]

    # Append a trailing channel axis.
    tile = coeffs[..., np.newaxis]
    return tile.astype(np.float32)

# =========================
#        MAIN LOOP
# =========================
def main():
    """Run real-time drone detection on the UMIK-1 microphone until Ctrl+C.

    Steps:
      1. Locate the UMIK-1 input device; print an error and exit with
         status 1 if it is not found.
      2. Load the Keras model from MODEL_PATH.
      3. Open a mono float32 input stream. For every STEP_SECONDS of fresh
         audio: resample it to TARGET_SR, prepend the previous
         OVERLAP_SECONDS tail to form a 1 s window, optionally scale it by
         MUL_FAC, compute the MFCC tile, run the model and print
         p(drone) with a DRONE / NO-DRONE label.

    Any exception raised while handling a window is printed and the loop
    moves on to the next window; KeyboardInterrupt ends the loop.
    """
    # 1) Require UMIK-1
    device_index = find_umik_index()
    if device_index is None:
        print("[ERROR] UMIK-1 microphone not found. Connect it and try again.")
        sys.exit(1)
    print(f"[INFO] UMIK-1 found at input device index: {device_index}")

    # 2) Load model
    print(f"[INFO] Loading model: {MODEL_PATH}")
    model = tf.keras.models.load_model(MODEL_PATH)

    # 3) Open stream (request 16k; we still enforce 16k after capture)
    q = queue.Queue()

    # Queue.get() without a timeout is an uninterruptible wait on Windows
    # (see the Python `queue` docs), so Ctrl+C could not stop the program if
    # the stream stalled. Polling with a short timeout keeps it responsive.
    queue_poll_seconds = 0.5

    def audio_cb(indata, frames, time_info, status):
        """Stream callback: report status flags and queue a copy of the block."""
        if status:
            print(f"[AUDIO WARN] {status}", flush=True)
        # Queue a copy rather than the callback's own buffer.
        q.put(indata.copy())

    requested_sr = TARGET_SR
    print(f"[INFO] Opening stream (requested_sr={requested_sr}) …")
    with sd.InputStream(
        samplerate=requested_sr,
        channels=1,
        dtype="float32",
        callback=audio_cb,
        device=device_index,
        blocksize=0
    ) as stream:
        actual_sr = int(stream.samplerate)
        print(f"[INFO] Stream active. ACTUAL device rate = {actual_sr} Hz")

        # accumulate raw audio at 'actual_sr', then resample to 16k
        step_len_actual = int(round(STEP_SECONDS * actual_sr))    # ~0.7 s
        pending = np.zeros(0, dtype=np.float32)

        # Overlap in 16k domain; starts as silence, so the first window
        # begins with OVERLAP_SECONDS of zeros.
        overlap_len_16k = int(round(OVERLAP_SECONDS * TARGET_SR))  # 4800 samples
        tail_16k = np.zeros(overlap_len_16k, dtype=np.float32)

        if SAVE_MFCC_NPY:
            os.makedirs(SAVE_DIR, exist_ok=True)

        print("[INFO] Press Ctrl+C to stop.\n")
        counter = 0
        while True:
            try:
                # Accumulate until we have one step (~0.7 s at actual_sr)
                while pending.size < step_len_actual:
                    try:
                        block = q.get(timeout=queue_poll_seconds)
                    except queue.Empty:
                        # No audio yet; wait again (and stay interruptible).
                        continue
                    fresh = block[:, 0]  # mono float32
                    pending = np.concatenate([pending, fresh], axis=0)

                # Consume exactly one step; any surplus stays for the next one.
                fresh_step_actual = pending[:step_len_actual]
                pending = pending[step_len_actual:]

                # Resample that step to 16 kHz
                fresh_step_16k = librosa.resample(
                    y=fresh_step_actual, orig_sr=actual_sr, target_sr=TARGET_SR, res_type="kaiser_best"
                )

                # Compose 1.0 s @ 16 kHz: [last 0.3 s] + [new 0.7 s]
                window_16k = np.concatenate([tail_16k, fresh_step_16k], axis=0)

                # Enforce exact 1.0 s length: zero-pad at the end when short,
                # keep the most recent TARGET_SR samples when long.
                if window_16k.size < TARGET_SR:
                    window_16k = np.pad(window_16k, (0, TARGET_SR - window_16k.size), mode="constant")
                elif window_16k.size > TARGET_SR:
                    window_16k = window_16k[-TARGET_SR:]

                # Update tail: last 0.3 s (taken before calibration scaling)
                tail_16k = window_16k[-overlap_len_16k:].copy()

                # --- Calibration applied to audio before MFCC ---
                signal_for_mfcc = window_16k * MUL_FAC if APPLY_CALIBRATION else window_16k

                # MFCC (13x40x1)
                tile = mfcc_13x40(signal_for_mfcc)

                if SAVE_MFCC_NPY:
                    np.save(os.path.join(SAVE_DIR, f"mfcc_{int(time.time())}_{counter:06d}.npy"), tile)

                # Inference on a batch of one tile
                probs = model.predict(tile[np.newaxis, ...], verbose=0)[0]
                p_drone = float(probs[DRONE_CLASS_INDEX])

                label = "DRONE" if p_drone >= PRINT_THRESHOLD else "NO-DRONE"
                ts = time.strftime("%H:%M:%S")
                print(f"[{ts}] p(drone)={p_drone:0.3f}  -> {label}")

                counter += 1

            except KeyboardInterrupt:
                print("\n[INFO] Stopped by user.")
                break
            except Exception as e:
                # Best-effort: report the failed window and keep listening.
                print(f"[ERROR] {e}", flush=True)

# Start the detector only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()


# Notebook output: ModuleNotFoundError: No module named 'sounddevice'