In [11]:
import queue
import sounddevice as sd
import numpy as np
import json
import pyautogui
import time
from vosk import Model, KaldiRecognizer

# === Configuration ===
# Samples per second; given to both the recognizer and the input stream.
SAMPLE_RATE: int = 16_000
# Frames delivered to the audio callback per block (the stream's blocksize).
BLOCK_SIZE: int = 2_000
# A block whose sample norm (np.linalg.norm) exceeds this holds the space key.
VOLUME_THRESHOLD: int = 120_000
# Minimum gap, in seconds, between two presses of the same keyword key.
COOLDOWN: float = 0.5

# === Model and Recognizer ===
# Load the Vosk model from the "vosk-model-small-en-us-0.15" directory
# (the path is relative, so it is resolved against the working directory).
model = Model("vosk-model-small-en-us-0.15")
# Recognizer bound to that model, fed audio at SAMPLE_RATE.
recognizer = KaldiRecognizer(model, SAMPLE_RATE)
# Raw audio blocks: filled by audio_callback, drained by recognizer_loop.
audio_queue = queue.Queue()

# === Keyword Groups ===
# Key to press -> words that trigger it when they occur in recognized text.
instant_groups = {
    "p": [
        "light",
        "night",
        "what",
        "not",
        "matt",
        "like",
        "lot",
        "right",
        "luck",
        "eight",
        "lights",
    ],
    "z": [
        "attack",
        "back",
        "think",
        "but",
        "tap",
    ],
}

# time.time() of the most recent press per key; 0 means "never pressed yet".
last_trigger_time = dict.fromkeys(instant_groups, 0)
# True while the volume trigger is holding the space key down.
is_holding_space = False
# Flag that switches off the volume-driven hold-space detection.
space_disabled = False

# === Audio Callback ===
def audio_callback(indata, frames, time_info, status):
    """Queue one audio block for recognition and drive the space key by volume.

    Passed as ``callback`` to the input stream opened in ``main``.

    Args:
        indata: Buffer holding the block's samples; read here as int16.
        frames: Unused.
        time_info: Unused.
        status: Stream status; printed when truthy.
    """
    global is_holding_space, space_disabled

    if status:
        print(status)

    # Hand a copy of the block to the recognizer loop.
    audio_queue.put(bytes(indata))

    # Volume-driven space handling, skipped once it has been disabled.
    if not space_disabled:
        samples = np.frombuffer(indata, dtype=np.int16)
        is_loud = np.linalg.norm(samples) > VOLUME_THRESHOLD

        if is_loud and not is_holding_space:
            # Went loud: start holding space.
            pyautogui.keyDown('space')
            is_holding_space = True
            print("‚¨áÔ∏è SPACE DOWN by volume trigger")
        elif not is_loud and is_holding_space:
            # Went quiet: let go of space.
            pyautogui.keyUp('space')
            is_holding_space = False
            print("‚¨ÜÔ∏è SPACE UP by volume drop")

# === Fuzzy Matching ===
def fuzzy_match(text, keywords):
    """Return True if any keyword occurs as a substring of ``text``.

    Matching is plain substring containment, so a keyword also matches
    inside a longer word (e.g. "light" matches "slightly").

    Args:
        text: The text to search in.
        keywords: Iterable of strings to look for.

    Returns:
        True on the first keyword found in ``text``; False if none is found
        or ``keywords`` is empty.
    """
    for candidate in keywords:
        if candidate in text:
            return True
    return False

# === Recognition Loop ===
def recognizer_loop():
    """Feed queued audio to the recognizer and press keys on keyword hits.

    Takes blocks from ``audio_queue`` and passes them to ``recognizer``.
    Whenever the recognizer reports a finished utterance with non-empty text,
    each group in ``instant_groups`` is checked: if the text matches one of
    the group's keywords and the group's key has not been pressed within
    ``COOLDOWN`` seconds, the key is pressed.

    The first time the "z" key fires, volume-driven space handling is
    switched off for the rest of the run and space is released if held.

    The loop has no exit condition; it ends only when an exception
    (e.g. KeyboardInterrupt) propagates out.
    """
    global is_holding_space, space_disabled
    print("üéß Listening (volume + keyword)...")

    while True:
        try:
            # Block until a chunk arrives rather than polling the queue every
            # millisecond; the short timeout only bounds each wait so the
            # loop still wakes up regularly.
            data = audio_queue.get(timeout=0.1)
        except queue.Empty:
            continue

        # AcceptWaveform is falsy until the recognizer has a final result.
        if not recognizer.AcceptWaveform(data):
            continue

        result = json.loads(recognizer.Result())
        text = result.get("text", "").lower()
        if not text:
            continue

        print(f"[FULL TEXT] {text}")  # debug output; can be commented out

        for key, keywords in instant_groups.items():
            now = time.time()
            if fuzzy_match(text, keywords) and (now - last_trigger_time[key] > COOLDOWN):
                pyautogui.press(key)
                last_trigger_time[key] = now
                print(f"üîò {text} -> press [{key}]")

                # First "z" press detected: disable space detection.
                if key == "z" and not space_disabled:
                    space_disabled = True
                    if is_holding_space:
                        pyautogui.keyUp('space')
                        print("‚¨ÜÔ∏è SPACE UP by disable trigger")
                        is_holding_space = False
                    print("üö´ SPACE detection permanently disabled due to [Z] key")

# === Main Entry ===
def main():
    """Open the microphone input stream and run the recognition loop.

    The stream delivers int16 mono blocks of ``BLOCK_SIZE`` frames at
    ``SAMPLE_RATE`` to ``audio_callback`` while ``recognizer_loop`` runs.

    ``recognizer_loop`` never returns normally, so this function ends only
    when an exception (typically KeyboardInterrupt) propagates. Before it
    does, the space key is released if the volume trigger was still holding
    it, so the key is not left stuck down after the program stops.
    """
    global is_holding_space

    try:
        with sd.RawInputStream(
            samplerate=SAMPLE_RATE,
            blocksize=BLOCK_SIZE,
            dtype='int16',
            channels=1,
            callback=audio_callback
        ):
            recognizer_loop()
    finally:
        # Runs after the `with` block has exited, i.e. after the stream's
        # context manager has shut the stream down.
        if is_holding_space:
            pyautogui.keyUp('space')
            is_holding_space = False

# Start listening only when this code is run directly, not when it is imported.
if __name__ == "__main__":
    main()


LOG (VoskAPI:ReadDataFiles():model.cc:213) Decoding params beam=10 max-active=3000 lattice-beam=2
LOG (VoskAPI:ReadDataFiles():model.cc:216) Silence phones 1:2:3:4:5:6:7:8:9:10
LOG (VoskAPI:RemoveOrphanNodes():nnet-nnet.cc:948) Removed 0 orphan nodes.
LOG (VoskAPI:RemoveOrphanComponents():nnet-nnet.cc:847) Removing 0 orphan components.
LOG (VoskAPI:ReadDataFiles():model.cc:248) Loading i-vector extractor from vosk-model-small-en-us-0.15/ivector/final.ie
LOG (VoskAPI:ComputeDerivedVars():ivector-extractor.cc:183) Computing derived variables for iVector extractor
LOG (VoskAPI:ComputeDerivedVars():ivector-extractor.cc:204) Done.
LOG (VoskAPI:ReadDataFiles():model.cc:282) Loading HCL and G from vosk-model-small-en-us-0.15/graph/HCLr.fst vosk-model-small-en-us-0.15/graph/Gr.fst
LOG (VoskAPI:ReadDataFiles():model.cc:303) Loading winfo vosk-model-small-en-us-0.15/graph/phones/word_boundary.int


üéß Listening (volume + keyword)...
‚¨áÔ∏è SPACE DOWN by volume trigger
‚¨ÜÔ∏è SPACE UP by volume drop
‚¨áÔ∏è SPACE DOWN by volume trigger
‚¨ÜÔ∏è SPACE UP by volume drop
[FULL TEXT] fed up with this are struggling million your money into well with on and kid work on
‚¨áÔ∏è SPACE DOWN by volume trigger
‚¨ÜÔ∏è SPACE UP by volume drop
[FULL TEXT] i ah
[FULL TEXT] can try and enjoy yeah
‚¨áÔ∏è SPACE DOWN by volume trigger
‚¨ÜÔ∏è SPACE UP by volume drop
[FULL TEXT] i
‚¨áÔ∏è SPACE DOWN by volume trigger
‚¨ÜÔ∏è SPACE UP by volume drop
[FULL TEXT] that
[FULL TEXT] ice cream
‚¨áÔ∏è SPACE DOWN by volume trigger
[FULL TEXT] yeah
‚¨ÜÔ∏è SPACE UP by volume drop
[FULL TEXT] will be a lot of times are other things just
üîò will be a lot of times are other things just -> press [p]
‚¨áÔ∏è SPACE DOWN by volume trigger
‚¨ÜÔ∏è SPACE UP by volume drop
[FULL TEXT] so
[FULL TEXT] yeah
‚¨áÔ∏è SPACE DOWN by volume trigger
[FULL TEXT] the dollars you
‚¨ÜÔ∏è SPACE UP by volume drop
‚¨áÔ∏è SPACE DOWN by volume 

KeyboardInterrupt: 