In [None]:
import os
import numpy as np
import tensorflow_hub as hub
import librosa
import joblib
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from tqdm import tqdm

# Root folder holding one sub-folder of .wav files per class name.
data_dir = "data"

# Class name -> integer label. The key order is also the order in which the
# class folders are processed further down.
class_map = dict(motorcycle=1, background=0)

# Accumulators for the per-frame embeddings and their labels.
X = []
y = []

# Pretrained YAMNet audio model from TensorFlow Hub, used as a feature extractor.
yamnet = hub.load("https://tfhub.dev/google/yamnet/1")

def extract_embeddings(filepath):
    """Return the YAMNet embeddings of one audio file as a NumPy array.

    The file is loaded at its native sample rate with all channels, averaged
    down to a single channel, resampled to 16 kHz when necessary and cast to
    float32 before being passed to the module-level ``yamnet`` model.

    Args:
        filepath: Path of the audio file to load with librosa.

    Returns:
        The second output of ``yamnet`` (the embeddings), converted with
        ``.numpy()``; the calling code iterates it row by row.
    """
    target_rate = 16000
    samples, native_rate = librosa.load(filepath, sr=None, mono=False)

    # Stereo / multi-channel -> mono by averaging over the channel axis.
    if samples.ndim > 1:
        samples = np.mean(samples, axis=0)

    # Resample to 16 kHz unless the file is already at that rate.
    if native_rate != target_rate:
        samples = librosa.resample(samples, orig_sr=native_rate, target_sr=target_rate)

    # Cast to float32 before calling the model.
    samples = samples.astype(np.float32)

    # Only the embeddings are needed; the other two outputs are discarded.
    _, frame_embeddings, _ = yamnet(samples)
    return frame_embeddings.numpy()

# Build the frame-level dataset: every YAMNet embedding row of every .wav file
# becomes one sample labelled with the class id of the folder it came from.
for label_name, label_id in class_map.items():
    folder = os.path.join(data_dir, label_name)
    # os.listdir returns entries in arbitrary order; sorting keeps the sample
    # order (and therefore the seeded split below) reproducible across runs.
    wav_files = sorted(name for name in os.listdir(folder) if name.endswith(".wav"))
    for file in tqdm(wav_files, desc=f"Processing {label_name}"):
        path = os.path.join(folder, file)
        embs = extract_embeddings(path)
        X.extend(embs)
        y.extend([label_id] * len(embs))

# Fail early with a clear message instead of writing empty arrays and
# crashing later inside train_test_split.
if not X:
    raise ValueError(f"No embeddings extracted from .wav files under {data_dir!r}")

X = np.array(X)
y = np.array(y)

# Cache the extracted features so training can be repeated without YAMNet.
os.makedirs("embeddings", exist_ok=True)
np.save("embeddings/X.npy", X)
np.save("embeddings/y.npy", y)

# Train the classifier.
# NOTE(review): the split is done per frame, so frames of the same recording
# can end up in both train and test; the reported scores are likely
# optimistic. Consider splitting per file (grouped split) - TODO confirm.
X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y, test_size=0.2, random_state=42)
clf = LogisticRegression(max_iter=1000).fit(X_train, y_train)

# Save the trained model.
os.makedirs("model", exist_ok=True)
joblib.dump(clf, "model/classifier.pkl")

# Results.
y_pred = clf.predict(X_test)
# classification_report pairs target_names with the label ids in the order
# given by `labels` (sorted ids by default). Passing the dict keys in
# insertion order attached "motorcycle" to id 0 and "background" to id 1,
# i.e. the rows were swapped. Order both lists by label id explicitly.
ordered_classes = sorted(class_map.items(), key=lambda item: item[1])
label_ids = [class_id for _, class_id in ordered_classes]
label_names = [class_name for class_name, _ in ordered_classes]
print(classification_report(y_test, y_pred, labels=label_ids, target_names=label_names))













Processing motorcycle: 100%|██████████| 518/518 [00:20<00:00, 25.54it/s]
Processing background: 100%|██████████| 518/518 [00:13<00:00, 37.79it/s]


              precision    recall  f1-score   support

  motorcycle       0.97      0.95      0.96       822
  background       0.96      0.97      0.97      1036

    accuracy                           0.97      1858
   macro avg       0.97      0.96      0.96      1858
weighted avg       0.97      0.97      0.96      1858



In [None]:
import tensorflow_hub as hub
import sounddevice as sd
import numpy as np
import joblib
import librosa

# Capture settings for the live microphone stream.
SAMPLERATE = 16000  # Hz
DURATION = 1.0  # 1 second

# Pretrained YAMNet feature extractor from TensorFlow Hub.
yamnet = hub.load("https://tfhub.dev/google/yamnet/1")

# Classifier loaded from the joblib file on disk.
clf = joblib.load("model/classifier.pkl")

def classify_audio(audio, sr):
    """Classify an audio buffer by majority vote over its YAMNet frames.

    Args:
        audio: NumPy array of samples. A multi-dimensional array is averaged
            over axis 1 (presumably (frames, channels), as passed in by the
            stream callback - confirm for other callers).
        sr: Sample rate of ``audio`` in Hz; anything other than 16000 is
            resampled to 16 kHz.

    Returns:
        The class id predicted for the most frames (``np.argmax`` of the
        vote counts, so a tie resolves to the lowest id).
    """
    # Stereo -> mono.
    mono = np.mean(audio, axis=1) if audio.ndim > 1 else audio

    # Resample to 16 kHz when needed.
    if sr != 16000:
        mono = librosa.resample(mono, orig_sr=sr, target_sr=16000)

    samples = mono.astype(np.float32)

    # Only the embeddings output of the model is used.
    _, frame_embeddings, _ = yamnet(samples)
    frame_votes = clf.predict(frame_embeddings.numpy())

    # minlength=2 keeps a slot for both class ids even if only one was predicted.
    vote_counts = np.bincount(frame_votes, minlength=2)
    return np.argmax(vote_counts)

def audio_callback(indata, frames, time, status):
    """Stream callback: classify one recorded block and print the result.

    Args:
        indata: Block of recorded samples handed over by the input stream.
        frames: Number of frames in ``indata`` (unused).
        time: Timing information supplied by the stream (unused).
        status: Status flags supplied by the stream; truthy when a problem
            such as an input overflow was reported for this block.
    """
    # Report stream problems instead of silently ignoring them: a flagged
    # block may contain dropped samples, which makes the prediction suspect.
    if status:
        print(f"[audio status] {status}")

    # Work on a copy so the classification never touches the stream's own buffer.
    pred = classify_audio(indata.copy(), SAMPLERATE)
    label = "üèç –ú–æ—Ç–æ—Ü–∏–∫–ª/–ö–≤–∞–¥—Ä–æ—Ü–∏–∫–ª" if pred == 1 else "üåÜ –§–æ–Ω–æ–≤–∏–π —à—É–º"
    print(f"[–†–µ–∞–ª—å–Ω–∏–π —á–∞—Å] –†–µ–∑—É–ª—å—Ç–∞—Ç: {label}")

# Open a mono input stream that calls audio_callback once per
# DURATION-second block, and keep the main thread alive until interrupted.
try:
    with sd.InputStream(callback=audio_callback, channels=1, samplerate=SAMPLERATE, blocksize=int(SAMPLERATE * DURATION)):
        print("üé§ –°–ª—É—Ö–∞—é... –ù–∞—Ç–∏—Å–Ω–∏ Ctrl+C –¥–ª—è –≤–∏—Ö–æ–¥—É")
        while True:
            # Sleep instead of spinning: `while True: pass` pins a CPU core
            # and competes with the callback for the interpreter. Short
            # sleeps keep Ctrl+C responsive.
            sd.sleep(100)
except KeyboardInterrupt:
    # Ctrl+C is the documented way to stop listening; leaving the `with`
    # block has already closed the stream, so exit without a traceback.
    pass


üé§ –°–ª—É—Ö–∞—é... –ù–∞—Ç–∏—Å–Ω–∏ Ctrl+C –¥–ª—è –≤–∏—Ö–æ–¥—É
[–†–µ–∞–ª—å–Ω–∏–π —á–∞—Å] –†–µ–∑—É–ª—å—Ç–∞—Ç: üåÜ –§–æ–Ω–æ–≤–∏–π —à—É–º
[–†–µ–∞–ª—å–Ω–∏–π —á–∞—Å] –†–µ–∑—É–ª—å—Ç–∞—Ç: üåÜ –§–æ–Ω–æ–≤–∏–π —à—É–º
[–†–µ–∞–ª—å–Ω–∏–π —á–∞—Å] –†–µ–∑—É–ª—å—Ç–∞—Ç: üèç –ú–æ—Ç–æ—Ü–∏–∫–ª/–ö–≤–∞–¥—Ä–æ—Ü–∏–∫–ª
[–†–µ–∞–ª—å–Ω–∏–π —á–∞—Å] –†–µ–∑—É–ª—å—Ç–∞—Ç: üåÜ –§–æ–Ω–æ–≤–∏–π —à—É–º
[–†–µ–∞–ª—å–Ω–∏–π —á–∞—Å] –†–µ–∑—É–ª—å—Ç–∞—Ç: üåÜ –§–æ–Ω–æ–≤–∏–π —à—É–º
[–†–µ–∞–ª—å–Ω–∏–π —á–∞—Å] –†–µ–∑—É–ª—å—Ç–∞—Ç: üåÜ –§–æ–Ω–æ–≤–∏–π —à—É–º
[–†–µ–∞–ª—å–Ω–∏–π —á–∞—Å] –†–µ–∑—É–ª—å—Ç–∞—Ç: üåÜ –§–æ–Ω–æ–≤–∏–π —à—É–º
[–†–µ–∞–ª—å–Ω–∏–π —á–∞—Å] –†–µ–∑—É–ª—å—Ç–∞—Ç: üåÜ –§–æ–Ω–æ–≤–∏–π —à—É–º
[–†–µ–∞–ª—å–Ω–∏–π —á–∞—Å] –†–µ–∑—É–ª—å—Ç–∞—Ç: üåÜ –§–æ–Ω–æ–≤–∏–π —à—É–º
[–†–µ–∞–ª—å–Ω–∏–π —á–∞—Å] –†–µ–∑—É–ª—å—Ç–∞—Ç: üåÜ –§–æ–Ω–æ–≤–∏–π —à—É–º
[–†–µ–∞–ª—å–Ω–∏–π —á–∞—Å] –†–µ–∑—É–ª—å—Ç–∞—Ç: üèç –ú–æ—Ç–æ—Ü–∏–∫–ª/–ö–≤–∞–¥—Ä–æ—Ü–∏–∫–ª
[–†–µ–∞–ª—å–Ω–∏–π —á–∞—Å] –†–µ–∑—É–ª—å—Ç–∞—Ç: üåÜ –§–æ–Ω–æ–≤–∏–π —à—É–º
[–†–µ–∞–ª—å–Ω–∏–π —á–∞—Å] –†–

KeyboardInterrupt: 