In [2]:
import numpy as np
import librosa

print("Librosa version:", librosa.__version__)
print("Numpy version:", np.__version__)

Librosa version: 0.11.0
Numpy version: 1.26.4


In [3]:
import soundata

# Inicializace datasetu
dataset = soundata.initialize("urbansound8k")

# Stáhne dataset, pokud ještě není
print("📦 Kontroluji dataset...")
dataset.download()  

# Načte metadata
dataset.validate()
clip_ids = dataset.clip_ids

print(f"🔍 V databázi je {len(clip_ids)} klipů.")

INFO: Downloading ['all', 'index']. Index is being stored in /home/cigi/soundenv/lib/python3.10/site-packages/soundata/datasets/indexes, and the rest of files in /home/cigi/sound_datasets/urbansound8k
INFO: [all] downloading UrbanSound8K.tar.gz
INFO: /home/cigi/sound_datasets/urbansound8k/UrbanSound8K.tar.gz already exists and will not be downloaded. Rerun with force_overwrite=True to delete this file and force the download.


📦 Kontroluji dataset...


INFO: /home/cigi/sound_datasets/urbansound8k/UrbanSound8K_README.txt already exists. Run with force_overwrite=True to download from scratch
INFO: /home/cigi/sound_datasets/urbansound8k/audio already exists. Run with force_overwrite=True to download from scratch
INFO: /home/cigi/sound_datasets/urbansound8k/FREESOUNDCREDITS.txt already exists. Run with force_overwrite=True to download from scratch
INFO: /home/cigi/sound_datasets/urbansound8k/metadata already exists. Run with force_overwrite=True to download from scratch
INFO: [index] downloading urbansound8k_index_1.0.json
INFO: /home/cigi/soundenv/lib/python3.10/site-packages/soundata/datasets/indexes/urbansound8k_index_1.0.json already exists and will not be downloaded. Rerun with force_overwrite=True to delete this file and force the download.
100%|██████████| 1/1 [00:00<00:00, 569.26it/s]
100%|██████████| 8732/8732 [00:17<00:00, 502.85it/s]
INFO: Success: the dataset is complete and all files are valid.
INFO: --------------------


🔍 V databázi je 8732 klipů.


In [4]:
def extract_mfcc(path, n_mfcc=13, duration=4.0, sr=22050):
    y, _ = librosa.load(path, sr=sr, duration=duration)
    mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=n_mfcc)
    return np.mean(mfcc.T, axis=0)  # průměr přes časové okno

# Kategorie
target_labels = ['gun_shot']
negative_labels = ['children_playing', 'air_conditioner', 'dog_bark']

X = []
y = []

for clip_id in dataset.clip_ids:
    clip = dataset.clip(clip_id)
    if clip.tags:
        label = clip.tags.labels[0]

        if label in target_labels + negative_labels:
            features = extract_mfcc(clip.audio_path)
            X.append(features)
            y.append(1 if label == 'gun_shot' else 0)

X = np.array(X)
y = np.array(y)

print("✅ Dataset hotový:", X.shape, y.shape)

✅ Dataset hotový: (3374, 13) (3374,)


In [5]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix

# Rozdělení dat
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Trénink modelu
clf = RandomForestClassifier(n_estimators=100, random_state=42)
clf.fit(X_train, y_train)

# Vyhodnocení
y_pred = clf.predict(X_test)
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.97      0.99      0.98       612
           1       0.92      0.71      0.80        63

    accuracy                           0.97       675
   macro avg       0.94      0.85      0.89       675
weighted avg       0.97      0.97      0.97       675

