In [8]:
import csv, os, subprocess
from urllib.request import urlretrieve

# Nastavení
CSV_FILE = "unbalanced_train_segments.csv"
CSV_URL = "https://storage.googleapis.com/us_audioset/youtube_corpus/v1/csv/unbalanced_train_segments.csv"
GUNSHOT_MID = "/m/03qtq"
DOWNLOAD_DIR = "gunshots_audioset"

os.makedirs(DOWNLOAD_DIR, exist_ok=True)

# Stáhnout CSV pokud není
if not os.path.exists(CSV_FILE):
    print("📥 Stahuji anotace...")
    urlretrieve(CSV_URL, CSV_FILE)

# Načíst a vyfiltrovat gunshot segmenty
gunshots = []
with open(CSV_FILE, newline='') as f:
    reader = csv.reader(f)
    for row in reader:
        if row[0].startswith("#"):  # přeskočíme hlavičku
            continue
        labels = row[3].strip().strip('"').split(',')
        if GUNSHOT_MID in labels:
            gunshots.append(row)

print(f"🔫 Nalezeno {len(gunshots)} gunshot segmentů v unbalanced setu")

# Stáhnout a vyříznout segmenty
for i, row in enumerate(gunshots[:300]):  # změň na len(gunshots) pro ALL
    ytid = row[0].strip('"')
    start = float(row[1])
    end = float(row[2])
    outname = f"{ytid}_{int(start*1000)}_{int(end*1000)}.wav"
    outpath = os.path.join(DOWNLOAD_DIR, outname)
    if os.path.exists(outpath):
        continue

    print(f"[{i+1}/{len(gunshots)}] ⬇️ {outname}")
    try:
        subprocess.run([
            "yt-dlp",
            "-f", "bestaudio[ext=m4a]",
            "--quiet",
            "--no-warnings",
            f"https://www.youtube.com/watch?v={ytid}",
            "-o", "temp.m4a"
        ], check=True)

        subprocess.run([
            "ffmpeg",
            "-loglevel", "quiet",
            "-y",
            "-ss", str(start),
            "-to", str(end),
            "-i", "temp.m4a",
            "-ar", "16000",
            "-ac", "1",
            outpath
        ], check=True)

    except subprocess.CalledProcessError as e:
        print(f"⚠️ Chyba u {ytid}: {e}")
        continue

if os.path.exists("temp.m4a"):
    os.remove("temp.m4a")

print("✅ Vše hotovo!")


📥 Stahuji anotace...
🔫 Nalezeno 398 gunshot segmentů v unbalanced setu
[1/398] ⬇️ -0-9MjhjYXo_30000_40000.wav
[2/398] ⬇️ -0DGa_6exhc_30000_40000.wav
[3/398] ⬇️ -6uW8qkvA5A_30000_40000.wav
[4/398] ⬇️ -D9gxkL5jnQ_23000_33000.wav
[5/398] ⬇️ -LBdsxhIEvo_0_10000.wav
[6/398] ⬇️ -ZFflYSjN20_50000_60000.wav


ERROR: [youtube] -ZFflYSjN20: Video unavailable


[7/398] ⬇️ -gBpGXiZ6E0_220000_230000.wav
[8/398] ⬇️ -gbW7u8t9y4_19000_29000.wav
[9/398] ⬇️ -xHXCw5_Va4_10000_20000.wav
[10/398] ⬇️ 00XluwV4_Do_30000_40000.wav
[11/398] ⬇️ 0Owq5Nm49EY_30000_40000.wav
[12/398] ⬇️ 0aKZHa0wk_Y_30000_40000.wav


ERROR: [youtube] 0aKZHa0wk_Y: Requested format is not available. Use --list-formats for a list of available formats


[13/398] ⬇️ 0bnhxHX_IWI_12000_22000.wav
[14/398] ⬇️ 0iuyj4W7v0k_30000_40000.wav
[15/398] ⬇️ 0nxQwgeGETM_9000_19000.wav
[16/398] ⬇️ 0z3f7FIVc1E_30000_40000.wav
[17/398] ⬇️ 1IX6UMUDho0_30000_40000.wav


ERROR: [youtube] 1IX6UMUDho0: Private video. Sign in if you've been granted access to this video. Use --cookies-from-browser or --cookies for the authentication. See  https://github.com/yt-dlp/yt-dlp/wiki/FAQ#how-do-i-pass-cookies-to-yt-dlp  for how to manually pass cookies. Also see  https://github.com/yt-dlp/yt-dlp/wiki/Extractors#exporting-youtube-cookies  for tips on effectively exporting YouTube cookies


[18/398] ⬇️ 1J05b-kiIEM_270000_280000.wav
[19/398] ⬇️ 1XhssJxMwc8_30000_40000.wav
[20/398] ⬇️ 1aHUowNW-R8_40000_50000.wav



ERROR: Interrupted by user


KeyboardInterrupt: 

FreeSound


In [1]:
import freesound
import os

# 🔑 API klíč
API_KEY = "VV7pMu2KzRHPQkBZAFeSg96sH4hPdHi6OMryNnCc"

client = freesound.FreesoundClient()
client.set_token(API_KEY, "token")

# 📁 Cílová složka
output_dir = "freesound_gunshots"
os.makedirs(output_dir, exist_ok=True)

# 🔍 Vyhledání výsledků (BEZ filtru, pro jistotu)
pager = client.text_search(query="gunshot", page_size=15)

print("🔍 Výpis výsledků:\n")
for i, sound in enumerate(pager):
    print(f"[{i+1}] {sound.name} (ID: {sound.id}) | Licence: {sound.license}")
    try:
        # 🟢 Vytažení jména a cesty k souboru
        filename = f"{sound.id}_{sound.name.replace(' ', '_').replace('/', '_')}.mp3"
        filepath = os.path.join(output_dir, filename)

        # ⬇️ Stažení náhledu (preview MP3)
        preview_url = sound.previews.preview_lq_mp3
        import urllib.request
        urllib.request.urlretrieve(preview_url, filepath)
        print(f"   ✅ Uloženo jako {filename}")
    except Exception as e:
        print(f"   ❌ Chyba: {e}")

print("\n✅ Hotovo!")

🔍 Výpis výsledků:

[1] archi_gunshot_scifi_05.flac (ID: 507488) | Licence: http://creativecommons.org/publicdomain/zero/1.0/
   ✅ Uloženo jako 507488_archi_gunshot_scifi_05.flac.mp3
[2] archi_gunshot_scifi_07.flac (ID: 507486) | Licence: http://creativecommons.org/publicdomain/zero/1.0/
   ✅ Uloženo jako 507486_archi_gunshot_scifi_07.flac.mp3
[3] archi_gunshot_scifi_06.flac (ID: 507487) | Licence: http://creativecommons.org/publicdomain/zero/1.0/
   ✅ Uloženo jako 507487_archi_gunshot_scifi_06.flac.mp3
[4] archi_gunshot_scifi_08.flac (ID: 507485) | Licence: http://creativecommons.org/publicdomain/zero/1.0/
   ✅ Uloženo jako 507485_archi_gunshot_scifi_08.flac.mp3
[5] archi_gunshot_scifi_01.flac (ID: 507484) | Licence: http://creativecommons.org/publicdomain/zero/1.0/
   ✅ Uloženo jako 507484_archi_gunshot_scifi_01.flac.mp3
[6] archi_gunshot_scifi_03.flac (ID: 507482) | Licence: http://creativecommons.org/publicdomain/zero/1.0/
   ✅ Uloženo jako 507482_archi_gunshot_scifi_03.flac.mp3
[7]

In [None]:
import freesound
import os
import urllib.request

API_KEY = "VV7pMu2KzRHPQkBZAFeSg96sH4hPdHi6OMryNnCc"
client = freesound.FreesoundClient()
client.set_token(API_KEY, "token")

output_dir = "freesound_gunshots"
os.makedirs(output_dir, exist_ok=True)

queries = ["gunshot", "rifle", "pistol", "machine gun", "shotgun"]
downloaded_ids = set()

# Omezovač
MAX_SOUNDS_PER_QUERY = 3000
query_downloaded = 0

for query in queries:
    print(f"\n🔎 Hledám: {query}")
    pager = client.text_search(
        query=query,
        fields="id,name,previews",
        page_size=50,
        sort="score"
    )

    while pager:
        
        for sound in pager:
            if query_downloaded >= MAX_SOUNDS_PER_QUERY:
                print(f"⚠️ Limit {MAX_SOUNDS_PER_QUERY} zvuků pro '{query}' dosažen.")
                break

            if sound.id in downloaded_ids:
                continue
            safe_name = f"{sound.id}_{sound.name.replace(' ', '_').replace('/', '_')}.mp3"
            path = os.path.join(output_dir, safe_name)
            if os.path.exists(path):
                downloaded_ids.add(sound.id)
                query_downloaded += 1
                continue
            try:
                preview_url = sound.previews.preview_lq_mp3
                if not preview_url or preview_url == "None":
                    print(f"❌ Chyba: {sound.name} nemá dostupný MP3 náhled.")
                    continue
                print(f"⬇️ {safe_name}")
                urllib.request.urlretrieve(preview_url, path)
                downloaded_ids.add(sound.id)
            except Exception as e:
                print(f"❌ Chyba u {sound.name}: {e}")

        
        if pager.next is None:
            break
        pager = pager.next_page()


print(f"\n✅ Hotovo! Staženo {len(downloaded_ids)} unikátních zvuků.")



🔎 Hledám: gunshot
⬇️ 507488_archi_gunshot_scifi_05.flac.mp3
⬇️ 507486_archi_gunshot_scifi_07.flac.mp3
⬇️ 507487_archi_gunshot_scifi_06.flac.mp3
⬇️ 507485_archi_gunshot_scifi_08.flac.mp3
⬇️ 507484_archi_gunshot_scifi_01.flac.mp3
⬇️ 507482_archi_gunshot_scifi_03.flac.mp3
⬇️ 562868_throwing_stuff_in_dumpster_sounds_like_gunshots.mp3
⬇️ 539141_Assault_rifle_suppressor_gunshot..mp3
⬇️ 507483_archi_gunshot_scifi_02.flac.mp3
⬇️ 128302_Layered_Gunshot_8.wav.mp3
⬇️ 128299_Layered_Gunshot_5.wav.mp3
⬇️ 709873_Rifle_gunshot,_just_one_shot.mp3
⬇️ 516684_gunshot.m4a.mp3
⬇️ 402012_gunshot__high_5.wav.mp3
⬇️ 402008_gunshot__high_3.wav.mp3
⬇️ 344524_Gunshot02.wav.mp3
⬇️ 128301_Layered_Gunshot_9.wav.mp3
⬇️ 507481_archi_gunshot_scifi_04.flac.mp3
⬇️ 710084_Rifle_gunshot,_one_shot.mp3
⬇️ 687398_9mm__Gunshot.mp3.mp3
⬇️ 660404_Laser_gunshot.mp3
⬇️ 638892_Single_Gunshot.mp3
⬇️ 501108_8-bit_gunshot.wav.mp3
⬇️ 501101_8-bit_gunshot_2.wav.mp3
⬇️ 507489_archi_gunshot_scifi_09.flac.mp3
⬇️ 402791_AK47_and_various_o

KeyboardInterrupt: 

In [1]:
#FILTER

import os
import shutil
from pydub.utils import mediainfo
from pydub import AudioSegment

INPUT_DIR = "freesound_gunshots"
REJECTED_DIR = "freesound_gunshots/REJECTED"
os.makedirs(REJECTED_DIR, exist_ok=True)

# Nastavení filtrů
MIN_DURATION = 0.1   # v sekundách
MAX_DURATION = 3.0   # v sekundách
BAD_KEYWORDS = ["metal", "drum", "clap", "kick", "throw", "crowd", "hit", "loop", "sample", "whoosh", "slam"]

# Seznam všech .mp3 souborů
files = [f for f in os.listdir(INPUT_DIR) if f.endswith(".mp3")]

rejected_count = 0

for file in files:
    path = os.path.join(INPUT_DIR, file)
    try:
        audio = AudioSegment.from_file(path)
        duration = len(audio) / 1000.0  # v sekundách
        name_lower = file.lower()

        # Filtr: špatná délka nebo podezřelé slovo
        if duration < MIN_DURATION or duration > MAX_DURATION or any(bad in name_lower for bad in BAD_KEYWORDS):
            print(f"🚫 Filtruji: {file} ({duration:.2f}s)")
            shutil.move(path, os.path.join(REJECTED_DIR, file))
            rejected_count += 1

    except Exception as e:
        print(f"❌ Chyba při zpracování {file}: {e}")

print(f"\n✅ Hotovo! Odstraněno {rejected_count} špatných souborů.")


🚫 Filtruji: 570304_gun_II:_Car_Machine_Gun_Overheat..mp3 (7.11s)
🚫 Filtruji: 86346_Lark_with_MG.aif.mp3 (17.64s)
🚫 Filtruji: 342238_Rocket.wav.mp3 (6.85s)
🚫 Filtruji: 712555_krinkov-aks74u-parts_disassembling_and_assembling.mp3 (14.37s)
🚫 Filtruji: 414023_Realistic_Gun-Fire.mp3 (4.45s)
🚫 Filtruji: 392226_Single_Gunshot_6.2_HP.wav.mp3 (3.29s)
🚫 Filtruji: 445111_Airsoft_Rifle_Mag_change.mp3 (11.09s)
🚫 Filtruji: 678527_Gunshots_Rifle.mp3 (5.27s)
🚫 Filtruji: 675602_S20-46_On_the_receiving_end_of_a_50_cal_Browning_machine_gun.wav.mp3 (6.68s)
🚫 Filtruji: 675670_S18-21_Rifle_shots.wav.mp3 (4.18s)
🚫 Filtruji: 201669_AK47_down_the_block.aif.mp3 (3.42s)
🚫 Filtruji: 513353_Shotgun_Being_Loaded2.mp3.mp3 (7.43s)
🚫 Filtruji: 182052_Arctic_Shooting_Training_[FpR3Ni1LSBg].flac.mp3 (10.71s)
🚫 Filtruji: 320132_Kivääri,_kaikuvia_laukauksia_ulkona___A_rifle,_7,62_calibre,_echoing,_single_shots_at_a_distance,_also_ricochets,_exteriorv.mp3 (32.15s)
🚫 Filtruji: 626221_dog_toy_glass_bowl.wav.mp3 (9.78s)
🚫 Fil