In [None]:
import os
import requests
from concurrent.futures import ThreadPoolExecutor
import py7zr  # Install with: !pip install py7zr
from tqdm import tqdm
import shutil
from glob import glob

In [None]:
# Where the raw archive volumes are stored and where their contents get unpacked
download_dir = ".../toycar_parts"
extract_dir = ".../toycar_data"

# Create both working directories up front (no error if they already exist)
for _working_dir in (download_dir, extract_dir):
    os.makedirs(_working_dir, exist_ok=True)

# The ToyCar archive is published on Zenodo as eight split volumes (.001 to .008)
base_url = "https://zenodo.org/records/3351307/files/"
archive_parts = [f"ToyCar.7z.00{i}" for i in range(1, 9)]

# Download the archive parts
def download_file(url, save_path, timeout=60, chunk_size=262144):
    """Download ``url`` to ``save_path`` unless that file already exists.

    The response body is streamed into a temporary ``<save_path>.part`` file
    that is renamed to ``save_path`` only after the transfer has finished.
    An interrupted or failed download therefore never leaves a truncated file
    behind that a later run would skip as "already exists".

    Args:
        url: Address of the file to fetch.
        save_path: Final location of the downloaded file.
        timeout: Seconds ``requests`` waits for the server before giving up,
            so a stalled connection cannot hang the notebook forever.
        chunk_size: Number of bytes requested per streamed chunk
            (default 262144 = 256 KB).

    Raises:
        requests.HTTPError: If the server answers with an error status.
        Exception: Any error raised while streaming or writing is re-raised
            after the partial file has been removed.
    """
    if os.path.exists(save_path):
        print(f"🔹 Skipping {os.path.basename(save_path)}, already exists.")
        return

    print(f"📥 Downloading: {url}")
    partial_path = f"{save_path}.part"
    try:
        # The context manager releases the connection even when streaming fails.
        with requests.get(url, stream=True, timeout=timeout) as response:
            response.raise_for_status()
            with open(partial_path, "wb") as f:
                for chunk in response.iter_content(chunk_size=chunk_size):
                    f.write(chunk)
        # Publish the file under its final name only once it is complete.
        os.replace(partial_path, save_path)
    except BaseException:
        # Also covers KeyboardInterrupt: never leave a half-written file around.
        if os.path.exists(partial_path):
            os.remove(partial_path)
        raise
    print(f"✅ Downloaded: {save_path}")

def parallel_download(parts, base_url, target_dir, max_workers=3):
    """Download several archive parts concurrently and report any failures.

    Each part is fetched with ``download_file`` from ``base_url + part`` and
    stored as ``target_dir/part``. All downloads are allowed to finish before
    the results are inspected, so one failing part does not cancel the others.

    Args:
        parts: File names of the archive parts to download.
        base_url: URL prefix that each part name is appended to.
        target_dir: Directory the parts are saved into.
        max_workers: Maximum number of simultaneous downloads.

    Raises:
        RuntimeError: If at least one part could not be downloaded. The
            message lists the affected part names.
    """
    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        submitted = [
            (
                part,
                executor.submit(
                    download_file, base_url + part, os.path.join(target_dir, part)
                ),
            )
            for part in parts
        ]
    # Leaving the ``with`` block waits for every worker, so all futures are done.

    failed_parts = []
    for part, future in submitted:
        # Without this check an exception raised in a worker thread would be lost.
        error = future.exception()
        if error is not None:
            print(f"❌ Failed to download {part}: {error}")
            failed_parts.append(part)

    if failed_parts:
        raise RuntimeError(
            f"{len(failed_parts)} download(s) failed: {', '.join(failed_parts)}"
        )

# Fetch every archive volume into the download directory
parallel_download(parts=archive_parts, base_url=base_url, target_dir=download_dir)

In [None]:
# Define all relevant case and sound type folders
cases = [f"case{i}" for i in range(1, 5)]
sound_types = ["AnomalousSound_IND", "NormalSound_IND"]

# Loop through each combination of case and sound type
for case in cases:
    for sound_type in sound_types:
        pattern = f"ToyCar/{case}/{sound_type}/*_ch1_*.wav"
        print(f"🗜 Extracting: {pattern}")
        !7z x "{download_dir}/ToyCar.7z.001" -o"{extract_dir}" "{pattern}" -y

print("✅ Filtered extraction completed.")


🗜 Extracting: ToyCar/case1/AnomalousSound_IND/*_ch1_*.wav

7-Zip [64] 16.02 : Copyright (c) 1999-2016 Igor Pavlov : 2016-05-21
p7zip Version 16.02 (locale=en_US.UTF-8,Utf16=on,HugeFiles=on,64 bits,8 CPUs AMD EPYC 7B12 (830F10),ASM,AES-NI)

Scanning the drive for archives:
  0M Scan /content/toycar_parts/                                1 file, 8522825728 bytes (8128 MiB)

Extracting archive: /content/toycar_parts/ToyCar.7z.001
  0% 1 Open           --
Path = /content/toycar_parts/ToyCar.7z.001
Type = Split
Physical Size = 8522825728
Volumes = 8
Total Physical Size = 68174553501
----
Path = ToyCar.7z
Size = 68174553501
--
Path = ToyCar.7z
Type = 7z
Physical Size = 68174553501
Headers Size = 235920
Method = Delta LZMA2:27
Solid = +
Blocks = 24

  0%      0% 5 . ToyCar/case1/AnomalousSound_IN . ar_case1_ab01_IND_ch2_0001.wav  

In [None]:
# Count the number of wav files in both folders
# Define folder paths
# Build the folder lists from `extract_dir`, the directory the extraction cell
# unpacks into, so they always point at the data this notebook produced.
# The archive unpacks as <extract_dir>/ToyCar/case<N>/<sound type>.
anom_paths = [
    os.path.join(extract_dir, "ToyCar", f"case{i}", "AnomalousSound_IND")
    for i in range(1, 5)
]

normal_paths = [
    os.path.join(extract_dir, "ToyCar", f"case{i}", "NormalSound_IND")
    for i in range(1, 5)
]

# Function to count files in a folder
def count_files(folders):
    """Return how many ``.wav`` files sit directly inside the given folders.

    A line with the per-folder count is printed for every folder that exists.
    A folder that does not exist is reported with an error line and adds
    nothing to the total. Sub-directories are not searched.
    """
    grand_total = 0
    for folder_path in folders:
        if not os.path.exists(folder_path):
            print(f"🚨 ERROR: Folder not found: {folder_path}")
            continue

        # Only regular files whose name ends in ".wav" are counted.
        count = sum(
            1
            for f in os.listdir(folder_path)
            if f.endswith(".wav") and os.path.isfile(os.path.join(folder_path, f))
        )
        print(f"✔️ Found {count} files in: {folder_path}")
        grand_total += count
    return grand_total

# Tally the .wav files of each class (anomalous folders first, then normal)
anom_count, normal_count = (
    count_files(folder_group) for folder_group in (anom_paths, normal_paths)
)

# Report the two totals
print(f"📂 Number of files in AnomalousSound: {anom_count}")
print(f"📂 Number of files in NormalSound: {normal_count}")

✔️ Found 264 files in: /content/toycar_filtered/ToyCar/case1/AnomalousSound_IND
✔️ Found 265 files in: /content/toycar_filtered/ToyCar/case2/AnomalousSound_IND
✔️ Found 265 files in: /content/toycar_filtered/ToyCar/case3/AnomalousSound_IND
✔️ Found 265 files in: /content/toycar_filtered/ToyCar/case4/AnomalousSound_IND
✔️ Found 1350 files in: /content/toycar_filtered/ToyCar/case1/NormalSound_IND
✔️ Found 1350 files in: /content/toycar_filtered/ToyCar/case2/NormalSound_IND
✔️ Found 1350 files in: /content/toycar_filtered/ToyCar/case3/NormalSound_IND
✔️ Found 1350 files in: /content/toycar_filtered/ToyCar/case4/NormalSound_IND
📂 Number of files in AnomalousSound: 1059
📂 Number of files in NormalSound: 5400


In [None]:
#Move wav files to one folder for anomalous sounds

# Destination folder. It is assigned exactly once and uses the same lower-case
# "toycar_data" parent as the NormalSound destination, so both classes end up
# side by side even on a case-sensitive file system.
destination_folder = "/.../toycar_data/AnomalousSound"
os.makedirs(destination_folder, exist_ok=True)

# Move every .wav file from the anomalous-sound folders into the destination
file_count = 0  # number of files moved successfully
for folder in anom_paths:
    if not os.path.exists(folder):
        print(f"⚠️ Folder does not exist: {folder}")
        continue
    for wav_file in glob(os.path.join(folder, "*.wav")):
        try:
            shutil.move(wav_file, destination_folder)
        except Exception as e:
            # Report the failure and continue with the remaining files.
            print(f"❌ Error moving file {wav_file}: {e}")
        else:
            file_count += 1

print(f"✅ Moved {file_count} files to {destination_folder}")


✅ Moved 1059 files to /content/toycar_data/AnomalousSound


In [None]:
# Move wav files to one folder for normal sounds

# Single directory that collects every normal-sound recording
destination_folder = "/.../toycar_data/NormalSound"
os.makedirs(destination_folder, exist_ok=True)

# Relocate the .wav files folder by folder
file_count = 0  # number of files moved successfully
for folder in normal_paths:
    if not os.path.exists(folder):
        print(f"⚠️ Folder does not exist: {folder}")
        continue
    for wav_file in glob(os.path.join(folder, "*.wav")):
        try:
            shutil.move(wav_file, destination_folder)
        except Exception as e:
            # Report the failure and continue with the remaining files.
            print(f"❌ Error moving file {wav_file}: {e}")
        else:
            file_count += 1

print(f"✅ Moved {file_count} files to {destination_folder}")


✅ Moved 5400 files to /content/toycar_data/NormalSound
