In [None]:
!pip install webrtcvad pydub librosa soundfile


In [None]:
import os
import shutil
import tempfile
import zipfile
from pathlib import Path

import soundfile as sf
from google.colab import files
from pydub import AudioSegment
from webrtcvad import Vad

In [3]:
def convert_to_vad_compatible(input_path, temp_wav):
    """Re-encode an audio file as a WAV that the VAD step can consume.

    The file is decoded with pydub, reduced to one channel, resampled to
    16000 Hz, set to a sample width of 2 bytes and exported in WAV format.

    Args:
        input_path: Path of the audio file to decode.
        temp_wav: Path the converted WAV file is written to.
    """
    decoded = AudioSegment.from_file(input_path)
    mono = decoded.set_channels(1)
    resampled = mono.set_frame_rate(16000)
    pcm16 = resampled.set_sample_width(2)
    pcm16.export(temp_wav, format="wav")

def read_wav(path):
    """Load a WAV file as int16 samples and reject multi-channel audio.

    Args:
        path: Path of the WAV file to read.

    Returns:
        A ``(samples, sample_rate)`` tuple as returned by ``soundfile.read``
        with ``dtype='int16'``.

    Raises:
        ValueError: If the loaded array is not one-dimensional.
    """
    samples, rate = sf.read(path, dtype='int16')
    if samples.ndim == 1:
        return samples, rate
    raise ValueError("Только моно аудио поддерживается")

def get_speech_segments(audio, sample_rate, aggressiveness=2):
    """Find contiguous runs of speech frames in a sample buffer.

    The buffer is scanned in consecutive 30 ms frames; each full frame is
    passed as bytes to a ``Vad`` instance. A trailing frame shorter than
    30 ms is never classified.

    Args:
        audio: Sliceable sample buffer whose slices provide ``tobytes()``.
        sample_rate: Sampling rate in Hz, forwarded to the detector.
        aggressiveness: Mode value passed to the ``Vad`` constructor.

    Returns:
        A list of ``(start, end)`` sample-index tuples, one per run of
        consecutive speech frames. ``end`` is the index of the first
        non-speech frame after the run, or ``len(audio)`` when the run is
        still open after the last full frame.
    """
    detector = Vad(aggressiveness)
    frame_duration_ms = 30
    frame_len = int(sample_rate * (frame_duration_ms / 1000.0))
    total = len(audio)

    segments = []
    open_start = None  # start index of the run being tracked, None outside speech

    # Stepping only over offsets that leave room for a full frame is the same
    # as visiting every offset and skipping the short trailing frame.
    for offset in range(0, total - frame_len + 1, frame_len):
        chunk = audio[offset:offset + frame_len]
        voiced = detector.is_speech(chunk.tobytes(), sample_rate)
        if voiced:
            if open_start is None:
                open_start = offset
        elif open_start is not None:
            segments.append((open_start, offset))
            open_start = None

    if open_start is not None:
        segments.append((open_start, total))
    return segments

def trim_audio_to_speech(input_path, output_path, aggressiveness=2):
    """Cut leading and trailing non-speech from an audio file and save it as WAV.

    The input is converted to a temporary VAD-compatible WAV, speech segments
    are detected, and the samples from the start of the first segment to the
    end of the last segment are written to ``output_path``. Anything between
    those two points, pauses included, is kept. When no speech segment is
    found the whole converted signal is written unchanged.

    If any step fails, the error is printed and, as a best-effort fallback,
    the input is converted without trimming directly to ``output_path``. A
    failure of that fallback is printed too instead of propagating, so one
    unreadable file does not abort a loop over many files.

    Args:
        input_path: Path of the audio file to process.
        output_path: Path the resulting WAV file is written to.
        aggressiveness: Value forwarded to ``get_speech_segments``.

    Returns:
        None. Progress and errors are reported with ``print``.
    """
    # Use a unique temporary file rather than a fixed name in the working
    # directory, so an input that happens to be called "temp_vad.wav" is
    # neither overwritten nor deleted by the cleanup below.
    fd, temp_wav = tempfile.mkstemp(suffix=".wav")
    os.close(fd)
    try:
        convert_to_vad_compatible(input_path, temp_wav)
        audio, sr = read_wav(temp_wav)
        segments = get_speech_segments(audio, sr, aggressiveness)

        if segments:
            start = segments[0][0]
            end = segments[-1][1]
            trimmed = audio[start:end]
        else:
            trimmed = audio

        sf.write(output_path, trimmed, sr)
        duration_orig = len(audio) / sr
        duration_trim = len(trimmed) / sr
        print(f" {Path(input_path).name} → {Path(output_path).name} "
              f"({duration_orig:.2f}s → {duration_trim:.2f}s)")
    except Exception as e:
        print(f" Ошибка обработки {input_path}: {e}")
        # The fallback repeats the conversion, which may be exactly what
        # failed above; guard it so the exception cannot escape the handler.
        try:
            convert_to_vad_compatible(input_path, output_path)
        except Exception as fallback_error:
            print(f" Не удалось сохранить {input_path} без обрезки: {fallback_error}")
    finally:
        if os.path.exists(temp_wav):
            os.remove(temp_wav)


In [None]:
# Ask which kind of input to process, receive the upload through Colab and
# build `input_paths`, the list of audio files the following cells work on.
# `is_zip_mode` records whether the results should be returned as an archive.
print("Выберите тип входных данных:")
print("1 → Один аудиофайл")
print("2 → ZIP-архив с аудиофайлами")
choice = input("Введите 1 или 2: ").strip()

input_paths = []
is_zip_mode = False

if choice == "1":
    print("\n Загрузите один аудиофайл (mp3, wav, m4a и т.д.)")
    uploaded = files.upload()
    # Nothing was uploaded: stop here instead of silently doing nothing later.
    if not uploaded:
        raise ValueError("Файл не загружен. Перезапустите ячейку.")
    input_paths = list(uploaded.keys())

elif choice == "2":
    print("\nЗагрузите ZIP-архив с аудиофайлами")
    uploaded = files.upload()
    # Fail with a clear message rather than an IndexError on the lookup below.
    if not uploaded:
        raise ValueError("Файл не загружен. Перезапустите ячейку.")
    zip_path = list(uploaded.keys())[0]
    is_zip_mode = True

    extract_dir = Path("input_extracted")
    # Start from an empty directory so files extracted by an earlier run of
    # this cell are not picked up and processed again.
    shutil.rmtree(extract_dir, ignore_errors=True)
    extract_dir.mkdir(exist_ok=True)
    with zipfile.ZipFile(zip_path, 'r') as zf:
        zf.extractall(extract_dir)

    supported_ext = {'.wav', '.mp3', '.m4a', '.flac', '.ogg', '.aac', '.wma'}
    # Sorted so the processing order is the same on every run.
    for f in sorted(extract_dir.rglob("*")):
        # Archives created on macOS carry metadata entries ("__MACOSX/" and
        # "._name.ext") that share the audio extension but are not audio.
        if "__MACOSX" in f.parts or f.name.startswith("._"):
            continue
        if f.is_file() and f.suffix.lower() in supported_ext:
            input_paths.append(str(f))

    if not input_paths:
        print(" В архиве не найдено поддерживаемых аудиофайлов.")
else:
    raise ValueError("Неверный ввод. Перезапустите ячейку.")


In [None]:
# Trim every collected input file and store the result as WAV in `output_dir`.
output_dir = Path("trimmed_output")
# Start from an empty directory so results of an earlier run are not
# downloaded again together with the new ones.
shutil.rmtree(output_dir, ignore_errors=True)
output_dir.mkdir(exist_ok=True)

for input_file in input_paths:
    stem = Path(input_file).stem
    output_file = output_dir / f"{stem}.wav"
    # Different inputs can share a stem (a.mp3 and a.wav, or equal names in
    # different archive folders); add a numeric suffix instead of overwriting
    # a result that was already written.
    suffix_index = 1
    while output_file.exists():
        output_file = output_dir / f"{stem}_{suffix_index}.wav"
        suffix_index += 1
    trim_audio_to_speech(input_file, str(output_file), aggressiveness=2)

In [None]:
# Hand the results back to the user: each file individually for a single
# upload, or one deflate-compressed archive when the input was a ZIP.
if not is_zip_mode:
    print("\n Скачивание обработанного файла...")
    for result_path in output_dir.iterdir():
        files.download(str(result_path))
else:
    output_zip = "trimmed_audio.zip"
    with zipfile.ZipFile(output_zip, mode='w', compression=zipfile.ZIP_DEFLATED) as archive:
        for result_path in output_dir.iterdir():
            # Store entries flat, by file name only.
            archive.write(result_path, arcname=result_path.name)
    print(f"\n Создан ZIP-архив: {output_zip}")
    files.download(output_zip)


In [None]:
# Remove the working directories created by the earlier cells; directories
# that do not exist are ignored.
for leftover_dir in ("input_extracted", "trimmed_output"):
    shutil.rmtree(leftover_dir, ignore_errors=True)