In [1]:
import torch
import torchaudio

In [3]:
# Path of the long recording used throughout this notebook (relative to the notebook's working directory).
test_wav = 'test/ALE2G-chailian-20dB-15M-4K-3Hz.wav'
# Query the file's metadata (sample rate, frame count, channel count, bit depth, encoding).
metadata = torchaudio.info(test_wav)

In [5]:
# Show the metadata gathered above so the sample rate and frame count are visible before loading.
print(metadata)

AudioMetaData(sample_rate=48000, num_frames=26812800, num_channels=1, bits_per_sample=16, encoding=PCM_S)


In [6]:
# Load the whole recording into memory: `waveform` is the audio tensor, `sample_rate` is in Hz.
waveform, sample_rate = torchaudio.load(test_wav)
# The tensor is 2-D; its shape is unpacked here as (channels, frames).
num_channels, num_frames = waveform.shape

In [9]:
import matplotlib.pyplot as plt
def plot_waveform(waveform, sample_rate):
    """Plot every channel of an audio tensor against time, one subplot per channel.

    Args:
        waveform: 2-D tensor exposing ``.numpy()``; its shape is unpacked as
            (channels, frames).
        sample_rate: Sampling rate used to convert frame indices to seconds
            on the x axis.

    The figure is titled "waveform" and shown without blocking. A y-axis
    label ("Channel N", 1-based) is only added when there is more than one
    channel.
    """
    samples = waveform.numpy()
    channel_count, frame_count = samples.shape

    # Frame index divided by the sample rate gives the x axis in seconds.
    seconds = torch.arange(0, frame_count) / sample_rate

    fig, panels = plt.subplots(channel_count, 1)
    # With a single row, subplots() hands back a bare Axes rather than a
    # sequence; wrap it so the loop below can treat both cases alike.
    if channel_count == 1:
        panels = [panels]

    label_channels = channel_count > 1
    for channel_no, (panel, trace) in enumerate(zip(panels, samples), start=1):
        panel.plot(seconds, trace, linewidth=1)
        panel.grid(True)
        if label_channels:
            panel.set_ylabel(f"Channel {channel_no}")

    fig.suptitle("waveform")
    plt.show(block=False)

In [21]:
from tqdm import tqdm
import os

def _count_full_segments(num_frames, start, seg_length, seg_offset):
    # Count the complete seg_length-sample windows, spaced seg_offset samples apart, that fit in [start, num_frames).
    usable = num_frames - start
    if usable < seg_length:
        return 0
    return (usable - seg_length) // seg_offset + 1


def split_long_wav_file(wav_path, new_folder, seg_offset_ms, seg_length_ms, start_ms = 0, verbose = True):
    """Cut a long audio file into fixed-length segments and save each one.

    Segment ``i`` covers samples ``[start + i * offset, start + i * offset + length)``
    and is written to ``new_folder`` as ``<original stem>-<i zero-padded to 5
    digits><original extension>``. Only complete segments are written; a
    trailing remainder shorter than ``seg_length_ms`` is dropped.

    Args:
        wav_path: Path of the audio file to split.
        new_folder: Output directory; created if it does not exist.
        seg_offset_ms: Distance between the starts of consecutive segments,
            in milliseconds. Equal to ``seg_length_ms`` for back-to-back
            segments, smaller for overlapping ones.
        seg_length_ms: Length of every segment, in milliseconds.
        start_ms: Position of the first segment's start, in milliseconds.
        verbose: When true, print the split parameters and progress messages.

    Raises:
        ValueError: If the segment length or offset converts to fewer than
            one sample, or if ``start_ms`` is negative.
    """
    # Load the file the caller asked for (not a notebook-level global).
    waveform, sample_rate = torchaudio.load(wav_path)
    num_frames = waveform.shape[1]

    # Convert all millisecond quantities to whole sample counts.
    start = int(start_ms * sample_rate / 1000)
    seg_length = int(seg_length_ms * sample_rate / 1000)
    seg_offset = int(seg_offset_ms * sample_rate / 1000)

    # Validate after conversion: a tiny positive ms value can still round to 0 samples.
    if seg_length <= 0:
        raise ValueError(f'seg_length_ms must cover at least one sample, got {seg_length_ms}')
    if seg_offset <= 0:
        raise ValueError(f'seg_offset_ms must cover at least one sample, got {seg_offset_ms}')
    if start < 0:
        raise ValueError(f'start_ms must be non-negative, got {start_ms}')

    # Accounts for `start` so no truncated/empty segment is emitted at the tail.
    seg_count = _count_full_segments(num_frames, start, seg_length, seg_offset)

    if verbose:
        print(f'read wav file : {wav_path}')
        print(f'info: {torchaudio.info(wav_path)}')
        print(f'split into {seg_length_ms}ms segments ({seg_length} points), start = {start_ms}ms ({start} points), segment offset = {seg_offset_ms}ms ({seg_offset} points)')
        print(f'segment count : {seg_count}')

    if not os.path.exists(new_folder):
        if verbose:
            print(f'{new_folder} is not exsit, create it!')
        # exist_ok guards against the folder appearing between the check and the call.
        os.makedirs(new_folder, exist_ok=True)

    filename = os.path.basename(wav_path)
    name, ext = os.path.splitext(filename)
    for i in tqdm(range(seg_count)):
        begin = start + i * seg_offset
        segment = waveform[:, begin : begin + seg_length]
        seg_path = os.path.join(new_folder, f"{name}-{i:0>5d}{ext}")
        torchaudio.save(seg_path, segment, sample_rate)
    if verbose:
        print(f'Done.')

In [31]:
# Offset equals length (both 2400 ms), so consecutive segments neither overlap nor leave gaps.
split_long_wav_file(test_wav, 'test/segments', 2400, 2400, 0, True) # 2.4s per segment

read wav file : test/ALE2G-chailian-20dB-15M-4K-3Hz.wav
info: AudioMetaData(sample_rate=48000, num_frames=26786145, num_channels=1, bits_per_sample=16, encoding=PCM_S)
split into 2400ms segments (115200 points), start = 0ms (0 points), segment offset = 2400ms (115200 points)
segment count : 232
test/segments is not exsit, create it!


100%|██████████| 232/232 [00:00<00:00, 378.68it/s]

Done.



