# Resample WAV audio files
Refer to: [audio resampling tutorial](https://pytorch.org/audio/0.10.0/tutorials/audio_resampling_tutorial.html)

In [None]:
# Move the kernel's working directory up one level, then list its contents.
# NOTE: `%cd ..` is relative, so re-running this cell moves up one more level each time.
%cd ..
%ls

In [7]:
import os
import torchaudio
import torchaudio.transforms as T
import concurrent.futures
from pathlib import Path
import random

In [8]:
# Example usage:
# Directory searched for the original .wav files, and the directory the resampled copies go to.
# NOTE(review): these are absolute, machine-specific paths — adjust before running elsewhere.
input_directory = "/Users/daniilrobnikov/Developer/TTS/vits/split1"
output_directory = "/Users/daniilrobnikov/Developer/TTS/vits/split1.cleaned"
# Sample rates (Hz): the rate every input file is expected to have, and the target rate.
orig_sr = 16000
new_sr = 22050

In [9]:
def resample_wav_files(input_dir, output_dir, sr, new_sr):
    """Resample every ``.wav`` file under ``input_dir`` and write it to ``output_dir``.

    ``input_dir`` is walked recursively and its directory layout is mirrored
    under ``output_dir``: each output file keeps the relative path and name of
    its source. Files are processed on a thread pool, and a progress line is
    printed for every 1000th file.

    Args:
        input_dir: Root directory searched for ``.wav`` files.
        output_dir: Root directory for the resampled files (created if missing).
        sr: Sample rate every input file is expected to have.
        new_sr: Sample rate the output files are written with.

    Raises:
        ValueError: If a loaded file's sample rate differs from ``sr``. The
            error surfaces while the results are being iterated.
    """
    # Create the output directory if it doesn't exist
    os.makedirs(output_dir, exist_ok=True)

    # Build the resampler once and reuse it for every file
    resampler = T.Resample(
        sr,
        new_sr,
        lowpass_filter_width=128,
        rolloff=0.99999,
        resampling_method="sinc_interp_hann",
    )

    def resample_file(paths):
        """Load one source file, resample it, save it, and return the output path."""
        src_path, dst_path = paths

        waveform, sample_rate = torchaudio.load(src_path)
        # Explicit check instead of `assert`: it is not stripped under `python -O`
        # and it reports which file has the unexpected rate.
        if sample_rate != sr:
            raise ValueError(
                f"{src_path}: expected sample rate {sr}, got {sample_rate}"
            )

        torchaudio.save(dst_path, resampler(waveform), new_sr)
        return dst_path

    def find_and_prep_wav_files(input_dir, output_dir):
        """Yield (source, destination) pairs, creating destination folders on the way."""
        for root, _, files in os.walk(input_dir):
            for file in files:
                if file.endswith(".wav"):
                    file_path = Path(root) / file
                    # Computed once here so the worker does not have to rebuild it
                    output_file = Path(output_dir) / file_path.relative_to(input_dir)
                    os.makedirs(output_file.parent, exist_ok=True)
                    yield str(file_path), output_file

    # Resample the .wav files using threads for parallel processing
    wav_files = find_and_prep_wav_files(input_dir, output_dir)
    with concurrent.futures.ThreadPoolExecutor() as executor:
        for i, output_file in enumerate(executor.map(resample_file, wav_files)):
            if i % 1000 == 0:
                print(f"{i}: {output_file}")

# Run the resampling with the directories and sample rates configured above;
# a progress line is printed for every 1000th file.
resample_wav_files(input_directory, output_directory, orig_sr, new_sr)

0: /Users/daniilrobnikov/Developer/TTS/vits/split1.cleaned/281474982239117.wav
1000: /Users/daniilrobnikov/Developer/TTS/vits/split1.cleaned/281474982163471.wav
2000: /Users/daniilrobnikov/Developer/TTS/vits/split1.cleaned/281474982243109.wav
3000: /Users/daniilrobnikov/Developer/TTS/vits/split1.cleaned/281474981977517.wav
4000: /Users/daniilrobnikov/Developer/TTS/vits/split1.cleaned/281474982151956.wav
5000: /Users/daniilrobnikov/Developer/TTS/vits/split1.cleaned/281474982230487.wav
6000: /Users/daniilrobnikov/Developer/TTS/vits/split1.cleaned/281474982148431.wav
7000: /Users/daniilrobnikov/Developer/TTS/vits/split1.cleaned/281474982063925.wav
8000: /Users/daniilrobnikov/Developer/TTS/vits/split1.cleaned/281474982245493.wav
9000: /Users/daniilrobnikov/Developer/TTS/vits/split1.cleaned/281474982076816.wav
10000: /Users/daniilrobnikov/Developer/TTS/vits/split1.cleaned/281474982426392.wav
11000: /Users/daniilrobnikov/Developer/TTS/vits/split1.cleaned/281474982070535.wav
12000: /Users/dan

In [10]:
# Test random file to see if it worked
# Collect the resampled .wav files (recursively, since the resampler mirrors
# sub-directories) and pick one with random.choice. The previous
# random.randint(0, len(...)) has an inclusive upper bound and could index one
# past the end of the list; os.listdir could also return non-audio entries.
resampled_paths = sorted(Path(output_directory).rglob("*.wav"))
if not resampled_paths:
    raise FileNotFoundError(f"No .wav files found under {output_directory}")
out_path = str(random.choice(resampled_paths))

waveform, sample_rate = torchaudio.load(out_path)
print(f"Sample rate: {sample_rate}")

Sample rate: 22050
