In [17]:
from pathlib import Path

import pandas as pd

import numpy as np

from librosa.core import get_duration, resample, load
from librosa.output import write_wav

from tqdm.autonotebook import tqdm

In [3]:
# Every other path in this notebook is built relative to the working directory.
current_dir = Path()

In [4]:
# Folder (relative to the working directory) that receives the generated CSV files.
config_path = "config"
current_dir.joinpath(config_path).mkdir(exist_ok=True)

Данные находятся на 2 уровня выше (если это не так, нужно прописать путь до папки с данными)

In [5]:
# Root of the original dataset; it is listed below and read by preprocess_part.
data_dir = current_dir.joinpath('..', '..', 'data', 'nr')

In [6]:
# Root for the transformed copies; preprocess_part writes below it using the
# same relative paths the files have under data_dir.
output_dir = current_dir.joinpath('..', '..', 'data', "transformed", 'nr')

In [9]:
# Show the entries found in the data folder. iterdir() yields entries in
# arbitrary order, so sort them to keep the displayed output reproducible.
sorted(data_dir.iterdir())

[WindowsPath('../../data/nr/test'), WindowsPath('../../data/nr/train')]

## Почему так?
По непонятной мне причине `spleeter` работает только со стерео-аудио, причём с `sample_rate` только 44100.

In [10]:
# Sample rates in Hz: files are loaded at from_sr and resampled to to_sr,
# the only rate spleeter accepts according to the note above.
from_sr, to_sr = 8000, 44100

In [18]:
def preprocess_part(part):
    """Convert one dataset split to ``to_sr`` stereo and write a CSV index for it.

    For every sample folder under ``data_dir / part / "mix"`` the mixture
    (``mix.wav``) and its two sources (``source/<folder>/1.wav`` as voice,
    ``source/<folder>/2.wav`` as noise) are loaded at ``from_sr``, checked to
    have the same duration, resampled to ``to_sr``, duplicated into two
    channels and written under ``output_dir`` with the same relative layout
    they have under ``data_dir``.

    Parameters
    ----------
    part : str
        Name of the split sub-folder of ``data_dir`` (e.g. ``"train"``).

    Returns
    -------
    pandas.DataFrame
        The first five rows of the index. The complete index, with columns
        ``mix_path``, ``voice_path``, ``noise_path`` (all relative to
        ``data_dir``) and ``duration`` (seconds, measured at ``from_sr``),
        is saved to ``{config_path}/voice_{part}.csv``.

    Raises
    ------
    AssertionError
        If the mixture and its two sources do not have the same duration.
    """
    columns = ["mix_path", "voice_path", "noise_path", "duration"]
    source_root = data_dir / part / "source"

    # Materialise the folder list so tqdm knows the total, sort it so the CSV
    # row order does not depend on the file system, and drop stray files
    # (anything that is not a sample folder cannot contain a mix.wav).
    mix_dirs = sorted(
        entry for entry in (data_dir / part / "mix").iterdir() if entry.is_dir()
    )

    records = []
    for mix_dir in tqdm(mix_dirs):
        # ``.name`` keeps the full folder name; ``.stem`` would cut it at the
        # last dot and point the source paths at a non-existent folder.
        folder_name = mix_dir.name

        # Order matters: it matches the mix / voice / noise CSV columns.
        paths = [
            mix_dir / "mix.wav",
            source_root / folder_name / "1.wav",
            source_root / folder_name / "2.wav",
        ]

        #### load audio (keyword arguments work on old and new librosa alike)
        audios = [load(str(path), sr=from_sr)[0] for path in paths]

        #### check duration
        durations = [get_duration(y=audio, sr=from_sr) for audio in audios]
        assert durations[0] == durations[1] == durations[2], (
            f"duration mismatch in {mix_dir}: {durations}"
        )

        #### paths relative to data_dir are reused for the output tree and the CSV
        relative_paths = [path.relative_to(data_dir) for path in paths]

        for relative_path, audio in zip(relative_paths, audios):
            #### resample
            resampled = resample(audio, orig_sr=from_sr, target_sr=to_sr)

            #### convert to stereo: duplicate the signal into an (n_samples, 2) array
            stereo = np.repeat(resampled.reshape(-1, 1), 2, axis=-1)

            #### write transformed wav
            target = output_dir / relative_path
            target.parent.mkdir(parents=True, exist_ok=True)
            write_wav(str(target), stereo, to_sr)

        records.append([*relative_paths, durations[0]])

    # Build the frame once instead of appending row by row (quadratic cost).
    df = pd.DataFrame(records, columns=columns)
    df.to_csv(f"{config_path}/voice_{part}.csv", index=False)
    return df.head()

In [20]:
# Convert the "train" split; the cell output is the head of the index table.
preprocess_part(part="train")

HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))




Unnamed: 0,mix_path,voice_path,noise_path,duration
0,train\mix\0006f9c421ef408ab5fd5a4aa41e8fee\mix...,train\source\0006f9c421ef408ab5fd5a4aa41e8fee\...,train\source\0006f9c421ef408ab5fd5a4aa41e8fee\...,15.0
1,train\mix\00a38fa379b8423192eb11a142a6018e\mix...,train\source\00a38fa379b8423192eb11a142a6018e\...,train\source\00a38fa379b8423192eb11a142a6018e\...,15.0
2,train\mix\00a9062c28ec46ac8c39452007a850f8\mix...,train\source\00a9062c28ec46ac8c39452007a850f8\...,train\source\00a9062c28ec46ac8c39452007a850f8\...,15.0
3,train\mix\00ac38d7e579480f83b3adfae1926f9c\mix...,train\source\00ac38d7e579480f83b3adfae1926f9c\...,train\source\00ac38d7e579480f83b3adfae1926f9c\...,15.0
4,train\mix\00f17f6ef7234254a9456f83321a54b4\mix...,train\source\00f17f6ef7234254a9456f83321a54b4\...,train\source\00f17f6ef7234254a9456f83321a54b4\...,15.0


In [21]:
# Convert the "test" split; the cell output is the head of the index table.
preprocess_part(part="test")

HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))




Unnamed: 0,mix_path,voice_path,noise_path,duration
0,test\mix\ccc0f5f83eab4172838dbd9820189421\mix.wav,test\source\ccc0f5f83eab4172838dbd9820189421\1...,test\source\ccc0f5f83eab4172838dbd9820189421\2...,15.0
1,test\mix\ccf2a3d9eea04154ba14e05c4bf77ffb\mix.wav,test\source\ccf2a3d9eea04154ba14e05c4bf77ffb\1...,test\source\ccf2a3d9eea04154ba14e05c4bf77ffb\2...,15.0
2,test\mix\cd1d24c1b72040f2b6548c281a722ac5\mix.wav,test\source\cd1d24c1b72040f2b6548c281a722ac5\1...,test\source\cd1d24c1b72040f2b6548c281a722ac5\2...,15.0
3,test\mix\cd257893cc344a988564cc88c173cbf8\mix.wav,test\source\cd257893cc344a988564cc88c173cbf8\1...,test\source\cd257893cc344a988564cc88c173cbf8\2...,15.0
4,test\mix\cdad48658f2b4207afb0caef69307106\mix.wav,test\source\cdad48658f2b4207afb0caef69307106\1...,test\source\cdad48658f2b4207afb0caef69307106\2...,15.0
