In [2]:
import random
import uuid
from pathlib import Path

import pandas as pd

import numpy as np

from librosa.core import get_duration, resample, load
from librosa.output import write_wav

from pydub import AudioSegment

from tqdm.autonotebook import tqdm, trange

In [3]:
import IPython.display as ipd

In [4]:
# Anchor for every relative path used in this notebook (the working directory).
current_dir = Path()

In [5]:
# Folder, relative to the notebook, that holds the generated CSV manifests;
# created here so later writes do not fail on a fresh checkout.
config_path = "config"
current_dir.joinpath(config_path).mkdir(exist_ok=True)

Данные находятся на 2 уровня выше (если это не так, нужно прописать путь до папки с данными)

In [6]:
# Raw corpus location: two directory levels above the notebook, under data/nr.
data_dir = current_dir.joinpath('..', '..', 'data', 'nr')

In [7]:
# Root folder that receives the generated mixes and per-speaker tracks.
base_output_dir = current_dir.joinpath('..', '..', 'data', 'transformed', 'diarisation', 'nr')

In [8]:
# Sanity check: show which entries exist directly under the data folder.
[*data_dir.iterdir()]

[WindowsPath('../../data/nr/test'), WindowsPath('../../data/nr/train')]

## Почему так?
По непонятной мне причине `spleeter` работает только со стереоаудио, причём только с `sample_rate` 44100.

In [9]:
# Audio format settings: sample rates in Hz and the output channel count.
# NOTE(review): from_sr is presumably the sample rate of the source recordings;
# it is not referenced in the visible cells — confirm it is still needed.
from_sr = 8000
# Frame rate given to the silence segments created in preprocess_part.
to_sr = 44100
# Kept as a string because it is passed as the value of the "-ac" export parameter.
to_channels = "2"

In [10]:
def _whole_ms(segment):
    """Return the length of ``segment`` in whole milliseconds, rounded up.

    Rounding up (instead of to the nearest millisecond) guarantees that a
    canvas of this length is never shorter than the segment itself.
    """
    frames = int(segment.frame_count())
    return -(-frames * 1000 // segment.frame_rate)  # integer ceiling division


def preprocess_part(part, num_audios, seed=None):
    """Create synthetic two-speaker mixes for one dataset split.

    For every mix, two different source folders are drawn at random, the
    second speaker is delayed by a random whole number of seconds (0-5), and
    three stereo WAV files are written to a fresh folder under
    ``base_output_dir / part``: ``first_speaker.wav``, ``second_speaker.wav``
    and ``mix_audio.wav``. All three files have the same length, so the mix is
    exactly the sum of the two reference tracks. A CSV manifest with paths
    relative to ``base_output_dir`` is written to
    ``{config_path}/voice_{part}.csv``.

    Parameters
    ----------
    part : str
        Name of the split folder under ``data_dir`` (e.g. ``"train"``); the
        recordings are read from ``data_dir / part / "source" / <id> / "1.wav"``.
    num_audios : int
        Number of mixes to generate.
    seed : int or None, optional
        Seed for speaker selection, offsets and output folder names. With the
        default ``None`` every run is different; with a fixed seed a re-run
        picks the same pairs and overwrites the same output folders.

    Returns
    -------
    pandas.DataFrame
        The first five rows of the manifest.

    Raises
    ------
    ValueError
        If fewer than two source folders containing ``1.wav`` are found.
    """
    source_audio_dir = data_dir / part / "source"
    # Keep only folders that actually contain a recording, and sort them so a
    # seeded run does not depend on the order the filesystem lists entries in.
    source_audios = sorted(
        entry for entry in source_audio_dir.iterdir() if (entry / "1.wav").is_file()
    )
    if len(source_audios) < 2:
        raise ValueError(
            f"Need at least 2 source folders with a 1.wav in {source_audio_dir}, "
            f"found {len(source_audios)}"
        )

    rng = random.Random(seed)
    records = []

    for _ in trange(num_audios):
        # select 2 different random recordings to mix
        first_dir, second_dir = rng.sample(source_audios, k=2)
        first_speaker = AudioSegment.from_wav(first_dir / "1.wav")
        second_speaker = AudioSegment.from_wav(second_dir / "1.wav")

        # random delay of the second speaker, seconds to milliseconds
        offset_duration = rng.randint(0, 5) * 1000

        # overlay() always returns a segment as long as the one it is called
        # on and cuts off whatever sticks out. Placing both speakers on a
        # shared silent canvas that is long enough for either of them keeps
        # the tail of the longer recording in the mix and makes all three
        # tracks the same length. The canvas frame rate also brings the
        # tracks up to to_sr.
        total_duration = offset_duration + max(
            _whole_ms(first_speaker), _whole_ms(second_speaker)
        )
        canvas = AudioSegment.silent(duration=total_duration, frame_rate=to_sr)
        first_speaker = canvas.overlay(first_speaker)
        second_speaker = canvas.overlay(second_speaker, position=offset_duration)
        mix_audio = first_speaker.overlay(second_speaker)

        # save to a randomly named output folder (reproducible when seeded)
        folder_name = uuid.UUID(int=rng.getrandbits(128), version=4).hex
        output_folder = base_output_dir / part / folder_name
        output_folder.mkdir(parents=True, exist_ok=True)
        first_speaker.export(output_folder / "first_speaker.wav", parameters=["-ac", to_channels])
        second_speaker.export(output_folder / "second_speaker.wav", parameters=["-ac", to_channels])
        mix_audio.export(output_folder / "mix_audio.wav", parameters=["-ac", to_channels])

        # collect the manifest row; the frame is built once after the loop
        relative_path = output_folder.relative_to(base_output_dir)
        records.append({
            "mix_path": str(relative_path / "mix_audio.wav"),
            "first_speaker_path": str(relative_path / "first_speaker.wav"),
            "second_speaker_path": str(relative_path / "second_speaker.wav"),
            "duration": mix_audio.duration_seconds,
        })

    df = pd.DataFrame(
        records,
        columns=["mix_path", "first_speaker_path", "second_speaker_path", "duration"],
    )
    df.to_csv(f"{config_path}/voice_{part}.csv", index=False)
    return df.head()

In [11]:
# Generate 3000 synthetic two-speaker mixes from the "train" split.
preprocess_part("train", num_audios=3000)

HBox(children=(FloatProgress(value=0.0, max=3000.0), HTML(value='')))




Unnamed: 0,mix_path,first_speaker_path,second_speaker_path,duration
0,train\4cda1ebc580e4a3dbf053878c2604e7c\mix_aud...,train\4cda1ebc580e4a3dbf053878c2604e7c\first_s...,train\4cda1ebc580e4a3dbf053878c2604e7c\second_...,18.0
1,train\70f0b0237be742c89890312d84b62d2a\mix_aud...,train\70f0b0237be742c89890312d84b62d2a\first_s...,train\70f0b0237be742c89890312d84b62d2a\second_...,15.0
2,train\8dd1e6aff7fe4299aed725f9bf674258\mix_aud...,train\8dd1e6aff7fe4299aed725f9bf674258\first_s...,train\8dd1e6aff7fe4299aed725f9bf674258\second_...,18.0
3,train\3310552a6da5499a86bded48b66cccd4\mix_aud...,train\3310552a6da5499a86bded48b66cccd4\first_s...,train\3310552a6da5499a86bded48b66cccd4\second_...,16.0
4,train\011fb596e13044ab8567e5e566250b0d\mix_aud...,train\011fb596e13044ab8567e5e566250b0d\first_s...,train\011fb596e13044ab8567e5e566250b0d\second_...,16.0


In [12]:
# Generate 400 synthetic two-speaker mixes from the "test" split.
preprocess_part("test", num_audios=400)

HBox(children=(FloatProgress(value=0.0, max=400.0), HTML(value='')))




Unnamed: 0,mix_path,first_speaker_path,second_speaker_path,duration
0,test\629b1ff449ce42969eede030f6a04116\mix_audi...,test\629b1ff449ce42969eede030f6a04116\first_sp...,test\629b1ff449ce42969eede030f6a04116\second_s...,15.0
1,test\5720b430a5c642ec9446b3c4bccffd6b\mix_audi...,test\5720b430a5c642ec9446b3c4bccffd6b\first_sp...,test\5720b430a5c642ec9446b3c4bccffd6b\second_s...,20.0
2,test\fdceb1c6905d4455afd85462cabc7822\mix_audi...,test\fdceb1c6905d4455afd85462cabc7822\first_sp...,test\fdceb1c6905d4455afd85462cabc7822\second_s...,18.0
3,test\405120b1889c4a2f88483dc0d044258f\mix_audi...,test\405120b1889c4a2f88483dc0d044258f\first_sp...,test\405120b1889c4a2f88483dc0d044258f\second_s...,17.0
4,test\ac6c246da01f469286d0091af21be786\mix_audi...,test\ac6c246da01f469286d0091af21be786\first_sp...,test\ac6c246da01f469286d0091af21be786\second_s...,18.0
