In [2]:
################## IMPORT LIBRARIES ##################

import numpy as np
import random 
import pandas as pd
import os
from os.path import join as pjoin

In [3]:
################## IMPORT MY MODULES ##################

import sys
# Add the sibling ../src directory to the module search path (presumably where
# helpers.py lives - verify); it has to be appended before `helpers` is imported.
sys.path.append('../src')
import helpers as hlp
import importlib
# Reload `helpers` so that edits to the module are picked up without restarting
# the kernel. The trailing semicolon suppresses the cell output (reload()
# returns the module object, which would otherwise be displayed).
importlib.reload(hlp);

In [4]:
# Seed BOTH random number generators so that a "Restart & Run All" is reproducible:
#  - numpy's global state (pandas `.sample` falls back on it when no random_state is given)
#  - Python's built-in `random` module (a separate generator that np.random.seed does not touch)
np.random.seed(42)
random.seed(42)

# Root directory that holds the speech and RIR databases.
# The default is the original machine's location; set the REVERB_DATA_PATH
# environment variable to point the notebook at the data on another machine
# instead of editing this cell.
# datapath="/media/ssd2/RESULTS-reverb-match-cond-u-net/"
datapath = os.environ.get("REVERB_DATA_PATH", "/home/ubuntu/Data/")

In [5]:
# ---------- SPEECH POOL ------------

def _collect_speech_files(dataset_root, database, extension, root_must_contain=None):
    """Recursively list the audio files of one speech database.

    Parameters
    ----------
    dataset_root : str
        Directory that is walked recursively.
    database : str
        Label stored in the 'database_speech' field of every record.
    extension : str
        Only files whose name ends with this suffix are kept (e.g. '.flac').
    root_must_contain : str, optional
        If given, only files whose containing directory path includes this
        substring are kept.

    Returns
    -------
    list of dict
        One {'database_speech', 'speech_file_path'} record per matching file,
        in sorted traversal order.
    """
    records = []
    for root, dirs, files in os.walk(dataset_root):
        # os.walk yields entries in an arbitrary, filesystem-dependent order.
        # Sorting `dirs` in place fixes the order in which sub-directories are
        # visited, and sorting `files` fixes the order within each directory.
        dirs.sort()
        for file in sorted(files):
            if not file.endswith(extension):
                continue
            if root_must_contain is not None and root_must_contain not in root:
                continue
            records.append({'database_speech': database, 'speech_file_path': os.path.join(root, file)})
    return records


# paths to the two speech databases
speech_dataset_path1 =  pjoin(datapath,'VCTK','wav48_silence_trimmed')
speech_dataset_path2 =  pjoin(datapath,'PTDB')

# VCTK: every .flac file below the data base directory
speech_pool = _collect_speech_files(speech_dataset_path1, "VCTK", '.flac')

# PTDB: only .wav files located below a MIC directory (the correct speech recordings)
speech_pool += _collect_speech_files(speech_dataset_path2, "PTDB", '.wav', root_must_contain="/MIC/")

# Fail early with a clear message instead of an obscure sampling error later on.
if not speech_pool:
    raise FileNotFoundError(
        f"No speech files found under {speech_dataset_path1!r} or {speech_dataset_path2!r}"
    )

# shuffle order - a dedicated, locally seeded generator keeps the order identical
# on every run, no matter how much global random state was consumed before
random.Random(42).shuffle(speech_pool)

# turn list to data frame 
speech_pool = pd.DataFrame(speech_pool)
print(f"{len(speech_pool)=}")


len(speech_pool)=93046


In [6]:
# ---------- RIR POOL ------------
database = "synth_rirs_mono"
rir_path = pjoin(datapath, 'synth_rirs_mono')

# The RIR paths and statistics were written to rir_info.csv together with the
# RIRs themselves (rir_dataset.ipynb); load that table and tag every row with
# the name of the data base it comes from.
rir_pool = pd.read_csv(
    pjoin(rir_path, "rir_info.csv"),
    index_col=0,
).assign(database_rir=database)
print(f"{len(rir_pool)=}")

len(rir_pool)=10000


In [10]:
# --------- CREATE METADATA FOR A DATASET BEING COMBINATIONS OF SPEECH AND RIRS --------
from datetime import datetime

# time stamp for the file name of the exported metadata (see the to_csv line at the bottom)
date_tag = datetime.now().strftime("%d-%m-%Y--%H-%M")

# create dataset with 150000 data points, which consists of random combinations of speech and rirs
N_datapoints=150000

# Dedicated, locally seeded generator: every random draw of this cell goes through it,
# so re-running the cell (or the whole notebook) always produces the same metadata.
rng = np.random.RandomState(42)

# # sample from noise pool (disabled - the current dataset contains no noise):
# df_noise=noise_pool.sample(N_datapoints,replace=True)
# # here plan ways to augment noise data set:
# random_bool_values = [random.choice([1, -1]) for _ in range(len(df_noise))]
# df_noise["aug_phase"]=random_bool_values

# sample from speech pool (with replacement):
df_speech=speech_pool.sample(N_datapoints,replace=True,random_state=rng)
# augmentation of the speech data set: a random sign (+1 / -1) per data point
# (presumably applied as a polarity flip when the audio is loaded - verify downstream)
df_speech["aug_phase"]=rng.choice([1, -1], size=len(df_speech))

# sample from rir pool (with replacement):
df_rir=rir_pool.sample(N_datapoints,replace=True,random_state=rng)

# concatenate samples from speech and rir pools side by side; both indices are reset first
# so that rows are paired by position and the result is indexed 0..N_datapoints-1
# df_ds = pd.concat([df_speech.reset_index(drop=True), df_noise.reset_index(drop=True), df_rir.reset_index(drop=True)], axis=1,ignore_index=False)
df_ds = pd.concat([df_speech.reset_index(drop=True), df_rir.reset_index(drop=True)], axis=1,ignore_index=False)

# randomize snr (only high snrs)
# df_ds["snr"]= 200 #np.random.uniform(low=10, high=30, size=len(df_ds))

# Create test-train-val split by row position: 80% train, 10% test, 10% val.
# Integer arithmetic gives exact boundaries; the previous float-valued, end-inclusive
# .loc slices only produced the right sizes because later assignments overwrote the
# shared boundary rows.
# NOTE(review): speech files and RIRs are drawn with replacement before the split,
# so the same file can occur in more than one split.
train_end = N_datapoints * 8 // 10
test_end = N_datapoints * 9 // 10
df_ds["split"] = np.repeat(
    ["train", "test", "val"],
    [train_end, test_end - train_end, N_datapoints - test_end],
)

# save dataset metadata:
# df_ds.to_csv(date_tag+"_data_set.csv")


In [11]:
# # One-off utility (disabled): rewrite the absolute paths in metadata generated on the GuestXR computer so that they resolve on another machine:

# df = pd.read_csv("../dataset-metadata/nonoise2_guestxr2.csv",index_col=0)

# df["speech_file_path"]=df["speech_file_path"].str.replace("/home/ubuntu/Data/","/media/ssd2/")
# df["noise_file_path"]=df["noise_file_path"].str.replace("/home/ubuntu/Data/","/media/ssd2/")
# df["ir_file_path"]=df["ir_file_path"].str.replace("/home/ubuntu/Data/","/media/ssd2/")

# df.to_csv("../dataset-metadata/nonoise2_dacom.csv",index=False)

# df.head(10)

In [12]:
# Sanity check: reload a previously generated dataset-metadata file from disk.

df = pd.read_csv(
    "../dataset-metadata/17-05-2024--15-42_data_set.csv",
    index_col=0,
)


In [13]:
# Preview the first 100 rows of the reloaded metadata table.
df.head(100)

Unnamed: 0,database_speech,speech_file_path,aug_phase,room_x,room_y,room_z,volume,rt60_set,mic_pos_x,mic_pos_y,...,rt60_masp_stats,cd_masp_stats,mfp_masp_stats,rt30_meas,rt20_meas,edt_meas,c50_meas,ir_file_path,database_rir,split
0,VCTK,/home/ubuntu/Data/VCTK/wav48_silence_trimmed/p...,1,10.814493,8.384743,4.682995,424.638783,0.447588,6.563892,3.201019,...,0.447572,1.737814,4.702880,0.510937,0.481242,0.397801,9.168168,/home/ubuntu/Data/synth_rirs_mono/monoRIR_x108...,synth_rirs_mono,train
1,VCTK,/home/ubuntu/Data/VCTK/wav48_silence_trimmed/p...,-1,21.416892,20.061618,3.688619,1584.843083,0.777491,8.816590,8.991853,...,0.777463,2.547290,5.440065,1.481501,1.474584,0.662867,6.342023,/home/ubuntu/Data/synth_rirs_mono/monoRIR_x214...,synth_rirs_mono,train
2,VCTK,/home/ubuntu/Data/VCTK/wav48_silence_trimmed/p...,-1,12.777369,10.481738,3.152150,422.164396,0.417115,7.610133,4.378013,...,0.417145,1.794826,4.074057,0.616033,0.601062,0.418212,8.612042,/home/ubuntu/Data/synth_rirs_mono/monoRIR_x127...,synth_rirs_mono,train
3,VCTK,/home/ubuntu/Data/VCTK/wav48_silence_trimmed/p...,1,6.872966,4.640696,2.550688,81.355082,0.190540,4.206436,2.948903,...,0.190547,1.165779,2.655924,0.229588,0.234578,0.245114,15.697762,/home/ubuntu/Data/synth_rirs_mono/monoRIR_x687...,synth_rirs_mono,train
4,VCTK,/home/ubuntu/Data/VCTK/wav48_silence_trimmed/p...,-1,23.710565,21.738363,3.192800,1645.661533,0.866427,12.951658,8.587307,...,0.866481,2.458758,4.982790,1.914648,1.963799,1.031423,5.629995,/home/ubuntu/Data/synth_rirs_mono/monoRIR_x237...,synth_rirs_mono,train
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,VCTK,/home/ubuntu/Data/VCTK/wav48_silence_trimmed/p...,-1,6.215067,4.111804,3.845579,98.274273,0.185800,2.533622,2.185450,...,0.185808,1.297516,3.011411,0.166693,0.175208,0.243316,19.372602,/home/ubuntu/Data/synth_rirs_mono/monoRIR_x622...,synth_rirs_mono,train
96,VCTK,/home/ubuntu/Data/VCTK/wav48_silence_trimmed/s...,1,29.904584,22.536426,4.082205,2751.171454,1.049392,16.602824,9.201017,...,1.049393,2.888782,6.196211,2.175475,2.167799,0.893655,6.900420,/home/ubuntu/Data/synth_rirs_mono/monoRIR_x299...,synth_rirs_mono,train
97,VCTK,/home/ubuntu/Data/VCTK/wav48_silence_trimmed/p...,-1,6.790112,3.811883,4.796528,124.149069,0.234304,4.252338,1.421239,...,0.234317,1.298658,3.235750,0.245700,0.260149,0.259550,15.381532,/home/ubuntu/Data/synth_rirs_mono/monoRIR_x679...,synth_rirs_mono,train
98,VCTK,/home/ubuntu/Data/VCTK/wav48_silence_trimmed/p...,-1,7.542147,6.451940,4.749111,231.098765,0.300655,4.130169,3.190674,...,0.300643,1.564222,4.014886,0.306760,0.313075,0.309348,13.798457,/home/ubuntu/Data/synth_rirs_mono/monoRIR_x754...,synth_rirs_mono,train
