In [None]:
################## IMPORT LIBRARIES ##################

import numpy as np
import random 
import pandas as pd
import os
from os.path import join as pjoin
import matplotlib.pyplot as plt 

In [None]:
################## IMPORT MY MODULES ##################

# Add ../src (relative to the notebook's working directory) to the module
# search path before importing `helpers` -- presumably that is where the
# project's helper module lives.
import sys
sys.path.append('../src')
import helpers as hlp
import importlib
# Reload `helpers` so that edits made to the module after the first import are
# picked up without restarting the kernel. The trailing ';' keeps the notebook
# from echoing the returned module object as cell output.
importlib.reload(hlp);

In [None]:
# Seed BOTH random number generators used in this notebook so that the random
# draws are repeatable after "Restart Kernel -> Run All":
#   - numpy's global RNG (np.random.*; pandas' DataFrame.sample also falls
#     back to it when no random_state is passed),
#   - Python's built-in `random` module (random.shuffle / random.choice),
#     which np.random.seed does NOT affect.
SEED = 42
np.random.seed(SEED)
random.seed(SEED)

# directory where the data bases are stored
datapath="/home/ubuntu/guestxr2/home/ubuntu/Data/"

In [None]:
# ---------- SPEECH POOL ------------

# root directories of the two speech data bases
speech_dataset_path1 =  pjoin(datapath,'VCTK','wav48_silence_trimmed')
speech_dataset_path2 =  pjoin(datapath,'PTDB')


def _collect_speech_files(dataset_root, database, extension, required_in_root=None):
    """Recursively list the speech files found below one data base directory.

    Parameters
    ----------
    dataset_root : str
        Directory that is walked recursively.
    database : str
        Label stored in the 'database_speech' field of every record.
    extension : str
        Only file names ending with this suffix are kept (e.g. '.flac').
    required_in_root : str or None
        If given, only files whose containing directory path includes this
        substring are kept.

    Returns
    -------
    list of dict
        One {'database_speech', 'speech_file_path'} record per matching file,
        in a deterministic (sorted) traversal order. Empty if `dataset_root`
        does not exist or contains no matching file.
    """
    records = []
    for root, dirs, files in os.walk(dataset_root):
        # os.walk lists directory entries in arbitrary (filesystem) order;
        # sorting `dirs` in place fixes the order in which sub-directories are
        # visited, and sorted(files) fixes the order inside each directory.
        dirs.sort()
        if required_in_root is not None and required_in_root not in root:
            continue
        for file in sorted(files):
            if file.endswith(extension):
                records.append({'database_speech': database, 'speech_file_path': os.path.join(root, file)})
    return records


# (data base label, root directory, file extension, substring required in the directory path)
speech_sources = [
    ("VCTK", speech_dataset_path1, '.flac', None),
    # PTDB: make sure the correct speech files are used (MIC directory)
    ("PTDB", speech_dataset_path2, '.wav', "/MIC/"),
]

# fill the list of files with filenames from both data bases:
speech_pool = []
for database, dataset_root, extension, required_in_root in speech_sources:
    speech_pool.extend(_collect_speech_files(dataset_root, database, extension, required_in_root))

# shuffle order; numpy's global RNG is used because it is the generator that
# this notebook seeds with np.random.seed, so the shuffled order is repeatable
np.random.shuffle(speech_pool)

# list to data frame 
speech_pool = pd.DataFrame(speech_pool)
print(f"{len(speech_pool)=}")


In [None]:
# ---------- RIR POOL ------------
database="synth_rirs_mono"
rir_path=pjoin(datapath,database)


def _rir_full_path(file_name):
    """Prefix a RIR file name with the directory of the RIR data base."""
    return pjoin(rir_path, file_name)


# the csv with RIR file names and stats was generated together with the RIRs
# (rir_dataset.ipynb); its first column is used as the index
rir_info_csv = pjoin(rir_path,"rir_info.csv")
rir_pool = pd.read_csv(rir_info_csv, index_col=0)

# add columns holding file paths that include the data base directory
rir_pool["ir_file_path"] = rir_pool["ir_file_name"].apply(_rir_full_path)
has_clones = "ir_clone_file_name" in rir_pool.columns
if has_clones:
    # the data base contains "cloned" RIRs (same room, different position)
    rir_pool["ir_clone_file_path"] = rir_pool["ir_clone_file_name"].apply(_rir_full_path)

# remember which data base every RIR comes from
rir_pool["database_rir"] = database
print(f"{len(rir_pool)=}")

In [None]:
# --------- CREATE METADATA FOR A DATASET I.E. COMBINATIONS OF SPEECH AND RIRS --------
from datetime import datetime
# time stamp intended for the file name of the saved metadata (see the
# commented-out to_csv call at the end of this cell)
date_tag = datetime.now().strftime("%d-%m-%Y--%H-%M")
# create dataset with 150000 data points, which consists of random combinations of speech and rirs
N_datapoints=150000

# # sample from noise pool: 
# df_noise=noise_pool.sample(N_datapoints,replace=True)
# # here plan ways to augment noise data set:
# random_bool_values = [random.choice([1, -1]) for _ in range(len(df_noise))]
# df_noise["aug_phase"]=random_bool_values

# NOTE(review): speech files and RIRs are drawn with replacement from the full
# pools and the split is assigned afterwards by row position, so the same
# speech file / RIR can occur in more than one split -- confirm this is intended.

# sample from speech pool: 
df_speech=speech_pool.sample(N_datapoints,replace=True)
# speech augmentation: one random sign (+1 / -1) per data point, drawn in a
# single vectorized call from numpy's global RNG (the generator seeded with
# np.random.seed in this notebook) so the values are repeatable
df_speech=df_speech.assign(aug_phase=np.random.choice([1, -1], size=len(df_speech)))

# sample from rir pool: 
df_rir=rir_pool.sample(N_datapoints,replace=True)

# concatenate samples from speech and rir pools side by side; both indices are
# reset first so that rows are paired by position (the result has a 0..N-1 index)
# df_ds = pd.concat([df_speech.reset_index(drop=True), df_noise.reset_index(drop=True), df_rir.reset_index(drop=True)], axis=1,ignore_index=False)
df_ds = pd.concat([df_speech.reset_index(drop=True), df_rir.reset_index(drop=True)], axis=1,ignore_index=False)

# randomize snr (only high snrs)
# df_ds["snr"]= 200 #np.random.uniform(low=10, high=30, size=len(df_ds))

# Create test-train-val split by row position, using explicit integer
# boundaries: rows [0, train_end) -> train, [train_end, test_end) -> test,
# [test_end, N_datapoints) -> val. (Label slices with .loc are end-inclusive,
# so float-valued overlapping slices would assign the boundary rows twice.)
train_end = int(round(N_datapoints*0.8)) # 80% training data
test_end = int(round(N_datapoints*0.9)) # next 10% testing data, remaining 10% validation data
df_ds["split"] = (
    ["train"]*train_end
    + ["test"]*(test_end-train_end)
    + ["val"]*(N_datapoints-test_end)
)

# sanity checks: every row is labelled and the split sizes add up
assert len(df_ds) == N_datapoints
assert df_ds["split"].notna().all()
assert (df_ds["split"] == "train").sum() == train_end

# save dataset metadata:
# df_ds.to_csv("../dataset-metadata/ds_metadata_" + date_tag + ".csv")

In [None]:
# sanity check: load the metadata through the dataset class and listen to one sample

sys.path.append('../src')

import helpers as hlp
import dataset as ds
from IPython.display import Audio

# pick up any edits made to the project modules since they were first imported
for project_module in (hlp, ds):
    importlib.reload(project_module)

# start from the basic configuration, then point it at an example metadata
# file and select the training split
config = hlp.load_config("../config/basic.yaml")
config["df_metadata"] = "/home/ubuntu/guestxr2/home/ubuntu/joanna/CWUNET/dataset-metadata/ds1_metadata_example.csv"
config["split"] = "train"
dataset = ds.DatasetReverbTransfer(config)

# get one data sample (the dataset item unpacks into five signals)
SAMPLE_IDX = 55
sContent, sStyle, sTarget, sAnechoContent, sAnechoStyle = dataset[SAMPLE_IDX]

# playback for the data sample (sAnechoStyle is not played back)
audios = [sContent, sStyle, sTarget, sAnechoContent]
names = ["sContent", "sStyle", "sTarget", "sAnechoContent"]

for signal_name, signal in zip(names, audios):
    print(signal_name)
    # drop the leading dimension and move the signal to the CPU before playback
    playable = signal.squeeze(0).cpu()
    display(Audio(playable, rate=48e3))
