In [1]:
import glob
import pandas as pd
from tqdm import tqdm
from scipy.io.wavfile  import write, read
import os
from os import path
from sklearn.utils import shuffle
from os.path import split as sp, basename as bn

In [2]:
ROOT_DATA_PATH = "/home/jeevan/datasets/TIMIT Acoustic-Phonetic Continuous Speech Corpus (LDC93S1)/TIMIT"
DATA_LOCATION = f"{ROOT_DATA_PATH}/T*/*/*/*.WAV"


In [3]:
# Fetch all audio paths from TIMIT dataset
AUDIO_PATHS = glob.glob(DATA_LOCATION)
AUDIO_PATHS.sort()
len(AUDIO_PATHS)

6300

In [4]:
# Vowel list: TIMIT
v_dump = """            iy         beet          bcl b IY tcl t
                        ih         bit           bcl b IH tcl t 
                        eh         bet           bcl b EH tcl t
                        ey         bait          bcl b EY tcl t
                        ae         bat           bcl b AE tcl t
                        aa         bott          bcl b AA tcl t
                        aw         bout          bcl b AW tcl t
                        ay         bite          bcl b AY tcl t
                        ah         but           bcl b AH tcl t
                        ao         bought        bcl b AO tcl t
                        oy         boy           bcl b OY
                        ow         boat          bcl b OW tcl t
                        uh         book          bcl b UH kcl k
                        uw         boot          bcl b UW tcl t
                        ux         toot          tcl t UX tcl t
                        er         bird          bcl b ER dcl d
                        ax         about         AX bcl b aw tcl t
                        ix         debit         dcl d eh bcl b IX tcl t
                        axr        butter        bcl b ah dx AXR
                        ax-h       suspect       s AX-H s pcl p eh kcl k tcl t"""
v_dump = v_dump.split("\n")

vowels = [' '.join(x.split()).split(" ")[0] for x in v_dump]

print(vowels)


['iy', 'ih', 'eh', 'ey', 'ae', 'aa', 'aw', 'ay', 'ah', 'ao', 'oy', 'ow', 'uh', 'uw', 'ux', 'er', 'ax', 'ix', 'axr', 'ax-h']


In [5]:
FS = 16000

In [7]:
# Extract all timit vowels: write to csv
all_phones: list = []
columns = ["filepath", "person", "sex", "start", "end", "phone"]

for path in tqdm(AUDIO_PATHS):
    _phnpath: str = path.replace(".WAV", ".PHN")

    _phns: list = []
    with open(_phnpath, mode="r") as _ph:
        _phns = _ph.readlines() # all lines : start-sample, end-sample, phone ; space seperated
        _phns = [p.strip().split(" ") for p in _phns] # split phones (last element)
        
        _per = bn(sp(_phnpath)[0])
        _sex = _per[0]
        _vows = [[path, _per, _sex] + p  for p in _phns if p[-1] in vowels] # filter phones: path, phone: vowel; lines
        _vows = [({ columns[i]: c for i, c in enumerate(v)}) for v in _vows]

    all_phones = all_phones + _vows # collect

all_timit_vowels_df = pd.DataFrame(all_phones)
all_timit_vowels_df["start"] = pd.to_numeric(all_timit_vowels_df["start"], errors='coerce')
all_timit_vowels_df["end"] = pd.to_numeric(all_timit_vowels_df["end"], errors='coerce')
all_timit_vowels_df["start_sec"] = all_timit_vowels_df["start"] / FS
all_timit_vowels_df["end_sec"] = all_timit_vowels_df["end"] / FS
columns = ["filepath", "person", "sex", "start", "end", "start_sec", "end_sec", "phone"]
all_timit_vowels_df = all_timit_vowels_df[columns]
all_timit_vowels_df.index.name = "idx"
all_timit_vowels_df.to_csv("all_timit_vowels_mf.csv", index=True)

100%|██████████| 6300/6300 [00:19<00:00, 323.78it/s] 


In [8]:
all_timit_vowels_df.to_json("all_timit_vowels_mf.json", index=True, orient="table")


## Formant estimation: Vowels from timit


In [14]:
import praat_formants_python.praat_formants_python as pfp
import numpy as np
import os
import numpy as np
import pandas as pd
import parselmouth
import librosa

from parselmouth.praat import call

In [9]:
ALL_TIMIT_VOWELS_CSV = "/home/jeevan/Jeevan_K/Projects/Asquire/Reverb-Quest/Formants/scripts/all_timit_vowels_mf.csv"
klatt_json_data_path = "/home/jeevan/Jeevan_K/Projects/Asquire/Reverb-Quest/Formants/CSV/for_klatt"

In [15]:
def measure_pitch(audio_path, start, end):
    f0min, f0max = [75, 500]
    
    # samples, fs = librosa.load(audio_path, sr=None)
    
    # chunk = samples[start: end]
    # wavfile.write(tmp_file, fs, chunk)
    
    sound = parselmouth.Sound(audio_path) # read the sound
    pitch = call(sound, "To Pitch", 0.0, f0min, f0max) #create a praat pitch object
    mean_pitch = call(pitch, "Get mean", 0, 0, "Hertz") # get mean pitch
    return mean_pitch

In [17]:
# Calculate formant and pitch
all_timit_vowels_df = pd.read_csv(ALL_TIMIT_VOWELS_CSV)
formants_list = []
columns_formants = ["filepath", "person", "sex", "phone", "pitch_praat", "f1_org_praat", "f2_org_praat", "f3_org_praat"]

for index, row in tqdm(all_timit_vowels_df.iterrows()):
    formants = pfp.formants_at_interval(row['filepath'], row['start_sec'], row['end_sec'], 
                                        maxformant=5500, winlen=0.025, preemph=50)
    formants = formants.mean(axis=0)
    formants = list(formants)[1:] # skip time
    pitch = measure_pitch(row['filepath'], row['start'], row['end'])
    f_row = [row["filepath"], row["person"], row["sex"], row["phone"], pitch] + formants 
    f_row = { columns_formants[i]: x for i, x in enumerate(f_row) }
    formants_list.append(f_row)


78374it [21:16, 61.41it/s]


In [None]:
# After thought: get gender information formant
all_timit_vowels_df = pd.read_csv(ALL_TIMIT_VOWELS_CSV)
formants_list = []
for index, row in tqdm(all_timit_vowels_df.iterrows()):
    # formants = pfp.formants_at_interval(row['filepath'], row['start_sec'], row['end_sec'], 
    #                                     maxformant=5500, winlen=0.025, preemph=50)
    # formants = formants.mean(axis=0)
    filename = row["filepath"]
    f_row = [filename.split("/")[-2], row["phone"]] # + list(formants)[1:]
    formants_list.append(f_row)

In [19]:
formants_list[0][0][0]



'F'

In [18]:
# Export TIMIT vowel formants: praat
MIN_PITCH, MAX_PITCH = (75, 250)

columns_formants = ["filepath", "person", "sex", "phone", "pitch_praat", "f1_org_praat", "f2_org_praat", "f3_org_praat"]
all_timit_vowel_formants_df = pd.DataFrame(formants_list)
# all_timit_vowel_formants_df['pitch'] = np.random.randint(MIN_PITCH, MAX_PITCH, all_timit_vowel_formants_df.shape[0])

all_timit_vowel_formants_df = all_timit_vowel_formants_df[["person", "sex", "phone", "pitch_praat", "f1_org_praat", "f2_org_praat", "f3_org_praat"]]
print(all_timit_vowel_formants_df.head())

all_timit_vowel_formants_df.index.name='id'
all_timit_vowel_formants_df.to_csv("all_timit_vowel_formants_mf_p.csv", index=True)

pth_json = os.path.join(klatt_json_data_path, "all_timit_vowel_formants_mf_p.json")
all_timit_vowel_formants_df.to_json(pth_json, index=True, orient="table")

  person sex phone  pitch_praat  f1_org_praat  f2_org_praat  f3_org_praat
0  FAKS0   F    iy   243.044669    468.354889   2320.570000   2839.730333
1  FAKS0   F    ae   243.044669    840.217462   1879.477692   2768.645000
2  FAKS0   F    er   243.044669    613.416571   1818.399571   2227.838429
3  FAKS0   F    aa   243.044669    786.415600   1457.486700   2400.440200
4  FAKS0   F    uw   243.044669    522.480643   1592.871000   2758.406786


In [None]:
klatt_json_data_path = "/home/jeevan/projects/Asquire/Formants/klatt/src/DATA"
# Klatt will be used to synthesize vowels

# Formant estimation of Synthetic Klatt vowels

In [13]:
import os
from os.path import join
from tqdm import tqdm
import praat_formants_python.praat_formants_python as pfp
import numpy as np
import pandas as pd

In [15]:
ALL_SYNTH_VOWEL_PATH = "/home/jeevan/Jeevan_K/Projects/Asquire/Reverb-Quest/Formants/AUDIO/synth_vows_sample_set_p"
CSV_EXP_PATH = "/home/jeevan/Jeevan_K/Projects/Asquire/Reverb-Quest/Formants/CSV/rvb_formants"

synth_vowels_paths = [join(ALL_SYNTH_VOWEL_PATH, p) for p in os.listdir(ALL_SYNTH_VOWEL_PATH)]; len(synth_vowels_paths)

80

In [None]:
# Calculate formant and pitch
all_timit_vowels_df = pd.read_csv(ALL_TIMIT_VOWELS_CSV)
formants_list = []
columns_formants = ["filepath", "person", "sex", "phone", "pitch_praat", "f1_org_praat", "f2_org_praat", "f3_org_praat"]

for index, row in tqdm(all_timit_vowels_df.iterrows()):
    formants = pfp.formants_at_interval(row['filepath'], row['start_sec'], row['end_sec'], 
                                        maxformant=5500, winlen=0.025, preemph=50)
    formants = formants.mean(axis=0)
    formants = list(formants)[1:] # skip time
    pitch = measure_pitch(row['filepath'], row['start'], row['end'])
    f_row = [row["filepath"], row["person"], row["sex"], row["phone"], pitch] + formants 
    f_row = { columns_formants[i]: x for i, x in enumerate(f_row) }
    formants_list.append(f_row)

In [18]:
DUR = 2
CHUNK_DUR = DUR/3 # select middle third

synth_formants = []
for p in tqdm(synth_vowels_paths):
    formants = pfp.formants_at_interval(p, CHUNK_DUR, CHUNK_DUR * 2, 
                                        maxformant=5500, winlen=0.025, preemph=50)
    pitch_praat_synth = measure_pitch(p, 0, 0)
    name = os.path.basename(p).replace(".wav", "")
    phone, id, person, sex, pitch_praat = name.split("_")

    info = [id, name, person, sex, phone, pitch_praat, pitch_praat_synth]

    fm_mean = formants.mean(axis=0)[1:]
    fm_std = formants.std(axis=0)[1:]

    # print(formants)
    synth_formants.append(np.concatenate([info, fm_mean, fm_std]))

    # break

columns=["id", "filename", "person", "sex", "phone", "pitch_praat", "pitch_praat_synth", "f1_praat_synth_mean", "f2_praat_synth_mean","f3_praat_synth_mean","f1_praat_synth_std","f2_praat_synth_std","f3_praat_synth_std"]
synth_formants_df = pd.DataFrame(synth_formants, columns=columns)
# synth_formants_df.index.name='id'
csv_exp_path = join(CSV_EXP_PATH, "synth_vowel_formants_praat_sample_set_p.csv")
synth_formants_df.to_csv(csv_exp_path, index=False)
synth_formants_df

100%|██████████| 80/80 [00:00<00:00, 5441.67it/s]


Unnamed: 0,id,filename,person,sex,phone,pitch,f1_praat_synth_mean,f2_praat_synth_mean,f3_praat_synth_mean,f1_praat_synth_std,f2_praat_synth_std,f3_praat_synth_std
0,73,uh_73_MJFR0_M_98,MJFR0,M,uh,98,587.032515151515,898.9163333333335,1924.741439393939,8.203950253684011,77.81998260677776,11.411116239092678
1,63,aw_63_FLJG0_F_176,FLJG0,F,aw,176,769.8282575757574,947.1946363636362,1594.3844090909095,16.671369548284655,57.39322633051174,9.00480149862363
2,8,er_8_MRHL0_M_131,MRHL0,M,er,131,640.4743484848485,918.3941818181819,1262.0616212121213,5.1874519391491685,80.13409820228974,16.21765106180856
3,64,ah_64_MRPC0_M_108,MRPC0,M,ah,108,669.5146515151517,990.6596212121214,1348.0490303030301,7.086274469945884,44.85576679481088,14.553491251419459
4,78,ax-h_78_FNLP0_F_219,FNLP0,F,ax-h,219,443.8704696969698,1079.7772121212122,1562.6297878787877,2.6505430502451657,27.98807591691078,9.501403396365136
...,...,...,...,...,...,...,...,...,...,...,...,...
75,51,ay_51_FRNG0_F_231,FRNG0,F,ay,231,894.096015151515,1017.2621818181815,1712.7914848484847,85.10987361417581,71.04787081831115,21.512206543518086
76,44,oy_44_MJVW0_M_130,MJVW0,M,oy,130,600.8860757575758,809.0572424242425,1167.5704696969694,41.97027557447403,89.15665387668318,8.090353116651457
77,26,ao_26_FRNG0_F_266,FRNG0,F,ao,266,778.6580151515151,1088.1088939393937,1391.2191515151512,9.375685184645894,12.421061851510707,23.55643155838596
78,10,er_10_FJMG0_F_224,FJMG0,F,er,224,559.8470454545452,1221.5811212121216,1719.2895000000003,159.28257389234594,440.4184811219587,116.62755163355648


In [37]:
ALL_TIMIT_VOWEL_FORMANTS_PATH = "/home/jeevan/projects/Asquire/Formants/all_timit_vowel_formants.csv"
ALL_SYNTH_VOWEL_FORMANTS_PATH = "/home/jeevan/projects/Asquire/Formants/synth_vowel_formants_praat.csv"

all_timit_vowel_formants_df = pd.read_csv(ALL_TIMIT_VOWEL_FORMANTS_PATH)
all_synth_vowel_formants_df = pd.read_csv(ALL_SYNTH_VOWEL_FORMANTS_PATH)

fnames = all_synth_vowel_formants_df["filename"].values.tolist()
pitch = [int(f.split("_")[-1]) for f in fnames]
id = [int(f.split("_")[-2]) for f in fnames]


all_synth_vowel_formants_df["pitch"] = pitch
all_synth_vowel_formants_df["id"] = id
all_synth_vowel_formants_df = all_synth_vowel_formants_df.sort_values(by=["id"]).reset_index()
all_synth_vowel_formants_df = all_synth_vowel_formants_df.drop(columns=["index"])

# print(all_timit_vowel_formants_df.head())
# print(all_synth_vowel_formants_df.head())

all_synth_vowel_formants_df.to_csv("synth_vowel_formants_praat_corrected.csv", index=False)

all_vowels_formants_df = pd.merge(all_timit_vowel_formants_df, all_synth_vowel_formants_df, on=["id", "pitch", "phone"], how="left")
all_vowels_formants_df.columns


Index(['id', 'phone', 'pitch', 'f1_org_praat', 'f2_org_praat', 'f3_org_praat',
       'filename', 'f1_praat_synth_mean', 'f2_praat_synth_mean',
       'f3_praat_synth_mean', 'f1_praat_synth_std', 'f2_praat_synth_std',
       'f3_praat_synth_std'],
      dtype='object')

In [40]:
pd.set_option('precision', 2)
columns = [ 'id', 'filename', 'phone', 'pitch', 
            'f1_org_praat', 'f2_org_praat', 'f3_org_praat',
            'f1_praat_synth_mean', 'f2_praat_synth_mean', 'f3_praat_synth_mean', 
            'f1_praat_synth_std', 'f2_praat_synth_std', 'f3_praat_synth_std']
all_vowels_formants_df = all_vowels_formants_df[columns]
all_vowels_formants_df = all_vowels_formants_df.round(2)
all_vowels_formants_df.to_csv("all_vowels_formants_org_synth.csv", index=False)

all_vowels_formants_df

Unnamed: 0,id,filename,phone,pitch,f1_org_praat,f2_org_praat,f3_org_praat,f1_praat_synth_mean,f2_praat_synth_mean,f3_praat_synth_mean,f1_praat_synth_std,f2_praat_synth_std,f3_praat_synth_std
0,0,iy_0_97,iy,97,468.35,2320.57,2839.73,471.45,852.74,2308.42,4.82,58.35,12.73
1,1,ae_1_161,ae,161,840.22,1879.48,2768.64,812.25,1021.68,1899.03,5.51,26.19,9.77
2,2,er_2_241,er,241,613.42,1818.40,2227.84,621.71,812.36,1812.92,68.56,79.56,26.11
3,3,aa_3_126,aa,126,786.42,1457.49,2400.44,763.89,1015.20,1476.21,5.82,44.53,14.11
4,4,uw_4_174,uw,174,522.48,1592.87,2758.41,520.08,974.16,1581.74,3.37,52.46,8.18
...,...,...,...,...,...,...,...,...,...,...,...,...,...
78369,78369,ao_78369_154,ao,154,578.49,821.36,2027.85,587.80,800.80,932.48,7.09,10.88,52.04
78370,78370,ih_78370_183,ih,183,491.96,1601.86,2672.21,520.56,796.22,1629.88,6.91,66.61,7.48
78371,78371,ay_78371_247,ay,247,673.32,1436.07,2380.73,595.55,761.91,1523.34,115.66,153.34,192.86
78372,78372,ih_78372_118,ih,118,422.33,2033.47,2634.35,413.59,874.07,2021.12,7.55,66.91,11.12


## Reverberation


In [49]:
import os
import shutil
import pandas as pd
from os.path import join
import librosa
import numpy as np
from scipy.fft import fft, ifft
import soundfile as sf
from tqdm import tqdm
import praat_formants_python.praat_formants_python as pfp

In [42]:
ALL_SYNTH_VOWEL_PATH = "/home/jeevan/projects/Asquire/Formants/tmpexp"
RIR_SIGNAL_PATH = "/home/jeevan/projects/Asquire/Formants/rir_data"
ALL_TIMIT_VOWEL_CSV = "/home/jeevan/projects/Asquire/Formants/all_timit_vowel_formants.csv"

FS = 16000

rir_signals = {(p.replace("RVB2014_type1_rir_", "").replace(".wav", "")): librosa.load(join(RIR_SIGNAL_PATH, p), sr=FS)[0] for p in os.listdir(RIR_SIGNAL_PATH)}
print(rir_signals.keys())

all_vowels_df = pd.read_csv(ALL_TIMIT_VOWEL_CSV)
print(all_vowels_df.tail())

dict_keys(['largeroom1_far_angla', 'largeroom1_near_anglb', 'largeroom1_far_anglb', 'smallroom1_far_angla', 'smallroom1_near_anglb', 'largeroom1_near_angla', 'mediumroom1_far_anglb', 'smallroom1_far_anglb', 'smallroom1_near_angla', 'mediumroom1_far_angla', 'mediumroom1_near_anglb', 'mediumroom1_near_angla'])
          id phone  pitch  f1_org_praat  f2_org_praat  f3_org_praat
78369  78369    ao    154        578.49        821.36       2027.85
78370  78370    ih    183        491.96       1601.86       2672.21
78371  78371    ay    247        673.32       1436.07       2380.73
78372  78372    ih    118        422.33       2033.47       2634.35
78373  78373    ih    185        508.47       1745.41       2611.96


In [68]:
TMP_RVB_FOLDER = "/home/jeevan/projects/Asquire/Formants/tmprvb"

if os.path.exists(TMP_RVB_FOLDER):
    shutil.rmtree(TMP_RVB_FOLDER)

os.mkdir(TMP_RVB_FOLDER)

In [44]:
def distort(aud_sig, rir_sig):
    rir_sig = rir_sig[: len(rir_sig) // 15]
    out_len = len(aud_sig) + len(rir_sig) - 1
    aud_sig_fft = fft(aud_sig, n=out_len)
    rir_sig_fft = fft(rir_sig, n=out_len)
    aud_sig_reverb = np.real(ifft(np.multiply(aud_sig_fft, rir_sig_fft)))[: len(aud_sig)]
    aud_sig_reverb = aud_sig_reverb / max(abs(aud_sig_reverb))

    return aud_sig_reverb

In [5]:
formants = pfp.formants_at_interval(row['filepath'], row['start_sec'], row['end_sec'], 
                                    maxformant=5500, winlen=0.025, preemph=50)

NameError: name 'row' is not defined

In [55]:
all_vowels_formants_org_synth_df

Unnamed: 0,id,filename,phone,pitch,f1_org_praat,f2_org_praat,f3_org_praat,f1_praat_synth_mean,f2_praat_synth_mean,f3_praat_synth_mean,f1_praat_synth_std,f2_praat_synth_std,f3_praat_synth_std
0,0,iy_0_97,iy,97,468.35,2320.57,2839.73,471.45,852.74,2308.42,4.82,58.35,12.73
1,1,ae_1_161,ae,161,840.22,1879.48,2768.64,812.25,1021.68,1899.03,5.51,26.19,9.77
2,2,er_2_241,er,241,613.42,1818.40,2227.84,621.71,812.36,1812.92,68.56,79.56,26.11
3,3,aa_3_126,aa,126,786.42,1457.49,2400.44,763.89,1015.20,1476.21,5.82,44.53,14.11
4,4,uw_4_174,uw,174,522.48,1592.87,2758.41,520.08,974.16,1581.74,3.37,52.46,8.18
...,...,...,...,...,...,...,...,...,...,...,...,...,...
78369,78369,ao_78369_154,ao,154,578.49,821.36,2027.85,587.80,800.80,932.48,7.09,10.88,52.04
78370,78370,ih_78370_183,ih,183,491.96,1601.86,2672.21,520.56,796.22,1629.88,6.91,66.61,7.48
78371,78371,ay_78371_247,ay,247,673.32,1436.07,2380.73,595.55,761.91,1523.34,115.66,153.34,192.86
78372,78372,ih_78372_118,ih,118,422.33,2033.47,2634.35,413.59,874.07,2021.12,7.55,66.91,11.12


In [None]:
all_vowels_formants_org_synth_df = pd.read_csv("all_vowels_formants_org_synth.csv")

columns = all_vowels_formants_org_synth_df.columns
new_cols = ["rir_type", "f1_praat_rvb_mean", "f2_praat_rvb_mean", "f3_praat_rvb_mean", "f1_praat_rvb_std", "f2_praat_rvb_std", "f3_praat_rvb_std"]

columns = np.concatenate([columns, new_cols])

DUR = 2
CHUNK_DUR = DUR/3

rvb_formants = []
for _, vow in tqdm(all_vowels_formants_org_synth_df.iloc[:1, :].iterrows()):
    
    id = vow["id"]
    fname_aud = vow["filename"]
    fpath_aud = join(ALL_SYNTH_VOWEL_PATH, f"{fname_aud}.wav")

    if os.path.exists(TMP_RVB_FOLDER):
        shutil.rmtree(TMP_RVB_FOLDER)

    os.mkdir(TMP_RVB_FOLDER)

    #reverb
    sig_aud = librosa.load(fpath_aud , sr=FS)[0]
    for rir_typ in rir_signals.keys():
        sig_aud_rvb = distort(sig_aud, rir_signals[rir_typ])

        fname_aud_rir = f"{fname_aud}_rvb_{rir_typ}.wav"
        fpath_aud_rir = join(TMP_RVB_FOLDER, fname_aud_rir)
        
        sf.write(fpath_aud_rir, sig_aud_rvb, FS)

        formants = pfp.formants_at_interval(fpath_aud_rir, CHUNK_DUR, CHUNK_DUR * 2,
                                            maxformant=5500, winlen=0.025, preemph=50)


        info = vow.values.tolist()


        fm_mean = formants.mean(axis=0)[1:]
        fm_std = formants.std(axis=0)[1:]

        # print(formants)
        rvb_formants.append(np.concatenate([info, [rir_typ], fm_mean, fm_std]))
        
        # break

    # shutil.rmtree(TMP_RVB_FOLDER)

    # print(fpath_aud, os.path.exists(fpath_aud))

    # break

rvb_formants_df = pd.DataFrame(rvb_formants, columns=columns)
columns = ['id', 'filename', 'phone', 'pitch', 'rir_type', 
            'f1_org_praat', 'f2_org_praat', 'f3_org_praat', 
            'f1_praat_synth_mean', 'f2_praat_synth_mean', 'f3_praat_synth_mean', 'f1_praat_synth_std', 'f2_praat_synth_std', 'f3_praat_synth_std', 
            'f1_praat_rvb_mean', 'f2_praat_rvb_mean', 'f3_praat_rvb_mean', 'f1_praat_rvb_std', 'f2_praat_rvb_std', 'f3_praat_rvb_std']

rvb_formants_df.to_csv("all_vowels_formants_org_synth_rvb.csv", index=False)
# rvb_formants_df

In [74]:
rvb_formants_df.columns

Index(['id', 'filename', 'phone', 'pitch', 'f1_org_praat', 'f2_org_praat',
       'f3_org_praat', 'f1_praat_synth_mean', 'f2_praat_synth_mean',
       'f3_praat_synth_mean', 'f1_praat_synth_std', 'f2_praat_synth_std',
       'f3_praat_synth_std', 'rir_type', 'f1_praat_rvb_mean',
       'f2_praat_rvb_mean', 'f3_praat_rvb_mean', 'f1_praat_rvb_std',
       'f2_praat_rvb_std', 'f3_praat_rvb_std'],
      dtype='object')

In [None]:
sig_aud_t = librosa.load("/home/jeevan/projects/Asquire/Formants/tmpexp/iy_0_97.wav" , sr=FS)
sig_rir_t = librosa.load("/home/jeevan/projects/Asquire/Formants/rir_data/RVB2014_type1_rir_largeroom1_far_angla.wav" , sr=FS)
# sig_aud_t = np.array(sig_aud_t)
# sig_rir_t = np.array(sig_rir_t)

sig_aud_rvb = distort(sig_aud_t, sig_rir_t)
sig_aud_rvb

In [5]:
def distort2():
    sig_aud_t, _ = librosa.load("/home/jeevan/projects/Asquire/Formants/tmpexp/iy_0_97.wav" , sr=FS)
    sig_rir_t, _ = librosa.load("/home/jeevan/projects/Asquire/Formants/rir_data/RVB2014_type1_rir_largeroom1_far_angla.wav" , sr=FS)
    aud_sig = sig_aud_t
    rir_sig = sig_rir_t
    out_len = len(aud_sig) + len(rir_sig) - 1
    aud_sig_fft = fft(aud_sig, n=out_len)
    rir_sig_fft = fft(rir_sig, n=out_len)
    aud_sig_reverb = np.real(ifft(np.multiply(aud_sig_fft, rir_sig_fft)))[: len(aud_sig)]
    aud_sig_reverb = aud_sig_reverb / max(abs(aud_sig_reverb))

    return aud_sig_reverb
distort2()

array([ 1.6867904e-07, -7.3341596e-08,  6.6004830e-08, ...,
       -1.0186957e-01, -1.6129187e-01, -2.1081752e-01], dtype=float32)

In [9]:
synth_vowels = [join(ALL_SYNTH_VOWEL_PATH, p) for p in os.listdir(ALL_SYNTH_VOWEL_PATH)]