In [1]:
import pandas as pd
import numpy as np
from tqdm import tqdm
import os
import shutil
from os.path import join
import praat_formants_python.praat_formants_python as pfp
from scipy.fft import fft, ifft

from multiprocessing import Pool

import librosa
import soundfile as sf

In [2]:
ALL_SYNTH_VOWEL_PATH = "/home/jeevan/Jeevan_K/Projects/Asquire/Reverb-Quest/Formants/tmpexp"
RIR_SIGNAL_PATH = "/home/jeevan/Jeevan_K/Projects/Asquire/Reverb-Quest/Formants/rir_data"
TMP_RVB_FOLDER = "/home/jeevan/Jeevan_K/Projects/Asquire/Reverb-Quest/Formants/tmprvb"

ALL_ORG_SYN_FORMANTS = "exports/all_vowels_formants_org_synth.csv"
ALL_ORG_SYN_RVB_FORMANTS = "exports/all_vowels_formants_org_synth_rvb.csv"

pfp.clear_formant_cache()

In [3]:
all_vowels_formants_org_synth_df = pd.read_csv(ALL_ORG_SYN_FORMANTS)

columns = all_vowels_formants_org_synth_df.columns
new_cols = ["rir_type", "f1_praat_rvb_mean", "f2_praat_rvb_mean", "f3_praat_rvb_mean", "f1_praat_rvb_std", "f2_praat_rvb_std", "f3_praat_rvb_std"]

columns = np.concatenate([columns, new_cols])

In [4]:
FS = 16000
DUR = 2
CHUNK_DUR = DUR/3

rir_signals = {(p.replace("RVB2014_type1_rir_", "").replace(".wav", "")): librosa.load(join(RIR_SIGNAL_PATH, p), sr=FS)[0] for p in os.listdir(RIR_SIGNAL_PATH)}

In [5]:
def distort(aud_sig, rir_sig):
    rir_sig = rir_sig[: len(rir_sig) // 15]
    out_len = len(aud_sig) + len(rir_sig) - 1
    aud_sig_fft = fft(aud_sig, n=out_len)
    rir_sig_fft = fft(rir_sig, n=out_len)
    aud_sig_reverb = np.real(ifft(np.multiply(aud_sig_fft, rir_sig_fft)))[: len(aud_sig)]
    aud_sig_reverb = aud_sig_reverb / max(abs(aud_sig_reverb))

    return aud_sig_reverb

In [6]:
all_vowels_formants_org_synth_df

dfs_list = np.array_split(all_vowels_formants_org_synth_df, 8)

[print(len(d)* 8) for d in dfs_list]

78376
78376
78376
78376
78376
78376
78368
78368


[None, None, None, None, None, None, None, None]

In [15]:
# columns = all_vowels_formants_org_synth_df.columns
# new_cols = ["rir_type", "f1_praat_rvb_mean", "f2_praat_rvb_mean", "f3_praat_rvb_mean", "f1_praat_rvb_std", "f2_praat_rvb_std", "f3_praat_rvb_std"]
# columns = np.concatenate([columns, new_cols])

def export_formants(df: pd.DataFrame):

    rvb_formants = []
    N_VOWEL_INSTANCES = len(df)
    split_id = df.iloc[0, : ]["id"]
    ALL_ORG_SYN_RVB_FORMANTS = f"all_vowels_formants_org_synth_rvb_{split_id}.csv"

    for i in tqdm(range(N_VOWEL_INSTANCES)):

        vow = df.iloc[i, : ]
        
        fname_aud = vow["filename"]
        fpath_aud = join(ALL_SYNTH_VOWEL_PATH, f"{fname_aud}.wav")

        if os.path.exists(TMP_RVB_FOLDER):
            shutil.rmtree(TMP_RVB_FOLDER)

        os.mkdir(TMP_RVB_FOLDER)

        #reverb
        sig_aud = librosa.load(fpath_aud , sr=FS)[0]
        for rir_typ in rir_signals.keys():
            sig_aud_rvb = distort(sig_aud, rir_signals[rir_typ])

            fname_aud_rir = f"{fname_aud}_rvb_{rir_typ}.wav"
            fpath_aud_rir = join(TMP_RVB_FOLDER, fname_aud_rir)
            
            sf.write(fpath_aud_rir, sig_aud_rvb, FS)

            formants = pfp.formants_at_interval(fpath_aud_rir, CHUNK_DUR, CHUNK_DUR * 2,
                                                maxformant=5500, winlen=0.025, preemph=50)

            fm_mean = formants.mean(axis=0)[1:]
            fm_std = formants.std(axis=0)[1:]

            info = vow.values.tolist()
            rvb_formants.append(np.concatenate([info, [rir_typ], fm_mean, fm_std]))
    
    columns = df.columns
    new_cols = ["rir_type", "f1_praat_rvb_mean", "f2_praat_rvb_mean", "f3_praat_rvb_mean", "f1_praat_rvb_std", "f2_praat_rvb_std", "f3_praat_rvb_std"]
    columns = np.concatenate([columns, new_cols])

    rvb_formants_df = pd.DataFrame(rvb_formants, columns=columns)

    columns_arrange = [ 'id', 'filename', 'phone', 'pitch', 'rir_type', 
                        'f1_org_praat', 'f2_org_praat', 'f3_org_praat', 
                        'f1_praat_synth_mean', 'f2_praat_synth_mean', 'f3_praat_synth_mean', 'f1_praat_synth_std', 'f2_praat_synth_std', 'f3_praat_synth_std', 
                        'f1_praat_rvb_mean', 'f2_praat_rvb_mean', 'f3_praat_rvb_mean', 'f1_praat_rvb_std', 'f2_praat_rvb_std', 'f3_praat_rvb_std']
    rvb_formants_df = rvb_formants_df[columns_arrange]
    rvb_formants_df.index.name = "slno" 
    rvb_formants_df.to_csv(ALL_ORG_SYN_RVB_FORMANTS, index=True)

In [None]:
export_formants(dfs_list[0])

In [9]:
pool = Pool()
result1 = pool.apply_async(export_formants, [dfs_list[0], 0])    # evaluate "solve1(A)" asynchronously
result2 = pool.apply_async(export_formants, [dfs_list[1], 1])    # evaluate "solve2(B)" asynchronously

In [45]:
import os
import pandas as pd
pd.set_option('precision', 2)
ALL_VOWEL_FORMANTS_ORG_SYT_RVB_PATH = "/home/jeevan/Jeevan_K/Projects/Asquire/Reverb-Quest/Formants/exp_chunks"
all_vowel_csv_chunks = [os.path.join(ALL_VOWEL_FORMANTS_ORG_SYT_RVB_PATH, fn) for fn in os.listdir(ALL_VOWEL_FORMANTS_ORG_SYT_RVB_PATH)]
all_vowels_df_list = [pd.read_csv(fp) for fp in all_vowel_csv_chunks]

all_vowels_df = pd.concat(all_vowels_df_list)




In [46]:
pd.set_option('precision', 2)
all_vowels_df = pd.concat(all_vowels_df_list)
all_vowels_df = all_vowels_df.sort_values(by=["id"])
all_vowels_df = all_vowels_df.drop(["slno"], axis=1)
all_vowels_df = all_vowels_df.reset_index()
all_vowels_df = all_vowels_df.drop(["index"], axis=1)
all_vowels_df.index.name = "slno"
all_vowels_df.to_csv("all_vowels_formants_org_synth_rvb.csv", index=True, float_format="%.2f")

## For error bars

In [1]:
import pandas as pd
import numpy as np

compare_rir_df = pd.read_csv("/home/jeevan/Jeevan_K/Projects/Asquire/Reverb-Quest/Formants/compare_rirs.csv")


# compare_rir_df[f"error_mean_f1_synth_clean"] = np.abs(compare_rir_df[f"f1_praat_synth_mean"] - compare_rir_df[f"f1_org_praat"])

In [2]:
compare_rir_df

Unnamed: 0,slno,id,filename,phone,pitch,rir_type,f1_org_praat,f2_org_praat,f3_org_praat,f1_praat_synth_mean,...,f3_praat_synth_mean,f1_praat_synth_std,f2_praat_synth_std,f3_praat_synth_std,f1_praat_rvb_mean,f2_praat_rvb_mean,f3_praat_rvb_mean,f1_praat_rvb_std,f2_praat_rvb_std,f3_praat_rvb_std
0,0,0,iy_0_97,iy,97,largeroom1_far_angla,468.35,2320.57,2839.73,471.45,...,2308.42,4.82,58.35,12.73,486.19,1927.71,2439.85,7.60,239.71,165.34
1,1,0,iy_0_97,iy,97,mediumroom1_near_angla,468.35,2320.57,2839.73,471.45,...,2308.42,4.82,58.35,12.73,485.50,2123.21,2691.87,16.24,349.53,213.77
2,2,0,iy_0_97,iy,97,mediumroom1_near_anglb,468.35,2320.57,2839.73,471.45,...,2308.42,4.82,58.35,12.73,406.79,1598.90,2504.10,114.25,859.37,217.55
3,3,0,iy_0_97,iy,97,mediumroom1_far_angla,468.35,2320.57,2839.73,471.45,...,2308.42,4.82,58.35,12.73,489.40,1910.30,2433.17,13.45,212.76,144.82
4,4,0,iy_0_97,iy,97,smallroom1_near_angla,468.35,2320.57,2839.73,471.45,...,2308.42,4.82,58.35,12.73,241.58,1364.61,2510.84,201.80,802.81,242.64
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1018857,940438,78369,ao_78369_154,ao,154,clean,578.49,821.36,2027.85,587.80,...,932.48,7.09,10.88,52.04,587.80,800.80,932.48,7.09,10.88,52.04
1018858,940451,78370,ih_78370_183,ih,183,clean,491.96,1601.86,2672.21,520.56,...,1629.88,6.91,66.61,7.48,520.56,796.22,1629.88,6.91,66.61,7.48
1018859,940462,78371,ay_78371_247,ay,247,clean,673.32,1436.07,2380.73,595.55,...,1523.34,115.66,153.34,192.86,595.55,761.91,1523.34,115.66,153.34,192.86
1018860,940474,78372,ih_78372_118,ih,118,clean,422.33,2033.47,2634.35,413.59,...,2021.12,7.55,66.91,11.12,413.59,874.07,2021.12,7.55,66.91,11.12


In [7]:
ftags = ["f1", "f2", "f3"]

for f in ftags:
    # compare_rir_df[f"error_mean_{f}_synth_rvb"] = np.abs(compare_rir_df[f"{f}_praat_rvb_mean"] - compare_rir_df[f"{f}_org_praat"])
    compare_rir_df[f"error_mean_{f}_synth_rvb"] = (compare_rir_df[f"{f}_praat_rvb_mean"] - compare_rir_df[f"{f}_org_praat"])

compare_rir_df = compare_rir_df.sort_values(by=["filename"])

compare_rir_df.tail(14)


Unnamed: 0,slno,id,filename,phone,pitch,rir_type,f1_org_praat,f2_org_praat,f3_org_praat,f1_praat_synth_mean,...,f3_praat_synth_std,f1_praat_rvb_mean,f2_praat_rvb_mean,f3_praat_rvb_mean,f1_praat_rvb_std,f2_praat_rvb_std,f3_praat_rvb_std,error_mean_f1_synth_rvb,error_mean_f2_synth_rvb,error_mean_f3_synth_rvb
119440,119440,9953,ux_9953_128,ux,128,largeroom1_near_anglb,416.86,1706.66,2564.03,393.99,...,14.08,161.54,858.35,1729.23,38.94,353.52,110.1,-255.32,-848.31,-834.8
119959,119959,9996,ux_9996_184,ux,184,smallroom1_near_anglb,624.0,1271.45,2624.58,578.48,...,7.66,563.52,1209.98,2115.19,124.31,186.96,378.16,-60.48,-61.47,-509.39
119958,119958,9996,ux_9996_184,ux,184,largeroom1_near_angla,624.0,1271.45,2624.58,578.48,...,7.66,534.98,992.41,1534.94,38.54,209.32,411.22,-89.02,-279.04,-1089.64
119960,119960,9996,ux_9996_184,ux,184,smallroom1_far_angla,624.0,1271.45,2624.58,578.48,...,7.66,465.12,1121.71,1750.89,164.91,284.36,440.74,-158.88,-149.74,-873.69
119957,119957,9996,ux_9996_184,ux,184,mediumroom1_far_anglb,624.0,1271.45,2624.58,578.48,...,7.66,601.18,907.17,1499.09,74.39,48.87,59.22,-22.82,-364.28,-1125.49
119961,119961,9996,ux_9996_184,ux,184,largeroom1_far_anglb,624.0,1271.45,2624.58,578.48,...,7.66,436.55,913.09,1340.32,69.49,84.56,64.02,-187.45,-358.36,-1284.26
119954,119954,9996,ux_9996_184,ux,184,mediumroom1_far_angla,624.0,1271.45,2624.58,578.48,...,7.66,386.52,863.32,1589.16,198.93,277.2,426.49,-237.48,-408.13,-1035.42
119963,119963,9996,ux_9996_184,ux,184,largeroom1_far_angla,624.0,1271.45,2624.58,578.48,...,7.66,478.71,792.45,1358.36,55.34,41.43,45.22,-145.29,-479.0,-1266.22
950484,119963,9996,ux_9996_184,ux,184,clean,624.0,1271.45,2624.58,578.48,...,7.66,578.48,1031.21,1286.95,7.05,40.47,7.66,-45.52,-240.24,-1337.63
119953,119953,9996,ux_9996_184,ux,184,mediumroom1_near_anglb,624.0,1271.45,2624.58,578.48,...,7.66,361.56,947.73,1419.33,121.83,309.81,299.31,-262.44,-323.72,-1205.25


In [4]:
compare_rir_df.columns

Index(['slno', 'id', 'filename', 'phone', 'pitch', 'rir_type', 'f1_org_praat',
       'f2_org_praat', 'f3_org_praat', 'f1_praat_synth_mean',
       'f2_praat_synth_mean', 'f3_praat_synth_mean', 'f1_praat_synth_std',
       'f2_praat_synth_std', 'f3_praat_synth_std', 'f1_praat_rvb_mean',
       'f2_praat_rvb_mean', 'f3_praat_rvb_mean', 'f1_praat_rvb_std',
       'f2_praat_rvb_std', 'f3_praat_rvb_std', 'error_mean_f1_synth_rvb',
       'error_mean_f2_synth_rvb', 'error_mean_f3_synth_rvb'],
      dtype='object')

In [5]:
compare_rir_min_df = compare_rir_df[['id', 'phone', 'pitch', 'rir_type', 
                                     'f1_org_praat',            'f2_org_praat',            'f3_org_praat', 
                                     'f1_praat_rvb_mean',       'f2_praat_rvb_mean',       'f3_praat_rvb_mean', 
                                     'f1_praat_rvb_std',        'f2_praat_rvb_std',        'f3_praat_rvb_std', 
                                     'error_mean_f1_synth_rvb', 'error_mean_f2_synth_rvb', 'error_mean_f3_synth_rvb']]

compare_rir_min_df.reset_index()

Unnamed: 0,index,id,phone,pitch,rir_type,f1_org_praat,f2_org_praat,f3_org_praat,f1_praat_rvb_mean,f2_praat_rvb_mean,f3_praat_rvb_mean,f1_praat_rvb_std,f2_praat_rvb_std,f3_praat_rvb_std,error_mean_f1_synth_rvb,error_mean_f2_synth_rvb,error_mean_f3_synth_rvb
0,120381,10031,aa,124,largeroom1_near_anglb,710.4,1337.40,2590.64,652.45,1288.82,2118.06,336.27,154.47,579.10,-57.95,-48.58,-472.58
1,950519,10031,aa,124,clean,710.4,1337.40,2590.64,715.92,840.12,1358.22,61.16,55.22,8.84,5.52,-497.28,-1232.42
2,120372,10031,aa,124,mediumroom1_far_angla,710.4,1337.40,2590.64,651.39,837.73,1465.84,95.55,105.00,75.62,-59.01,-499.67,-1124.80
3,120373,10031,aa,124,mediumroom1_near_angla,710.4,1337.40,2590.64,603.61,1241.02,2298.64,283.45,258.61,502.23,-106.79,-96.38,-292.00
4,120374,10031,aa,124,mediumroom1_near_anglb,710.4,1337.40,2590.64,643.16,1210.84,2009.25,212.84,213.19,580.32,-67.24,-126.56,-581.39
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1018857,119960,9996,ux,184,smallroom1_far_angla,624.0,1271.45,2624.58,465.12,1121.71,1750.89,164.91,284.36,440.74,-158.88,-149.74,-873.69
1018858,119958,9996,ux,184,largeroom1_near_angla,624.0,1271.45,2624.58,534.98,992.41,1534.94,38.54,209.32,411.22,-89.02,-279.04,-1089.64
1018859,119959,9996,ux,184,smallroom1_near_anglb,624.0,1271.45,2624.58,563.52,1209.98,2115.19,124.31,186.96,378.16,-60.48,-61.47,-509.39
1018860,119955,9996,ux,184,smallroom1_near_angla,624.0,1271.45,2624.58,512.85,1035.80,1634.85,89.29,212.57,496.61,-111.15,-235.65,-989.73


In [6]:
compare_rir_min_errors_df = compare_rir_min_df[['id', 'phone', 'pitch', 'rir_type', 
                                                # 'f1_org_praat',            'f2_org_praat',            'f3_org_praat', 
                                                # 'f1_praat_rvb_mean',       'f2_praat_rvb_mean',       'f3_praat_rvb_mean', 
                                                'f1_praat_rvb_std',        'f2_praat_rvb_std',        'f3_praat_rvb_std', 
                                                'error_mean_f1_synth_rvb', 'error_mean_f2_synth_rvb', 'error_mean_f3_synth_rvb']]
compare_rir_min_errors_df = compare_rir_min_errors_df.reset_index()
compare_rir_min_errors_df.index.name = "slno"
compare_rir_min_errors_df.to_csv(f"compare_rir_min_errors_norm.csv", index=True, float_format="%.2f")
compare_rir_min_errors_df

Unnamed: 0_level_0,index,id,phone,pitch,rir_type,f1_praat_rvb_std,f2_praat_rvb_std,f3_praat_rvb_std,error_mean_f1_synth_rvb,error_mean_f2_synth_rvb,error_mean_f3_synth_rvb
slno,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
0,120381,10031,aa,124,largeroom1_near_anglb,336.27,154.47,579.10,-57.95,-48.58,-472.58
1,950519,10031,aa,124,clean,61.16,55.22,8.84,5.52,-497.28,-1232.42
2,120372,10031,aa,124,mediumroom1_far_angla,95.55,105.00,75.62,-59.01,-499.67,-1124.80
3,120373,10031,aa,124,mediumroom1_near_angla,283.45,258.61,502.23,-106.79,-96.38,-292.00
4,120374,10031,aa,124,mediumroom1_near_anglb,212.84,213.19,580.32,-67.24,-126.56,-581.39
...,...,...,...,...,...,...,...,...,...,...,...
1018857,119960,9996,ux,184,smallroom1_far_angla,164.91,284.36,440.74,-158.88,-149.74,-873.69
1018858,119958,9996,ux,184,largeroom1_near_angla,38.54,209.32,411.22,-89.02,-279.04,-1089.64
1018859,119959,9996,ux,184,smallroom1_near_anglb,124.31,186.96,378.16,-60.48,-61.47,-509.39
1018860,119955,9996,ux,184,smallroom1_near_angla,89.29,212.57,496.61,-111.15,-235.65,-989.73


In [40]:
room = compare_rir_min_errors_df[(compare_rir_min_errors_df["rir_type"] == "largeroom1_near_anglb") & (compare_rir_min_errors_df["phone"] == "aa")]
room

Unnamed: 0_level_0,slno,id,phone,pitch,rir_type,f1_praat_rvb_std,f2_praat_rvb_std,f3_praat_rvb_std,error_mean_f1_synth_rvb,error_mean_f2_synth_rvb,error_mean_f3_synth_rvb
slno,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
0,120381,10031,aa,124,largeroom1_near_anglb,336.27,154.47,579.10,57.95,48.58,472.58
19,12058,1004,aa,108,largeroom1_near_anglb,19.70,34.98,179.34,10.27,1.27,236.00
34,120798,10066,aa,109,largeroom1_near_anglb,8.46,102.13,253.22,45.06,61.49,181.74
41,121833,10152,aa,170,largeroom1_near_anglb,140.77,102.64,320.74,135.25,148.48,925.06
54,121858,10154,aa,216,largeroom1_near_anglb,103.10,156.23,318.85,47.05,7.23,329.02
...,...,...,...,...,...,...,...,...,...,...,...
54499,119254,9937,aa,126,largeroom1_near_anglb,237.88,114.81,308.51,97.09,46.86,130.82
54515,11924,993,aa,190,largeroom1_near_anglb,278.87,353.76,509.21,282.36,354.82,802.58
54531,119301,9941,aa,143,largeroom1_near_anglb,105.18,88.78,314.79,74.94,96.28,159.17
54541,119434,9952,aa,168,largeroom1_near_anglb,156.05,219.29,315.64,198.06,285.96,781.43


In [39]:
room_d = room.drop(columns=["slno",	"id",	"phone",	"pitch",	"rir_type"])
mean = room_d.mean(axis=0)
std = room_d.std(axis=0)
min = room_d.min(axis=0)
max = room_d.max(axis=0)

max - min

mean - std

KeyError: "['slno' 'id' 'phone' 'pitch' 'rir_type'] not found in axis"