In [1]:
import pickle
import os
import numpy as np
import librosa
from python_speech_features import fbank
import shutil
import pandas as pd

In [2]:
def normalize_frames(m,Scale=True):
    """Center a feature matrix along axis 0, optionally scaling it as well.

    Args:
        m: Array-like of features; the mean and standard deviation are taken
            over axis 0 (column-wise for a 2-D input).
        Scale: When True (default) the centered values are also divided by the
            standard deviation over axis 0 (plus a 2e-12 guard against
            division by zero). When False only the mean is subtracted.

    Returns:
        The normalized array, with the same shape as ``m``.
    """
    centered = m - np.mean(m, axis=0)
    if not Scale:
        return centered
    return centered / (np.std(m, axis=0) + 2e-12)

### Compute features and store them

In [3]:
# CSV index of the dataset: one audio file path per row (columns spk_id, ch_id, utter_path).
dataset_info_file = '/cas/DeepLearn/elperu/tmp/speech_datasets/LibriSpeech/dataset_info.csv'

In [4]:
# Load the utterance index into a DataFrame.
# NOTE(review): the displayed frame has an 'Unnamed: 0' column, which suggests the
# CSV was written with its index; index_col=0 would drop it -- confirm before changing.
df = pd.read_csv(dataset_info_file)

In [5]:
# Display the utterance index (pandas elides the middle rows of a large frame).
df

Unnamed: 0,spk_id,ch_id,utter_path
0,374,180299,/cas/DeepLearn/elperu/tmp/speech_datasets/Libr...
1,374,180299,/cas/DeepLearn/elperu/tmp/speech_datasets/Libr...
2,374,180299,/cas/DeepLearn/elperu/tmp/speech_datasets/Libr...
3,374,180299,/cas/DeepLearn/elperu/tmp/speech_datasets/Libr...
4,374,180299,/cas/DeepLearn/elperu/tmp/speech_datasets/Libr...
...,...,...,...
9995,4640,19187,/cas/DeepLearn/elperu/tmp/speech_datasets/Libr...
9996,4640,19187,/cas/DeepLearn/elperu/tmp/speech_datasets/Libr...
9997,4640,19187,/cas/DeepLearn/elperu/tmp/speech_datasets/Libr...
9998,4640,19187,/cas/DeepLearn/elperu/tmp/speech_datasets/Libr...


In [6]:
# Distinct speaker ids, in order of first appearance in the index.
users = df['spk_id'].unique()
print(f'N users: {len(users)}\nCurrent users: {users}')

N users: 250
Current users: [ 374 7800 2514 3240 1088 5456 5750 1246 8238 1263 7505  587  226 1743
 4214 5789 7635 5390  307 7447 4362 6529  233 3242 1624 4297 6181 6367
 3723 7113 6563  403 5778 3112 7367 7078   32 5322 3214 6818  481 5104
 6385 5192 8226 3830 2989 8324  163  150 6476 1069 3983 1183 4788  426
  311 2196  103  446 1502 8975 8770 1992 5678 8014 2182 7178  201 1034
 5703 1363  250 6836 3168 1553 5163   89 1334   19 5393 4481 4160 8312
 6415   87 7067 5688 2843  909   40  322 8797 2764 6848 3947 4014 6531
 3664 3259 4441 5049 4018 4088 4853 7226 4859   78 3440  460 2893 4680
  302 4830 2518 4898 7780 1926 1963 1841 3526  254 1970 6209  458 7148
  831 6147  839 8425  200 1723 2416 6019 4813 1455 2391 2910 6000 7302
 2817  445 8468 2384 8630 4267   26  118  328 1867 3374 5022 8108 6081
 8095 5514 8838 2007 7794 8123 5463 2002  196  248  198 4340 5339 6454
 4051 3982 6078 3857 1098 5867 2159   83  730 1235 8629  696  289 1116
 5808 8063 8465 6272 6064  412 3607 1594 7278  62

In [7]:
# Create the output directory tree

In [8]:
# Root directory for the extracted features; one sub-directory per speaker is created under it.
out_path = '/cas/DeepLearn/elperu/tmp/speech_datasets/LibriSpeech/train_features/'

In [10]:
# Start from an empty output tree. ignore_errors=True also covers the first run,
# when the directory does not exist yet.
shutil.rmtree(out_path, ignore_errors=True)

# One sub-directory per speaker id.
for user in users:
    user_dir = os.path.join(out_path, str(user))
    os.makedirs(user_dir)

In [11]:
sample_rate = 16000  # Hz; librosa.load resamples every utterance to this rate

# One [spk_id, source audio path, absolute feature-pickle path] record per utterance.
tot_rows = []
for idx, row in df.iterrows():

    spk_id = row['spk_id']
    _path = row['utter_path']

    # Strip only the final extension: split('.')[0] would truncate a stem that
    # itself contains dots, letting different utterances map to the same pickle.
    utter_name = os.path.splitext(os.path.basename(_path))[0]

    # Decode the audio file as mono at `sample_rate`.
    audio, sr = librosa.load(_path, sr=sample_rate, mono=True)

    # 40 filterbank energies per 25 ms analysis window; `energies` is not used below.
    filter_banks, energies = fbank(audio, samplerate=sample_rate, nfilt=40, winlen=0.025)

    # Log-compress; the 1e-5 floor keeps log10 away from zero.
    filter_banks = 20 * np.log10(np.maximum(filter_banks, 1e-5))

    # Subtract the mean over axis 0 only (no variance scaling).
    feature = normalize_frames(filter_banks, Scale=False)

    out = {'label': str(spk_id),
           'feat': feature}

    pickle_file = os.path.join(out_path, str(spk_id), f'{utter_name}.p')

    with open(pickle_file, 'wb') as f:
        pickle.dump(out, f)

    # Record the utterance only once its features are on disk.
    tot_rows.append([spk_id, _path, os.path.abspath(pickle_file)])

In [12]:
# Sanity check: number of utterances processed (one record is appended per DataFrame row).
len(tot_rows)

10000

In [13]:
# Feature index: speaker id, source audio path, and path of the pickled features.
train_df = pd.DataFrame(tot_rows, columns=['spk_id', 'flac_path', 'feature_path'])

In [14]:
# Persist the feature index; index=False keeps the DataFrame index out of the CSV.
train_df.to_csv('/cas/DeepLearn/elperu/tmp/speech_datasets/LibriSpeech/train_info.csv', index=False)

In [15]:
# Read the feature index back from disk.
# NOTE(review): `a` is not used in the cells visible here -- presumably a round-trip
# check; confirm, then rename or remove.
a = pd.read_csv('/cas/DeepLearn/elperu/tmp/speech_datasets/LibriSpeech/train_info.csv')

In [18]:
# Sanity check: print how many entries each speaker directory contains.
features_root = '/cas/DeepLearn/elperu/tmp/speech_datasets/LibriSpeech/train_features/'
for speaker_dir in os.listdir(features_root):
    n_entries = len(os.listdir(os.path.join(features_root, speaker_dir)))
    print(f'{speaker_dir}, {n_entries}')

374, 40
7800, 40
2514, 40
3240, 40
1088, 40
5456, 40
5750, 40
1246, 40
8238, 40
1263, 40
7505, 40
587, 40
226, 40
1743, 40
4214, 40
5789, 40
7635, 40
5390, 40
307, 40
7447, 40
4362, 40
6529, 40
233, 40
3242, 40
1624, 40
4297, 40
6181, 40
6367, 40
3723, 40
7113, 40
6563, 40
403, 40
5778, 40
3112, 40
7367, 40
7078, 40
32, 40
5322, 40
3214, 40
6818, 40
481, 40
5104, 40
6385, 40
5192, 40
8226, 40
3830, 40
2989, 40
8324, 40
163, 40
150, 40
6476, 40
1069, 40
3983, 40
1183, 40
4788, 40
426, 40
311, 40
2196, 40
103, 40
446, 40
1502, 40
8975, 40
8770, 40
1992, 40
5678, 40
8014, 40
2182, 40
7178, 40
201, 40
1034, 40
5703, 40
1363, 40
250, 40
6836, 40
3168, 40
1553, 40
5163, 40
89, 40
1334, 40
19, 40
5393, 40
4481, 40
4160, 40
8312, 40
6415, 40
87, 40
7067, 40
5688, 40
2843, 40
909, 40
40, 40
322, 40
8797, 40
2764, 40
6848, 40
3947, 40
4014, 40
6531, 40
3664, 40
3259, 40
4441, 40
5049, 40
4018, 40
4088, 40
4853, 40
7226, 40
4859, 40
78, 40
3440, 40
460, 40
2893, 40
4680, 40
302, 40
4830, 40
2518,