In [1]:
import os
import pyspch.sp as Sps
import pyspch.display as Spd
import pyspch.core as Spch
import pyspch.nn as Spnn

In [2]:
## Paths
# NOTE(review): all three locations are machine-specific absolute paths;
# adjust them before running this notebook on another machine.

# Inputs
timit_path = 'W:/timit/CDdata/timit/'  # TIMIT source tree: used to extract corpus, features, labels
remote_path = 'https://homes.esat.kuleuven.be/~spchlab/pyspch/'  # remote location to read corpus, features, labels from

# Output root: corpus, features, labels are written below this directory
local_path = 'D:/gitlab/psi/compi1234/nn/'

# Switch the working directory so that relative paths resolve under local_path
os.chdir(local_path)

## Prepare TIMIT data 

In [3]:
# Build the TIMIT corpus file list, derive subsets, and store everything under data/
# NOTE(review): `corpus` and `utils` are not bound by this notebook's import
# cell (it only binds os, Sps, Spd, Spch, Spnn) -- presumably pyspch modules;
# confirm where they live and import them explicitly.
timit_fnames = corpus.get_corpus_timit(timit_path)

# Subsets by split, for the whole corpus and restricted to region 'dr1'
timit_train = corpus.filter_list_timit(timit_fnames, split='train')
timit_test = corpus.filter_list_timit(timit_fnames, split='test')
timit_train_dr1 = corpus.filter_list_timit(timit_fnames, split='train', region='dr1')
timit_test_dr1 = corpus.filter_list_timit(timit_fnames, split='test', region='dr1')

# One .corpus text file per subset (file lists only, no meta data)
os.makedirs('data/', exist_ok=True)
for subset_fnames, corpus_fname in (
    (timit_train, 'data/timit_train.corpus'),
    (timit_test, 'data/timit_test.corpus'),
    (timit_train_dr1, 'data/timit_train_dr1.corpus'),
    (timit_test_dr1, 'data/timit_test_dr1.corpus'),
):
    utils.write_txt(subset_fnames, corpus_fname)

# Meta data for the full corpus, stored tab-separated without index or header row
timit_meta = corpus.get_timit_metadata(timit_fnames)
timit_meta.to_csv(
    "data/timit.meta",
    sep='\t',
    index=False,
    header=False,
)

## Extract TIMIT features (for exercise sessions)

In [5]:
# Wrap the complete corpus file list (timit_fnames) in a SpchData object
spchdata = corpus.SpchData(timit_fnames)
print(f'Corpus contains {len(timit_fnames)} files')

# Where the feature extraction input lives and which file extension it has
# NOTE(review): feature_path repeats the literal assigned to timit_path in the
# paths cell; keep the two in sync if either changes.
feature_path = 'W:/timit/CDdata/timit/'
feature_extension = ".wav"

# Storage format shared by the array writer and reader
mode = 'numpy'
extension = '.npy'

# Writer and reader are built from the same (mode, extension) pair
writer = corpus.ArrayWriter(mode, extension)
reader = corpus.ArrayReader(mode, extension)

Corpus contains 6300 files


### Mel filterbanks (mel80)

In [None]:
# A. Mel filterbanks (mel80)

# Output directory for this feature set
write_feature_path = 'data/mel80'

# Settings handed to spchdata.extract_features and saved as feature_args.json.
# NOTE(review): units are not stated in this notebook -- f_shift / f_length
# look like seconds and sample_rate like Hz; confirm against pyspch.
feature_args = {
    'spg': None,
    'Deltas': None,
    'Norm': None,
    'sample_rate': 16000,
    'f_shift': 0.01,
    'f_length': 0.03,
    'preemp': 0.97,
    'window': 'hamm',
    'mode': 'dB',
    'n_mels': 80,
    'n_cep': None,  # left unset for this feature set
}

# Extract the features, then write them with the shared array writer
spchdata.extract_features(feature_path, feature_args, feature_extension)
spchdata.write_features(write_feature_path, writer)

# Store the settings next to the features so the set is self-describing
feature_args_fname = os.path.join(write_feature_path, 'feature_args.json')
utils.write_json(feature_args, feature_args_fname)

### Mel frequency cepstral coefficients (mfcc13)

In [6]:
# B. Mel Frequency Cepstral Coefficients (mfcc13)

# Output directory for this feature set
write_feature_path = 'data/mfcc13'

# Settings handed to spchdata.extract_features and saved as feature_args.json.
# Compared with the mel80 settings only 'n_mels' and 'n_cep' differ.
# NOTE(review): units are not stated in this notebook -- f_shift / f_length
# look like seconds and sample_rate like Hz; confirm against pyspch.
feature_args = {
    'spg': None,
    'Deltas': None,
    'Norm': None,
    'sample_rate': 16000,
    'f_shift': 0.01,
    'f_length': 0.03,
    'preemp': 0.97,
    'window': 'hamm',
    'mode': 'dB',
    'n_mels': 24,
    'n_cep': 13,
}

# Extract the features, then write them with the shared array writer
spchdata.extract_features(feature_path, feature_args, feature_extension)
spchdata.write_features(write_feature_path, writer)

# Store the settings next to the features so the set is self-describing
feature_args_fname = os.path.join(write_feature_path, 'feature_args.json')
utils.write_json(feature_args, feature_args_fname)