/
aa_dataset.py
86 lines (70 loc) · 2.84 KB
/
aa_dataset.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
"""
A Pylearn2 Dataset object for accessing the per-phone TIMIT waveforms stored in
wav_aa.npy, standardized and split into train/valid/test subsets.
"""
__authors__ = 'David Krueger'
__copyright__ = "Copyright 2014, Universite de Montreal"
__credits__ = ["David Krueger"]
__license__ = "3-clause BSD"
__maintainer__ = "David Krueger"
############
import numpy
np = numpy
from numpy import array as A
from pylearn2.datasets.dense_design_matrix import DefaultViewConverter
from pylearn2.datasets.dense_design_matrix import DenseDesignMatrix
from pylearn2.space import VectorSpace, Conv2DSpace, CompositeSpace
class AA(DenseDesignMatrix):
    """
    A Pylearn2 Dataset object for accessing TIMIT w/preprocessing

    The dataset is built from the sequences stored in the ``wav_aa.npy``
    array. Only sequences strictly longer than `seq_len` samples are kept,
    and each of them is truncated to its first `seq_len` samples. The
    retained data are standardized with the mean and standard deviation
    computed over all retained sequences (i.e. before the split), and then
    split 80% / 10% / 10% into train / valid / test in storage order.

    For every sequence, the features are all samples but the last one
    (`seq_len - 1` values) and the targets are the samples from index
    `frame_width` onwards (`seq_len - frame_width` values).

    Parameters
    ----------
    which_set : str
        'train', 'valid' or 'test'. Any other value leaves the data
        unsplit, i.e. every retained sequence is used.
    seq_len : int
        Number of samples kept from each sequence; shorter or equally long
        sequences are discarded.
    frame_width : int
        Index of the first sample used as a target.
    axes : tuple
        Axes ordering handed to the `DefaultViewConverter`.
    transformer, start, stop, window, preprocessor, fit_preprocessor, \
    fit_test_preprocessor, space_preserving, output_channels, switch_axes
        Not used by this constructor; they are only stored as instance
        attributes before the parent constructor runs.
    data_path : str, optional
        Location of the ``.npy`` file to load. Defaults to `_data_path`.

    Raises
    ------
    ValueError
        If no sequence is longer than `seq_len`.
    """
    # Mean and standard deviation of the acoustic samples from the whole
    # dataset (train, valid, test).
    # These constants are not read by __init__, which recomputes the
    # statistics from the loaded data and stores them as `mean` / `std`.
    _mean = 0.0035805809921434142
    _std = 542.48824133746177

    # Default location of the waveform array loaded by __init__.
    _data_path = ('/data/lisa_ubi/speech/onomatopoeia/dataset/'
                  'per_phone_timit/wav_aa.npy')

    def __init__(self,
                 which_set='train',
                 seq_len=1001,
                 transformer=None,
                 start=0,
                 stop=45000,
                 window=2000,
                 frame_width=250,
                 preprocessor=None,
                 fit_preprocessor=False,
                 axes=('b', 0, 1, 'c'),
                 fit_test_preprocessor=False,
                 space_preserving=False,
                 output_channels=1,
                 switch_axes=False,
                 data_path=None):
        # Expose every constructor argument as an instance attribute.
        self.__dict__.update(locals())
        del self.self

        if data_path is None:
            data_path = self._data_path
        self.data_path = data_path

        dat = np.load(data_path)

        # Keep only sequences strictly longer than seq_len, truncated to
        # their first seq_len samples, so the result is a regular 2D array.
        daat = A([A(seq[:seq_len]) for seq in dat if len(seq) > seq_len])
        if len(daat) == 0:
            raise ValueError("no sequence in %r is longer than seq_len=%r"
                             % (data_path, seq_len))

        # Standardize over all retained sequences. The arithmetic is done
        # out of place so that integer-typed data is promoted to floating
        # point instead of being truncated (or rejected) by an in-place op.
        self.mean = np.mean(daat)
        daat = daat - self.mean
        self.std = np.std(daat)
        daat = daat / self.std

        # 80% / 10% / 10% split in storage order; an unrecognized
        # which_set leaves the data unsplit.
        n_seqs = len(daat)
        train_end = int(.8 * n_seqs)
        valid_end = int(.9 * n_seqs)
        if which_set == 'train':
            daat = daat[:train_end]
        elif which_set == 'valid':
            daat = daat[train_end:valid_end]
        elif which_set == 'test':
            daat = daat[valid_end:]

        features = daat[:, :-1]
        targets = daat[:, frame_width:]

        images_shape = [seq_len - 1, 1, 1]
        print(images_shape)
        print(targets.shape)
        print(features.shape)

        view_converter = DefaultViewConverter(shape=images_shape, axes=axes)
        super(AA, self).__init__(X=features, y=targets,
                                 view_converter=view_converter)