# aa_dataset.py

"""
A Pylearn2 Dataset object for accessing TIMIT with all the preprocessing that I want
"""
__authors__ = 'David Krueger'
__copyright__ = "Copyright 2014, Universite de Montreal"
__credits__ = ["David Krueger"]
__license__ = "3-clause BSD"
__maintainer__ = "David Krueger"

############

import numpy
np = numpy
from numpy import array as A

from pylearn2.datasets.dense_design_matrix import DefaultViewConverter
from pylearn2.datasets.dense_design_matrix import DenseDesignMatrix
from pylearn2.space import VectorSpace, Conv2DSpace, CompositeSpace

class AA(DenseDesignMatrix):
    """
    A Pylearn2 Dataset of fixed-length TIMIT audio sequences.

    The raw sequences are loaded from the ``.npy`` file named by
    ``_data_path``.  Every sequence strictly longer than ``seq_len`` samples
    is kept and truncated to its first ``seq_len`` samples; all other
    sequences are dropped.  The kept data are normalized to zero mean and
    unit standard deviation, using statistics computed over all kept
    sequences (i.e. before the train/valid/test split), and then split
    80% / 10% / 10% in file order.

    For each sequence the input features are all samples but the last
    (``seq_len - 1`` values) and the targets are the samples from index
    ``frame_width`` onward (``seq_len - frame_width`` values).

    Parameters
    ----------
    which_set : str
        'train' (first 80% of the kept sequences), 'valid' (next 10%) or
        'test' (last 10%).  Any other value leaves the data unsplit.
    seq_len : int
        Number of samples each kept sequence is truncated to.
    frame_width : int
        Index of the first sample included in the targets.
    axes : tuple
        Axis ordering handed to ``DefaultViewConverter``.
    transformer, start, stop, window, preprocessor, fit_preprocessor, \
fit_test_preprocessor, space_preserving, output_channels, switch_axes
        Stored as attributes of the same name; not otherwise used by this
        constructor.

    Attributes
    ----------
    mean, std
        Mean and standard deviation used to normalize the kept sequences.

    Raises
    ------
    ValueError
        If no sequence in the file is longer than ``seq_len``.
    """

    # Mean and standard deviation of the acoustic samples from the whole
    # dataset (train, valid, test).
    # NOTE(review): these constants are not read anywhere in this class; the
    # constructor recomputes its own statistics into self.mean / self.std.
    _mean = 0.0035805809921434142
    _std = 542.48824133746177

    # Location of the raw sequences; a subclass may override it.
    _data_path = '/data/lisa_ubi/speech/onomatopoeia/dataset/per_phone_timit/wav_aa.npy'

    def __init__(self,
                 which_set='train',
                 seq_len=1001,
                 transformer=None,
                 start=0,
                 stop=45000,
                 window=2000,
                 frame_width=250,
                 preprocessor=None,
                 fit_preprocessor=False,
                 axes=('b', 0, 1, 'c'),
                 fit_test_preprocessor=False,
                 space_preserving=False,
                 output_channels=1,
                 switch_axes=False):

        # Keep every constructor argument as an attribute of the same name.
        self.which_set = which_set
        self.seq_len = seq_len
        self.transformer = transformer
        self.start = start
        self.stop = stop
        self.window = window
        self.frame_width = frame_width
        self.preprocessor = preprocessor
        self.fit_preprocessor = fit_preprocessor
        self.axes = axes
        self.fit_test_preprocessor = fit_test_preprocessor
        self.space_preserving = space_preserving
        self.output_channels = output_channels
        self.switch_axes = switch_axes

        dat = np.load(self._data_path)

        # Keep only sequences strictly longer than seq_len, truncated to
        # their first seq_len samples.
        # NOTE(review): sequences of exactly seq_len samples are dropped too;
        # confirm that is intended.
        kept = [seq[:seq_len] for seq in dat if len(seq) > seq_len]
        if not kept:
            raise ValueError(
                "no sequence in %r is longer than seq_len=%r"
                % (self._data_path, seq_len))
        daat = np.array(kept)

        # The normalization below works in place with float statistics, so
        # integer samples must be promoted first; floating data is left in
        # its original precision.
        if not np.issubdtype(daat.dtype, np.floating):
            daat = daat.astype(np.float64)

        # Normalize with statistics over all kept sequences (before the
        # split), so every subset shares the same scaling.
        self.mean = np.mean(daat)
        daat -= self.mean
        self.std = np.std(daat)
        daat /= self.std

        # 80% / 10% / 10% split in file order; an unrecognized which_set
        # leaves the data unsplit.
        num_seqs = len(daat)
        train_end = int(.8 * num_seqs)
        valid_end = int(.9 * num_seqs)
        if which_set == 'train':
            daat = daat[:train_end]
        elif which_set == 'valid':
            daat = daat[train_end:valid_end]
        elif which_set == 'test':
            daat = daat[valid_end:]

        # Inputs drop the last sample; targets start at sample frame_width.
        features = daat[:, :-1]
        targets = daat[:, frame_width:]

        IMAGES_SHAPE = [seq_len - 1, 1, 1]
        # Single-argument print(...) emits the same text on Python 2 and 3.
        print(IMAGES_SHAPE)
        print(targets.shape)
        print(features.shape)

        view_converter = DefaultViewConverter(shape=IMAGES_SHAPE, axes=axes)
        super(AA, self).__init__(X=features, y=targets,
                                 view_converter=view_converter)