Initial files

amaas · Feb 26, 2013 · b78602c · b78602c
1 parent 600516d
commit b78602c
Show file tree

Hide file tree

Showing 12 changed files with 968 additions and 0 deletions.
diff --git a/.gitignore b/.gitignore
@@ -0,0 +1,2 @@
+# Emacs backup files
+*~
diff --git a/drdae_obj.m b/drdae_obj.m
@@ -0,0 +1,215 @@
+function [ cost, grad, numTotal, pred_cell ] = drdae_obj( theta, eI, data_cell, targets_cell, fprop_only, pred_out)
+%PRNN_OBJ MinFunc style objective for Deep Recurrent Denoising Autoencoder
+%   theta is the full parameter vector
+%   eI contains experiment / network architecture
+%   data_cell is a cell array of matrices. Each a distinct length is a cell
+%             entry. Each matrix has a time series example in each column
+%   targets_cell is parallel to data, but contains the labels for each time
+%   fprop_only is a flag that only computes the cost, no gradient
+%   eI.recurrentOnly is a flag for computing gradients for only the
+%   recurrent layer. All other gradients set to 0
+%   numTotal is total number of frames evaluated
+%   pred_out is a binary flag for whether pred_cell is populated
+%            pred_cell only filled properly when utterances one per cell
+
+
+%% Debug: Turns this into an identity-function for debugging rest of system
+if isfield(eI, 'objReturnsIdentity') && eI.objReturnsIdentity
+    cost = 0; grad = 0; numTotal = 0;
+    for l = 1:numel(data_cell)
+        numUtterances = size(data_cell{l}, 2);
+        original_vector = reshape(data_cell{l}, eI.winSize*eI.featDim, []);
+        midPnt = ceil(eI.winSize/2);
+        original_vector = original_vector((midPnt-1)*14+1 : midPnt*14, :);
+        pred_cell{l} = reshape(original_vector, [], numUtterances);
+    end
+    return;
+end
+
+%if isempty(return_activation),
+  return_activation = 0;
+%end
+
+%% Load data from globals if not passed in (happens when run on RPC slave)
+global g_data_cell;
+global g_targets_cell;
+isSlave = false;
+if isempty(data_cell)
+    data_cell = g_data_cell;
+    targets_cell = g_targets_cell;
+    isSlave = true;
+end;
+pred_cell = cell(1,numel(data_cell));
+act_cell = cell(1,numel(data_cell));
+%% default short circuits to false
+if ~isfield(eI, 'shortCircuit')
+    eI.shortCircuit = 0;
+end;
+
+%% default dropout to false
+if ~isfield(eI, 'dropout')
+  eI.dropout = 0;
+end;
+
+%% setup weights and accumulators
+[stack, W_t] = rnn_params2stack(theta, eI);
+cost = 0; numTotal = 0;
+outputDim = eI.layerSizes(end);
+%% setup structures to aggregate gradients
+stackGrad = cell(1,numel(eI.layerSizes));
+W_t_grad = zeros(size(W_t));
+for l = 1:numel(eI.layerSizes)
+    stackGrad{l}.W = zeros(size(stack{l}.W));
+    stackGrad{l}.b = zeros(size(stack{l}.b));
+end
+if eI.shortCircuit
+    stackGrad{end}.W_ss = zeros(size(stack{end}.W_ss));
+end;
+%% check options
+if ~exist('fprop_only','var')
+    fprop_only = false;
+end;
+if ~exist('pred_out','var')
+    pred_out = false;
+end;
+
+% DROPOUT: vector of length of hidden layers with 0 or 1 
+% (to drop or keep activation unit) with prob=0.5
+hActToDrop = cell(numel(eI.layerSizes-1),1);
+for i=1:numel(eI.layerSizes)-1
+ if eI.dropout
+   hActToDrop{i} = round(rand(eI.layerSizes(i),1));
+ else
+   hActToDrop{i} = ones(eI.layerSizes(i),1);
+ end
+end
+
+%% loop over each distinct length
+for c = 1:numel(data_cell)
+    data = data_cell{c};
+    targets = {};
+    if ~isempty(targets_cell), targets = targets_cell{c}; end;
+    uttPred = [];
+    T =size(data,1) / eI.inputDim;
+    % store hidden unit activations at each time instant
+    hAct = cell(numel(eI.layerSizes-1), T);
+    for t = 1:T
+        %% forward prop all hidden layers
+        for l = 1:numel(eI.layerSizes)-1
+            if l == 1
+                hAct{1,t} = stack{1}.W * data((t-1)*eI.inputDim+1:t*eI.inputDim, :);
+            else
+                hAct{l,t} = stack{l}.W * hAct{l-1,t};
+            end;
+            hAct{l,t} = bsxfun(@plus, hAct{l,t}, stack{l}.b);
+            % temporal recurrence. limited to single layer for now
+            if l == eI.temporalLayer && t > 1
+                hAct{l,t} = hAct{l,t} + W_t * hAct{l,t-1};
+            end;
+            % nonlinearity
+            if strcmpi(eI.activationFn,'tanh')
+                hAct{l,t} = tanh(hAct{l,t});
+            elseif strcmpi(eI.activationFn,'logistic')
+                hAct{l,t} = 1./(1+exp(-hAct{l,t}));
+            else
+                error('unrecognized activation function: %s',eI.activationFn);
+            end;
+            %dropout (hActToDrop will be all ones if no dropout specified)
+            hAct{1,t} = bsxfun(@times, hAct{1,t}, hActToDrop{l});
+        end;
+        % forward prop top layer not done here to avoid caching it    
+    end;    
+    %% compute cost and backprop through time
+    if  eI.temporalLayer
+        delta_t = zeros(eI.layerSizes(eI.temporalLayer),size(data,2));
+    end;    
+    for t = T:-1:1
+        l = numel(eI.layerSizes);
+        %% forward prop output layer for this timestep
+        curPred = bsxfun(@plus, stack{l}.W * hAct{l-1,t}, stack{l}.b);
+        % add short circuit to regression prediction if model has it
+        if eI.shortCircuit
+            curPred = curPred + stack{end}.W_ss ...
+                * data((t-1)*eI.inputDim+1:t*eI.inputDim, :);
+        end;
+        if pred_out, uttPred = [curPred; uttPred]; end;
+        % skip loss computation if no targets given
+        if isempty(targets), continue; end;
+        curTargets = targets((t-1)*outputDim+1:t*outputDim, :);
+        %% compute cost. Squared L2 loss
+        delta = curPred - curTargets;
+        cost = cost + 0.5 * sum(delta(:).^2);
+        if fprop_only, continue; end;
+        %% regression layer gradient and delta        
+        stackGrad{l}.W = stackGrad{l}.W + delta * hAct{l-1,t}';
+        stackGrad{l}.b = stackGrad{l}.b + sum(delta,2);
+        % short circuit layer
+        if eI.shortCircuit
+            stackGrad{end}.W_ss = stackGrad{end}.W_ss + delta ...
+                * data((t-1)*eI.inputDim+1:t*eI.inputDim, :)';
+        end;
+        delta = stack{l}.W' * delta;
+        %% backprop through hidden layers
+        for l = numel(eI.layerSizes)-1:-1:1
+            % aggregate temporal delta term if this is the recurrent layer
+            if l == eI.temporalLayer
+                delta = delta + delta_t;
+            end;
+            % push delta through activation function for this layer
+            % tanh unit choice assumed
+            if strcmpi(eI.activationFn,'tanh')
+                delta = delta .* (1 - hAct{l,t}.^2);
+            elseif strcmpi(eI.activationFn,'logistic')
+                delta = delta .* hAct{l,t} .* (1 - hAct{l,t});
+            else
+                error('unrecognized activation function: %s',eI.activationFn);
+            end;            
+
+            % gradient of bottom-up connection for this layer            
+            if l > 1
+                stackGrad{l}.W = stackGrad{l}.W + delta * hAct{l-1,t}';
+            else
+                stackGrad{l}.W = stackGrad{l}.W + delta * data((t-1)*eI.inputDim+1:t*eI.inputDim, :)';
+            end;
+            % gradient for bias
+            stackGrad{l}.b = stackGrad{l}.b + sum(delta,2);
+
+            % compute derivative and delta for temporal connections
+            if l == eI.temporalLayer && t > 1
+                 W_t_grad = W_t_grad + delta * hAct{l,t-1}';
+                 % push delta through temporal weights
+                 delta_t = W_t' * delta;
+            end;
+            % push delta through bottom-up weights
+            if l > 1 
+                delta = stack{l}.W' * delta;
+            end;
+        end
+        % reduces avg memory usage but doesn't reduce peak
+        %hAct(:,t) = [];
+    end
+    pred_cell{c} = uttPred;
+    % Return the activations for this utterance. 
+    if return_activation,
+      act_cell{c} = cell2mat(hAct);
+    end
+    % keep track of how many examples seen in total
+    numTotal = numTotal + T * size(targets,2);
+end
+
+%% stack gradients into single vector and compute weight cost
+wCost = numTotal * eI.lambda * sum(theta.^2);
+grad = rnn_stack2params(stackGrad, eI, W_t_grad, true);
+grad = grad + 2 * numTotal * eI.lambda * theta;
+
+avCost = cost/numTotal;
+avWCost = wCost/numTotal;
+cost = cost + wCost;
+
+%% print output
+if ~isSlave && ~isempty(targets_cell)
+    fprintf('loss:  %f  wCost:  %f \t',avCost, avWCost);
+    fprintf('wNorm: %f  rNorm: %f  oNorm: %f\n',sum(stack{1}.W(:).^2),...
+        sum(W_t(:).^2), sum(stack{end}.W(:).^2));
+% plot(theta,'kx');drawnow;
+end;
diff --git a/getAllFiles.m b/getAllFiles.m
@@ -0,0 +1,19 @@
+function fileList = getAllFiles(dirName)
+% fileList = getAllFiles(dirName)
+% returns a list of all files contained in directory tree
+dirData = dir(dirName);      %# Get the data for the current directory
+dirIndex = [dirData.isdir];  %# Find the index for directories
+fileList = {dirData(~dirIndex).name}';  %'# Get a list of the files
+if ~isempty(fileList)
+    fileList = cellfun(@(x) fullfile(dirName,x),...  %# Prepend path to files
+        fileList,'UniformOutput',false);
+end
+subDirs = {dirData(dirIndex).name};  %# Get a list of the subdirectories
+validIndex = ~ismember(subDirs,{'.','..'});  %# Find index of subdirectories
+%#   that are not '.' or '..'
+for iDir = find(validIndex)                  %# Loop over valid subdirectories
+    nextDir = fullfile(dirName,subDirs{iDir});    %# Get the subdirectory path
+    fileList = [fileList; getAllFiles(nextDir)];  %# Recursively call getAllFiles
+end
+
+end
diff --git a/htkread.m b/htkread.m
@@ -0,0 +1,59 @@
+function [ DATA, HTKCode ] = htkread( Filename )
+% [ DATA, HTKCode ] = htkread( Filename )
+%
+% Read DATA from possibly compressed HTK format file.
+%
+% Filename (string) - Name of the file to read from
+% DATA (nSamp x NUMCOFS) - Output data array
+% HTKCode - HTKCode describing file contents
+%
+% Compression is handled using the algorithm in 5.10 of the HTKBook.
+% CRC is not implemented.
+%
+% Mark Hasegawa-Johnson
+% July 3, 2002
+% Based on function mfcc_read written by Alexis Bernard
+%
+
+fid=fopen(Filename,'r','b');
+if fid<0,
+    error(sprintf('Unable to read from file %s',Filename));
+end
+
+% Read number of frames
+nSamp = fread(fid,1,'int32');
+
+% Read sampPeriod
+sampPeriod = fread(fid,1,'int32');
+
+% Read sampSize
+sampSize = fread(fid,1,'int16');
+
+% Read HTK Code
+HTKCode = fread(fid,1,'int16');
+
+%%%%%%%%%%%%%%%%%
+% Read the data
+if bitget(HTKCode, 11),
+    DIM=sampSize/2;
+    nSamp = nSamp-4;
+%    disp(sprintf('htkread: Reading %d frames, dim %d, compressed, from %s',nSamp,DIM,Filename)); 
+
+    % Read the compression parameters
+    A = fread(fid,[1 DIM],'float');
+    B = fread(fid,[1 DIM],'float');
+
+    % Read and uncompress the data
+    DATA = fread(fid, [DIM nSamp], 'int16')';
+    DATA = (repmat(B, [nSamp 1]) + DATA) ./ repmat(A, [nSamp 1]);
+
+
+else
+    DIM=sampSize/4;
+%    disp(sprintf('htkread: Reading %d frames, dim %d, uncompressed, from %s',nSamp,DIM,Filename)); 
+
+    % If not compressed: Read floating point data
+    DATA = fread(fid, [DIM nSamp], 'float')';
+end
+
+fclose(fid);
diff --git a/htkwrite.m b/htkwrite.m
@@ -0,0 +1,79 @@
+function htkwrite( DATA, Filename, HTKCode, sampPeriod )
+% htkwrite( DATA, Filename, HTKCode, sampPeriod )
+%
+% Write DATA to HTK format file.
+%
+% Filename (string) - Name of the file to write to
+% DATA (NFRAMES x NUMCOFS) - data to write
+% HTKCode (scalar) - code describing write format
+%  Default value: 512+64+3 means LPCC_C_E
+% sampPeriod (scalar) - sample period, in 100ns units
+%  Default value: 100000 means 10ms
+%
+% DATA must be already formatted for HTK, i.e. coefficients first, then
+%  energy, then deltas if desired, then delta-energies if desired, etc.
+%
+% This function DOES NOT CHECK compatibility of HTKCode with DATA matrix.
+%
+% HTKCode is only checked to see whether or not we should perform compression.
+% Compression is performed using the algorithm in 5.10 of the HTKBook.
+% CRC is not supported by this function.
+%
+% Mark Hasegawa-Johnson
+% July 3, 2002
+% Based on function mfcc_write written by Alexis Bernard
+%
+
+% Find out whether or not compression is requested
+if nargin<3, 
+   HTKCode = 3; % Default code is LPCC 
+   HTKCode = bitset(HTKCode, 7); % Set the energy bit
+   HTKCode = bitset(HTKCode, 11); % Set the compression bit
+end
+
+% Open the file
+fid=fopen(Filename,'w','ieee-be');
+if fid<0,
+    error(sprintf('Unable to write to file %s',Filename));
+end
+
+% Find nSamp and NCOFS
+[ nSamp, NCOFS ] = size(DATA);
+
+% Write sampPeriod
+if nargin<4,
+    sampPeriod = 100000;
+end
+
+% If data are compressed, write compression parameters and compressed data
+if bitget(HTKCode, 11),
+    sampSize = 2*NCOFS;
+    %disp(sprintf('htkwrite: Writing %d frames, dim %d, compressed, to %s',nSamp,NCOFS,Filename));
+    fwrite(fid,nSamp+4,'int32');
+    fwrite(fid,sampPeriod,'int32');
+    fwrite(fid,sampSize,'int16');
+    fwrite(fid,HTKCode,'int16');
+
+    % Create and write the compression parameters
+    xmax = max(DATA);
+    xmin = min(DATA);
+    A = 2*32767./(xmax-xmin);
+    B = (xmax+xmin)*32767 ./ (xmax-xmin);
+    fwrite(fid, A, 'float');
+    fwrite(fid, B, 'float');
+    % Write compressed data
+    Xshort = round( repmat(A,[nSamp 1]) .* DATA - repmat(B,[nSamp 1]) );
+    fwrite(fid, Xshort', 'int16');   
+else
+    sampSize = 4*NCOFS;
+    %disp(sprintf('htkwrite: Writing %d frames, dim %d, uncompressed, to %s',nSamp,NCOFS,Filename));
+    fwrite(fid,nSamp,'int32');
+    fwrite(fid,sampPeriod,'int32');
+    fwrite(fid,sampSize,'int16');
+    fwrite(fid,HTKCode,'int16');
+
+    % Write uncompressed data
+    fwrite(fid, DATA', 'float');
+end
+
+fclose(fid);