## Gaussian Distribution and Bayes-rule based plug-in classifier

## Load data
- MFCCfeatures.mat contains MFCC features from 2671 sound clips of pronunciations of 8 different vowels.

In [1]:
% Load the class labels (VowelClass) and the feature matrix (VowelFeatures)
% from the MAT-file into the workspace; only these two variables are read.
load('MFCCfeatures.mat','VowelClass','VowelFeatures')




## Split the data randomly (randperm) into training and test sets in proportions 90% and 10%.

In [2]:
% Randomly split the samples: 90% for training, the remaining 10% for testing.
% The sample count is taken from a single source (rows of VowelFeatures) so
% that the permutation and the split sizes can never disagree.
N = size(VowelFeatures,1);      % total number of samples
if numel(VowelClass) ~= N
    error('VowelClass must contain exactly one label per row of VowelFeatures.');
end

p = randperm(N);                % random ordering of the sample indices

trn_size = round(0.9*N);        % number of training samples
test_size = N - trn_size;       % number of test samples

trn_idx = p(1:trn_size);        % first 90% of the shuffled indices
test_idx = p(trn_size+1:end);   % remaining 10%

trn_data = VowelFeatures(trn_idx,:);
trn_class = VowelClass(trn_idx);
trn_items = unique(trn_class);  % distinct class labels present in the training split

test_data = VowelFeatures(test_idx,:);
test_class = VowelClass(test_idx);
test_items = unique(test_class);    % distinct class labels present in the test split




## Bayes-rule classifier under the assumption that the attributes of each class follow a Gaussian/Normal distribution.

In [3]:
% Fit one multivariate Gaussian per class: estimate the prior, the mean
% vector and the covariance matrix of every class from the training split.
D = size(VowelFeatures,2);  % number of feature dimensions
G = numel(trn_items);       % number of classes (independent of label orientation)

priorhat = zeros(G,1);      % estimate of prior probability of each class
muhat = zeros(G,D);         % estimate of mean vector for each class (one row per class)
SIGhat = zeros(D,D,G);      % estimate of covariance matrix for each class

% Training Phase
for it = 1:G
    tmp_data = trn_data(trn_class==trn_items(it),:);    % training rows belonging to class it
    n_it = size(tmp_data,1);                            % number of samples in class it

    priorhat(it) = n_it/size(trn_data,1);   % relative class frequency
    muhat(it,:) = mean(tmp_data,1);

    % Maximum-likelihood covariance (normalised by n_it, not n_it-1),
    % computed as a single matrix product on the centred data instead of
    % accumulating one outer product per sample. cov(tmp_data,1) is not
    % used because it treats a single-row input as a vector of observations.
    centered = bsxfun(@minus, tmp_data, muhat(it,:));
    SIGhat(:,:,it) = (centered'*centered)./n_it;
end




In [4]:
% Evaluate the classifier on the training data itself.
% Column i of L holds p(x | class i) * P(class i) for every training sample,
% i.e. the unnormalised posterior of class i. L is preallocated rather than
% grown by concatenation inside the loop.
L = zeros(size(trn_data,1), G);
for i = 1:G
    L(:,i) = mvnpdf(trn_data,muhat(i,:),SIGhat(:,:,i)) * priorhat(i);
end

% For each sample pick the class with the largest posterior; decisions
% holds indices into trn_items.
[~,decisions] = max(L,[],2);
trn_class_predicted = trn_items(decisions);

% computation of error rate (Training data); (:) and numel make the result
% independent of whether the labels are stored as a row or a column.
% No trailing semicolon: the value is displayed on purpose.
error_rate_trn = sum(trn_class(:)~=trn_class_predicted(:))/numel(trn_class)


%test_class_predicted = char(size(test_class,1));


error_rate_trn =

   4.1597e-04




In [6]:
% Test Phase: score every test sample under each class model.
% Column i of L holds p(x | class i) * P(class i), i.e. the unnormalised
% posterior of class i. L is preallocated rather than grown in the loop.
L = zeros(size(test_data,1), G);
for i = 1:G
    L(:,i) = mvnpdf(test_data,muhat(i,:),SIGhat(:,:,i)) * priorhat(i);
end

% For each sample pick the class with the largest posterior.
[~,decisions] = max(L,[],2);
% decisions indexes the class models in training order, so the labels must
% be looked up in trn_items. Using test_items would mislabel predictions
% (or fail) whenever the test split does not contain every class.
test_class_predicted = trn_items(decisions);

% computation of error rate (Test data); (:) and numel make the result
% independent of whether the labels are stored as a row or a column.
% No trailing semicolon: the value is displayed on purpose.
error_rate_test = sum(test_class(:)~=test_class_predicted(:))/numel(test_class)



error_rate_test =

     0


