-
Notifications
You must be signed in to change notification settings - Fork 0
/
testdigits.m
115 lines (88 loc) · 2.99 KB
/
testdigits.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%% Digit Classification (0,1,2,...,9) %%%
%%% project for Machine Learning, AUEB, 2014 %%%
%%% John Zobolas %%%
%%% TEST THE DATA!!! %%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% We assume that bernoullimix3(.m) has been run
% and has produced the necessary data
% -> m(N,K),p(K) for every k.
% Binarize the test data: values below 3 become 0, values of 3 or more become 1.
test = cell(1,10);
for digit = 1:10
    pixels = double(testDataList{digit});
    % both masks are taken from the untouched values, so the two writes are independent
    isOff = pixels < 3;
    isOn = pixels >= 3;
    pixels(isOff) = 0;
    pixels(isOn) = 1;
    test{digit} = pixels;
end
% A priori probability of every digit, estimated from the training set as
% the share of training rows that belong to it: ep(digit) = N(digit)/N(all).
ep = arrayfun(@(digit) size(trainDataList{digit},1), 1:10);
ep = ep / sum(ep);
% dimension of data (number of columns read from every test row):
D = 784;
% Open the report file. fopen does not raise an error on failure, it returns -1,
% so check it here instead of failing later inside fprintf with an unrelated message.
[fileID, openMsg] = fopen('results.txt','w');
if fileID < 0
    error('testdigits:cannotOpenResults', 'Could not open results.txt for writing: %s', openMsg);
end
% For every K in KList classify all the test data and record the overall
% error percentage (over all ten digits) in avgError.
%
% Reads from the workspace: test, ep, D, fileID, and the cell arrays mcell and
% pcell, which are indexed here as mcell{numOfK,c}(k,d) and pcell{numOfK,c}(k).
% Writes one timing line and one error line per digit to fileID.
%
% The class scores are evaluated in the log domain. Multiplying D probabilities
% directly can underflow to 0, which turns the score into log(0) = -Inf; when
% that happens for every class the normalisation below yields NaN and max()
% then always reports index 1.
avgError = zeros(1,6);
KList = [1 2 4 8 16 32];
for numOfK = 1:6
    totalTestData = 0;
    totalCount = 0;
    K = KList(numOfK);
    fprintf(fileID,'\n%%%%%%%%%% RESULTS FOR K=%d %%%%%%%%%%\n',K);
    for testdigit=1:10
        % testdigit is an index (digit value + 1)
        % x is the matrix with the test data, one row per sample
        x = test{testdigit};
        % how much test data for this digit
        N = size(x,1);
        % pith(n,c) is the score of class c for sample n
        pith = zeros(N,10);
        tic
        if N > 0
            % only the first D columns are used
            xd = x(:,1:D);
            for c=1:10
                m = mcell{numOfK,c}(1:K,1:D);
                mixw = reshape(pcell{numOfK,c}(1:K),1,K);
                % clamp at realmin so that a mean of exactly 0 or 1 gives a very
                % large finite penalty instead of -Inf (and 0*(-Inf) = NaN in the product)
                logm = log(max(m,realmin));
                log1m = log(max(1-m,realmin));
                % logjoint(n,k) = log(mixw(k)) + sum_d [ x*log(m) + (1-x)*log(1-m) ]
                logjoint = bsxfun(@plus, xd*logm' + (1-xd)*log1m', log(mixw));
                % log-sum-exp over the K components
                top = max(logjoint,[],2);
                top(~isfinite(top)) = 0; % avoid (-Inf)-(-Inf) when every weight is 0
                pith(:,c) = top + log(sum(exp(bsxfun(@minus,logjoint,top)),2)) + log(ep(c));
            end
        end
        time=toc;
        fprintf(fileID,'\nTime: %f', time);
        % turn the log scores into a distribution over the 10 classes for every row;
        % subtracting the row maximum first keeps exp() from underflowing
        maxp = max(pith,[],2)'; % max of every row
        pith = exp(bsxfun(@minus,pith,maxp'));
        sumpith = sum(pith,2);
        pith = bsxfun(@rdivide,pith,sumpith);
        % I want to check how many times in the test data the probability that a digit was indeed
        % the one it should be, is the largest of all! E.g. for a test digit of '4' I expect the probability
        % that it is indeed '4' to be larger than for it to be '0','1','2','3','5','6','7','8' or '9'.
        % a column-vector with the indexes of the largest elements of every row of pith matrix:
        [~,index] = max(pith,[],2);
        % a mistake is every sample whose most probable class is not its own digit
        count = sum(index ~= testdigit);
        fprintf(fileID,'\nFor the digit=%d we found %d mistakes in %d test data. Error Percentage:%.3f\n',testdigit-1,count,N,(count/N)*100);
        totalCount = totalCount + count;
        totalTestData = totalTestData + N;
    end
    avgError(numOfK) = (totalCount/totalTestData)*100;
end
% Write one summary line per K with its overall error percentage, then close the report.
% fprintf reuses the format for every column of the 2-by-6 matrix, i.e. once per (K, error) pair.
fprintf(fileID,'For K=%d, the total error percentage was %.3f\n',[KList(1:6); avgError(1:6)]);
fclose(fileID);