function [net,f_measure,g_mean,precision,recall,err] = learn_nse(net, data_train, labels_train, data_test, ...
labels_test, smote_params)
% [net,f_measure,g_mean,precision,recall,err] = learn_nse(net, ...
% data_train, labels_train, ...
% data_test, labels_test, ...
% smote_params)
%
% @net - initialized structure. you must initialize:
%   net.a - sigmoid slope (try 0.5)
%   net.b - sigmoid cutoff (try 10)
%   net.threshold - small error threshold (try 0.01)
%   net.mclass - number of classes
%   net.base_classifier - model structure (set model.type), which is
%     submitted to CLASSIFIER_TRAIN.m
% @data_train - cell array of training data. each entry should
%   be an n_observation by n_feature matrix
% @labels_train - cell array of training class labels
% @data_test - cell array of testing data. each entry should
%   be an n_observation by n_feature matrix
% @labels_test - cell array of testing class labels
% @smote_params - optional structure for implementing learn++.cds
%   smote_params.minority_class - the minority class (scalar)
%   smote_params.k - see SMOTE.m
%   smote_params.N - see SMOTE.m
%
% Implementation of Learn++.NSE and Learn++.CDS. If @smote_params
% is specified, then the implementation is Learn++.CDS.
%
% Cite:
% 1) R. Elwell and R. Polikar, "Incremental Learning of Concept Drift
%    in Nonstationary Environments," IEEE Transactions on Neural
%    Networks, vol. 22, no. 10, pp. 1517-1531, 2011.
% 2) G. Ditzler and R. Polikar, "Incremental learning of concept drift
%    from streaming imbalanced data," IEEE Transactions on Knowledge
%    and Data Engineering, 2012, accepted.
%
% @Author: Gregory Ditzler (gregory.ditzler@gmail.com)
%
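% Example (a minimal usage sketch; the 'CART' model type, the two-class
% setup, and the variable names below are illustrative assumptions, not
% requirements of this file):
%   model.type = 'CART'; % any type accepted by CLASSIFIER_TRAIN.m
%   net_init.a = 0.5; % sigmoid slope
%   net_init.b = 10; % sigmoid cutoff
%   net_init.threshold = 0.01; % error floor
%   net_init.mclass = 2; % number of classes
%   net_init.base_classifier = model;
%   % data_train, labels_train, data_test, labels_test are 1-by-T cell
%   % arrays; entry t holds the batch observed at time stamp t
%   [net,fm,gm,prec,rec,err] = learn_nse(net_init, data_train, ...
%     labels_train, data_test, labels_test);
%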
% See also
% SMOTE.m CLASSIFIER_TRAIN.m CLASSIFIER_TEST.m
% learn_nse.m
% Copyright (C) 2013 Gregory Ditzler
%
% This program is free software: you can redistribute it and/or modify
% it under the terms of the GNU General Public License as published by
% the Free Software Foundation, either version 3 of the License, or
% (at your option) any later version.
%
% This program is distributed in the hope that it will be useful,
% but WITHOUT ANY WARRANTY; without even the implied warranty of
% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
% GNU General Public License for more details.
%
% You should have received a copy of the GNU General Public License
% along with this program. If not, see <http://www.gnu.org/licenses/>.
if nargin < 5
error('LEARN_NSE :: You need to specify all the required inputs.')
elseif nargin == 5
smote_params = {};
smote_on = false;
net.type = 'learn++.nse';
else % nargin == 6
smote_on = true;
net.type = 'learn++.cds';
end
n_timestamps = length(data_train); % total number of time stamps
net.classifiers = {}; % cell array of base classifiers
net.w = []; % classifier voting weights
net.initialized = false; % the ensemble has not been initialized yet
net.t = 1; % track the time stamp of learning
net.classifierweights = {}; % history of the voting weight vectors
f_measure = zeros(n_timestamps, net.mclass);
g_mean = zeros(n_timestamps, 1);
recall = zeros(n_timestamps, net.mclass);
precision = zeros(n_timestamps, net.mclass);
err = zeros(n_timestamps, 1);
for ell = 1:n_timestamps
% get the training data for the 't'th round
data_train_t = data_train{ell};
labels_train_t = labels_train{ell};
data_test_t = data_test{ell};
labels_test_t = labels_test{ell};
if smote_on == true
% Learn++.CDS: oversample the minority class with SMOTE, then
% shuffle the augmented batch
syn_data = smote(...
data_train_t(labels_train_t == smote_params.minority_class,:), ...
smote_params.k, ...
smote_params.N);
data_train_t = [data_train_t; syn_data];
labels_train_t = [labels_train_t;...
ones(size(syn_data,1),1) * smote_params.minority_class];
i = randperm(numel(labels_train_t));
labels_train_t = labels_train_t(i);
data_train_t = data_train_t(i, :);
end
% if the ensemble has not been initialized, reset the error matrix
if net.initialized == false
net.beta = [];
end
mt = size(data_train_t,1); % number of training examples
Dt = ones(mt,1)/mt; % initialize instance weight distribution
if net.initialized
% STEP 1: Compute error of the existing ensemble on new data
predictions = classify_ensemble(net, data_train_t, labels_train_t);
Et = sum((predictions~=labels_train_t)/mt);
Bt = Et/(1-Et); % this is suggested in Metin's IEEE Paper
if Bt==0, Bt = 1/mt; end; % clip
% STEP 2: update and normalize the instance weights
Dt(predictions==labels_train_t) = Dt(predictions==labels_train_t) * Bt;
Dt = Dt/sum(Dt);
end
% STEP 3: New classifier
net.classifiers{end + 1} = classifier_train(...
net.base_classifier, ...
data_train_t, ...
labels_train_t);
% STEP 4: Evaluate all existing classifiers on new data
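% (t below equals net.t: the number of classifiers generated so far)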
t = size(net.classifiers,2);
y = decision_ensemble(net, data_train_t, labels_train_t, t);
for k = 1:net.t
epsilon_tk = sum(Dt(y(:, k) ~= labels_train_t));
if (k<net.t)&&(epsilon_tk>0.5)
epsilon_tk = 0.5;
elseif (k==net.t)&&(epsilon_tk>0.5)
% the newest classifier is too weak; retrain it on the current batch
net.classifiers{k} = classifier_train(...
net.base_classifier, ...
data_train_t, ...
labels_train_t);
epsilon_tk = sum(Dt(y(:, k) ~= labels_train_t));
epsilon_tk(epsilon_tk > 0.5) = 0.5; % we tried; clip the loss
end
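% normalized error of classifier k at time net.t,
% beta = epsilon/(1-epsilon), as in ref. [1]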
net.beta(net.t,k) = epsilon_tk / (1-epsilon_tk);
end
% STEPS 5 & 6: compute the classifier voting weights
if net.t==1,
if net.beta(net.t,net.t)<net.threshold,
net.beta(net.t,net.t) = net.threshold;
end
net.w(net.t,net.t) = log(1/net.beta(net.t,net.t));
else
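% STEP 5 (see ref. [1]): average each classifier's normalized errors
% over time with a sigmoid weighting that emphasizes recent batches:
% omega = 1./(1+exp(-a*(j-b))), normalized to sum to one, and
% beta_hat is the omega-weighted average of the beta values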
for k = 1:net.t,
b = t - k - net.b;
omega = 1:(net.t - k + 1);
omega = 1./(1+exp(-net.a*(omega-b)));
omega = (omega/sum(omega))';
beta_hat = sum(omega.*(net.beta(k:net.t,k)));
if beta_hat < net.threshold,
beta_hat = net.threshold;
end
net.w(net.t,k) = log(1/beta_hat);
end
end
% record the voting weights, then form the composite hypothesis
% (STEP 7) on the current test batch
net.classifierweights{end+1} = net.w(end,:);
[predictions,posterior] = classify_ensemble(net, data_test_t, labels_test_t);
[f_measure(ell,:),g_mean(ell),recall(ell,:),precision(ell,:),...
err(ell)] = stats(labels_test_t, predictions, net.mclass);
net.initialized = 1;
net.t = net.t + 1;
end
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%% AUXILIARY FUNCTIONS
function y = decision_ensemble(net, data, labels, n_experts)
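% return the raw predictions of the first n_experts classifiers on the
% given data; column k holds the labels predicted by classifier k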
y = zeros(numel(labels), n_experts);
for k = 1:n_experts
y(:, k) = classifier_test(net.classifiers{k}, data);
end
function [predictions,posterior] = classify_ensemble(net, data, labels)
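% weighted majority vote over the ensemble: each classifier adds its
% voting weight to the class it predicts; the composite prediction is
% the class with the largest total, and posterior holds the normalized
% vote mass in each class (not a calibrated probability)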
n_experts = length(net.classifiers);
weights = net.w(end,:);
if n_experts ~= length(weights)
error('classify_ensemble :: the number of weights and experts must match!')
end
p = zeros(numel(labels), net.mclass);
for k = 1:n_experts
y = classifier_test(net.classifiers{k}, data);
% this is inefficient, but it does the job
for m = 1:numel(y)
p(m,y(m)) = p(m,y(m)) + weights(k);
end
end
[~,predictions] = max(p, [], 2);
posterior = p./repmat(sum(p,2),1,net.mclass);