forked from numenta/nupic-legacy
-
Notifications
You must be signed in to change notification settings - Fork 0
/
clamodel.py
1621 lines (1270 loc) · 57.9 KB
/
clamodel.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
# ----------------------------------------------------------------------
# Numenta Platform for Intelligent Computing (NuPIC)
# Copyright (C) 2013, Numenta, Inc. Unless you have an agreement
# with Numenta, Inc., for a separate license for this software code, the
# following terms and conditions apply:
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 3 as
# published by the Free Software Foundation.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
# See the GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see http://www.gnu.org/licenses.
#
# http://numenta.org/licenses/
# ----------------------------------------------------------------------
"""Encapsulation of CLAnetwork that implements the ModelBase."""
import copy
import math
import sys
import os
import json
import random
import datetime
import itertools
import numpy
import logging
import traceback
from collections import defaultdict, namedtuple, deque
from datetime import timedelta
from ordereddict import OrderedDict
from operator import itemgetter
from model import Model
from nupic.algorithms.anomaly import Anomaly as AnomalyImpl
from nupic.data import SENTINEL_VALUE_FOR_MISSING_DATA
from nupic.data.fieldmeta import FieldMetaSpecial, FieldMetaInfo
from nupic.data.filters import AutoResetFilter
from nupic.encoders import (MultiEncoder, DateEncoder, ScalarEncoder)
from nupic.engine import Network
from nupic.research import fdrutilities as fdrutils
from nupic.support import aggregationDivide
from nupic.support.fshelpers import makeDirectoryFromAbsolutePath
from opfutils import (InferenceType, InferenceElement, SensorInput,
PredictionElement, validateOpfJsonValue, initLogger)
from abc import ABCMeta, abstractmethod
# Minimum likelihood a prediction must have to be included in multi-step
# inferences (see CLAModel._removeUnlikelyPredictions).
DEFAULT_LIKELIHOOD_THRESHOLD = 0.0001
# Cap on the number of predictions reported per step in multi-step inference.
DEFAULT_MAX_PREDICTIONS_PER_STEP = 8

# Defaults for the anomaly classifier attached to TemporalAnomaly models.
# Number of records used for training the anomaly classifier.
DEFAULT_ANOMALY_TRAINRECORDS = 4000
# Anomaly-score threshold; >1.0 means auto-detection is effectively disabled
# by default.
DEFAULT_ANOMALY_THRESHOLD = 1.1
# Maximum number of cached records kept by the anomaly classifier.
DEFAULT_ANOMALY_CACHESIZE = 10000
def requireAnomalyModel(func):
  """
  Decorator for methods that require an active anomaly model.

  The wrapped method raises RuntimeError unless the model's inference type
  is TemporalAnomaly AND an anomaly classifier region is present.

  Fixes vs. original:
  - the two adjacent string literals in the second error previously
    concatenated to "Model mustbe" (missing space);
  - functools.wraps now preserves the wrapped function's metadata
    (__name__, __doc__), which the bare inner function discarded.
  """
  from functools import wraps

  @wraps(func)
  def _decorator(self, *args, **kwargs):
    if self.getInferenceType() != InferenceType.TemporalAnomaly:
      raise RuntimeError("Method required a TemporalAnomaly model.")
    if self._getAnomalyClassifier() is None:
      raise RuntimeError("Model does not support this command. Model must "
                         "be an active anomalyDetector model.")
    return func(self, *args, **kwargs)
  return _decorator
###############################################################
class NetworkInfo(object):
  """ Value object returned by CLAModel.__createCLANetwork().

  Pairs a CLA Network instance with the statistics collectors that were
  attached to it.
  """

  def __init__(self, net, statsCollectors):
    """
    net:             the CLA Network instance
    statsCollectors: sequence of 0 or more CLAStatistic-based instances
    """
    self.net, self.statsCollectors = net, statsCollectors

  def __repr__(self):
    return "NetworkInfo(net=%r, statsCollectors=%r)" % (
      self.net, self.statsCollectors)
class CLAModel(Model):
  """OPF Model implementation backed by a CLA network (sensor region, plus
  optional spatial pooler, temporal pooler, and classifier regions)."""

  # The only inference types this implementation accepts; __init__ raises
  # ValueError for anything else.
  __supportedInferenceKindSet = set((InferenceType.TemporalNextStep,
                                     InferenceType.TemporalClassification,
                                     InferenceType.NontemporalClassification,
                                     InferenceType.NontemporalAnomaly,
                                     InferenceType.TemporalAnomaly,
                                     InferenceType.TemporalMultiStep,
                                     InferenceType.NontemporalMultiStep))

  # Used in log messages.
  __myClassName = "CLAModel"
#############################################################################
def __init__(self,
             inferenceType=InferenceType.TemporalNextStep,
             predictedField=None,
             sensorParams=None,
             spEnable=True,
             spParams=None,
             # TODO: We can't figure out what this is. Remove?
             trainSPNetOnlyIfRequested=False,
             tpEnable=True,
             tpParams=None,
             clEnable=True,
             clParams=None,
             anomalyParams=None,
             minLikelihoodThreshold=DEFAULT_LIKELIHOOD_THRESHOLD,
             maxPredictionsPerStep=DEFAULT_MAX_PREDICTIONS_PER_STEP):
  """CLAModel constructor.

  Args:
    inferenceType: A value from the InferenceType enum class.
    predictedField: The field to predict for multistep prediction.
    sensorParams: A dictionary specifying the sensor parameters.
    spEnable: Whether or not to use a spatial pooler.
    spParams: A dictionary specifying the spatial pooler parameters. These
        are passed to the spatial pooler.
    trainSPNetOnlyIfRequested: If set, don't create an SP network unless the
        user requests SP metrics.
    tpEnable: Whether to use a temporal pooler.
    tpParams: A dictionary specifying the temporal pooler parameters. These
        are passed to the temporal pooler.
    clEnable: Whether to use the classifier. If false, the classifier will
        not be created and no predictions will be generated.
    clParams: A dictionary specifying the classifier parameters. These are
        passed to the classifier.
    anomalyParams: Anomaly detection parameters.
    minLikelihoodThreshold: The minimum likelihood value to include in
        inferences. Currently only applies to multistep inferences.
    maxPredictionsPerStep: Maximum number of predictions to include for
        each step in inferences. The predictions with highest likelihood are
        included.

  Raises:
    ValueError: if inferenceType is not one of the supported kinds.
  """
  # Bug fix: the parameter dicts previously defaulted to shared mutable
  # ``{}`` literals; any mutation downstream would leak across instances.
  # ``None`` sentinels preserve the observable behavior while isolating
  # each instance.
  sensorParams = {} if sensorParams is None else sensorParams
  spParams = {} if spParams is None else spParams
  tpParams = {} if tpParams is None else tpParams
  clParams = {} if clParams is None else clParams
  anomalyParams = {} if anomalyParams is None else anomalyParams

  if not inferenceType in self.__supportedInferenceKindSet:
    raise ValueError("{0} received incompatible inference type: {1}"\
                     .format(self.__class__, inferenceType))

  # Call super class constructor
  super(CLAModel, self).__init__(inferenceType)

  # self.__restoringFromState is set to True by our __setstate__ method
  # and back to False at completion of our _deSerializeExtraData() method.
  self.__restoringFromState = False
  self.__restoringFromV1 = False

  # Initialize logging
  self.__logger = initLogger(self)
  self.__logger.debug("Instantiating %s." % self.__myClassName)

  # TODO: VERBOSITY should be deprecated since we now have logging with levels
  self.__VERBOSITY = 0

  self._minLikelihoodThreshold = minLikelihoodThreshold
  self._maxPredictionsPerStep = maxPredictionsPerStep

  # set up learning parameters (note: these may be replaced via
  # enable/disable//SP/TP//Learning methods)
  self.__spLearningEnabled = bool(spEnable)
  self.__tpLearningEnabled = bool(tpEnable)

  # Explicitly exclude the TP if this type of inference doesn't require it
  if not InferenceType.isTemporal(self.getInferenceType()) \
     or self.getInferenceType() == InferenceType.NontemporalMultiStep:
    tpEnable = False

  self._netInfo = None
  self._hasSP = spEnable
  self._hasTP = tpEnable
  self._hasCL = clEnable

  self._classifierInputEncoder = None
  self._predictedFieldIdx = None
  self._predictedFieldName = None
  self._numFields = None
  self._anomalyInst = None

  # -----------------------------------------------------------------------
  # Create the network
  self._netInfo = self.__createCLANetwork(
      sensorParams, spEnable, spParams, tpEnable, tpParams, clEnable,
      clParams, anomalyParams)

  # Initialize Spatial Anomaly detection parameters
  if self.getInferenceType() == InferenceType.NontemporalAnomaly:
    self._getSPRegion().setParameter('anomalyMode', True)

  # Initialize Temporal Anomaly detection parameters
  if self.getInferenceType() == InferenceType.TemporalAnomaly:
    self._getTPRegion().setParameter('anomalyMode', True)
    self._anomalyInst = AnomalyImpl(useTP=self._getTPRegion())
  else:
    self._anomalyInst = AnomalyImpl()

  # -----------------------------------------------------------------------
  # This flag, if present tells us not to train the SP network unless
  # the user specifically asks for the SP inference metric
  self.__trainSPNetOnlyIfRequested = trainSPNetOnlyIfRequested

  self.__numRunCalls = 0

  # Tracks whether finishedLearning() has been called
  self.__finishedLearning = False

  self.__logger.info("Instantiated %s" % self.__class__.__name__)
def getParameter(self, paramName):
  """Return the value of a runtime parameter of this model.

  Only '__numRunCalls' is exposed; any other name raises RuntimeError.
  """
  if paramName != '__numRunCalls':
    raise RuntimeError("'%s' parameter is not exposed by clamodel." % \
      (paramName))
  return self.__numRunCalls
#############################################################################
def resetSequenceStates(self):
  """ [virtual method override] Resets the model's sequence states. Normally
  called to force the delineation of a sequence, such as between OPF tasks.
  """
  # Nothing to do unless the model actually has a temporal pooler.
  if not self._hasTP:
    return
  self._getTPRegion().executeCommand(['resetSequenceStates'])
  self.__logger.debug("CLAModel.resetSequenceStates(): reset temporal "
                      "pooler's sequence states")
#############################################################################
def finishLearning(self):
  """ [virtual method override] Puts the model permanently into a "finished
  learning" state; subsequent records can no longer train it.

  NOTE: learning cannot be re-enabled on this instance afterwards (regions
  may prune data structures needed for learning).
  """
  assert not self.__finishedLearning

  # Tell each learning region to wrap up.
  if self._hasSP:
    self._getSPRegion().executeCommand(['finishLearning'])
    self.__logger.debug(
      "CLAModel.finishLearning(): finished SP learning")

  if self._hasTP:
    self._getTPRegion().executeCommand(['finishLearning'])
    self.__logger.debug(
      "CLAModel.finishLearning(): finished TP learning")

  self.__spLearningEnabled = False
  self.__tpLearningEnabled = False
  self.__finishedLearning = True
def setFieldStatistics(self, fieldStats):
  """Propagate per-field statistics to the encoders.

  The first argument of setFieldStats is an encoder field name; since we
  use a MultiEncoder we pass an empty name and let it forward the stats to
  the underlying encoders by field name.
  """
  self._getEncoder().setFieldStats('', fieldStats)
def enableLearning(self):
  """[override] Enable learning for this model and its encoders."""
  super(CLAModel, self).enableLearning()
  # Keep the encoder's learning flag in sync with the model's.
  self.setEncoderLearning(True)
def disableLearning(self):
  """[override] Disable learning for this model and its encoders."""
  super(CLAModel, self).disableLearning()
  # Keep the encoder's learning flag in sync with the model's.
  self.setEncoderLearning(False)
def setEncoderLearning(self, learningEnabled):
  """Forward the given learning flag to the sensor's encoder."""
  self._getEncoder().setLearning(learningEnabled)
# Anomaly Accessor Methods
@requireAnomalyModel
def setAnomalyParameter(self, param, value):
  """
  Set a parameter of the anomaly classifier within this model.
  """
  anomalyClassifier = self._getAnomalyClassifier()
  anomalyClassifier.setParameter(param, value)
@requireAnomalyModel
def getAnomalyParameter(self, param):
  """
  Get a parameter of the anomaly classifier within this model.
  """
  anomalyClassifier = self._getAnomalyClassifier()
  return anomalyClassifier.getParameter(param)
@requireAnomalyModel
def anomalyRemoveLabels(self, start, end, labelFilter):
  """
  Remove labels from the anomaly classifier within this model.
  """
  classifierSelf = self._getAnomalyClassifier().getSelf()
  classifierSelf.removeLabels(start, end, labelFilter)
@requireAnomalyModel
def anomalyAddLabel(self, start, end, labelName):
  """
  Add a label to the anomaly classifier within this model.
  """
  classifierSelf = self._getAnomalyClassifier().getSelf()
  classifierSelf.addLabel(start, end, labelName)
@requireAnomalyModel
def anomalyGetLabels(self, start, end):
  """
  Get labels from the anomaly classifier within this model.
  """
  classifierSelf = self._getAnomalyClassifier().getSelf()
  return classifierSelf.getLabels(start, end)
def run(self, inputRecord):
  """ run one iteration of this model.

  args:
      inputRecord is a record object formatted according to
          nupic.data.RecordStream.getNextRecordDict() result format.
  return:
      An ModelResult namedtuple (see opfutils.py) The contents of
      ModelResult.inferences depends on the the specific inference type
      of this model, which can be queried by getInferenceType()
  """
  # Running while restoring from a checkpoint is not supported.
  assert not self.__restoringFromState
  assert inputRecord
  results = super(CLAModel, self).run(inputRecord)
  self.__numRunCalls += 1
  self.__logger.debug("CLAModel.run() inputRecord=%s", (inputRecord))
  results.inferences = {}
  # -------------------------------------------------------------------------
  # Turn learning on or off?
  # A '_learning' key in the record overrides the model's learning state.
  if '_learning' in inputRecord:
    if inputRecord['_learning']:
      self.enableLearning()
    else:
      self.disableLearning()
  ###########################################################################
  # Predictions and Learning
  ###########################################################################
  # NOTE(review): 'predictions', 'inputRecordSensorMappings' and
  # 'inferenceType' appear unused in this method — candidates for removal.
  predictions = dict()
  inputRecordSensorMappings = dict()
  inferenceType = self.getInferenceType()
  inferenceArgs = self.getInferenceArgs()
  if inferenceArgs is None:
    inferenceArgs = {}
  # Bottom-up pass through the network: sensor -> SP -> TP.
  self._sensorCompute(inputRecord)
  self._spCompute()
  self._tpCompute()
  results.sensorInput = self._getSensorInputRecord(inputRecord)
  tpTopDownComputed = False
  inferences = {}
  # TODO: Reconstruction and temporal classification not used. Remove
  if self._isReconstructionModel():
    inferences = self._reconstructionCompute()
    # Reconstruction already ran the TP top-down pass; tell _anomalyCompute
    # not to repeat it.
    tpTopDownComputed = True
  elif self._isMultiStepModel():
    inferences = self._multiStepCompute(rawInput=inputRecord)
  # For temporal classification. Not used, and might not work anymore
  elif self._isClassificationModel():
    inferences = self._classifcationCompute()
  results.inferences.update(inferences)
  # Anomaly score (if this inference type requires one).
  inferences = self._anomalyCompute(computeTPTopDown=(not tpTopDownComputed))
  results.inferences.update(inferences)
  # -----------------------------------------------------------------------
  # Store the index and name of the predictedField
  results.predictedFieldIdx = self._predictedFieldIdx
  results.predictedFieldName = self._predictedFieldName
  # =========================================================================
  # output
  assert (not self.isInferenceEnabled() or results.inferences is not None), \
    "unexpected inferences: %r" % results.inferences
  #self.__logger.setLevel(logging.DEBUG)
  if self.__logger.isEnabledFor(logging.DEBUG):
    self.__logger.debug("inputRecord: %r, results: %r" % (inputRecord,
                                                          results))
  return results
def _getSensorInputRecord(self, inputRecord):
  """
  inputRecord - dict containing the raw input fed to the sensor.

  Returns a SensorInput object: the 'parsed' representation of the record
  as produced by the sensor region (deep-copied so later computes cannot
  mutate it).
  """
  sensor = self._getSensorRegion()
  sensorSelf = sensor.getSelf()
  dataRow = copy.deepcopy(sensorSelf.getOutputValues('sourceOut'))
  dataDict = copy.deepcopy(inputRecord)
  encodings = sensorSelf.getOutputValues('sourceEncodings')
  category = int(sensor.getOutputData('categoryOut')[0])
  reset = sensor.getOutputData('resetOut')[0]
  return SensorInput(dataRow=dataRow,
                     dataDict=dataDict,
                     dataEncodings=encodings,
                     sequenceReset=reset,
                     category=category)
def _sensorCompute(self, inputRecord):
  """Push one record into the data source and run the sensor's bottom-up
  compute."""
  sensor = self._getSensorRegion()
  self._getDataSource().push(inputRecord)
  sensor.setParameter('topDownMode', False)
  sensor.prepareInputs()
  try:
    sensor.compute()
  except StopIteration as e:
    # The sensor should never exhaust its source here; surface it loudly.
    raise Exception("Unexpected StopIteration", e,
                    "ACTUAL TRACEBACK: %s" % traceback.format_exc())
def _spCompute(self):
  """Run one bottom-up compute of the spatial pooler region, if present."""
  sp = self._getSPRegion()
  if sp is None:
    return
  # Bottom-up pass; inference/learning flags mirror the model's state.
  for name, value in (('topDownMode', False),
                      ('inferenceMode', self.isInferenceEnabled()),
                      ('learningMode', self.isLearningEnabled())):
    sp.setParameter(name, value)
  sp.prepareInputs()
  sp.compute()
def _tpCompute(self):
  """Run one bottom-up compute of the temporal pooler region, if present."""
  tp = self._getTPRegion()
  if tp is None:
    return
  # Bottom-up pass; inference/learning flags mirror the model's state.
  for name, value in (('topDownMode', False),
                      ('inferenceMode', self.isInferenceEnabled()),
                      ('learningMode', self.isLearningEnabled())):
    tp.setParameter(name, value)
  tp.prepareInputs()
  tp.compute()
def _tpTopDownCompute(self):
  """Run a top-down compute pass through the temporal pooler, if present."""
  tp = self._getTPRegion()
  if tp is not None:
    tp.setParameter('topDownMode', True)
    tp.prepareInputs()
    tp.compute()
def _isReconstructionModel(self):
  """True when inferences are produced via top-down reconstruction:
  TemporalNextStep models, or any model whose inference args request
  'useReconstruction'."""
  inferenceType = self.getInferenceType()
  inferenceArgs = self.getInferenceArgs()
  if inferenceType == InferenceType.TemporalNextStep:
    return True
  if not inferenceArgs:
    return False
  return inferenceArgs.get('useReconstruction', False)
def _isMultiStepModel(self):
  """True for inference types handled by the multi-step prediction path."""
  multiStepKinds = (InferenceType.NontemporalMultiStep,
                    InferenceType.NontemporalClassification,
                    InferenceType.TemporalMultiStep,
                    InferenceType.TemporalAnomaly)
  return self.getInferenceType() in multiStepKinds
def _isClassificationModel(self):
  """True for (legacy) temporal classification models.

  Bug fix: the original tested membership against
  ``(InferenceType.TemporalClassification)`` — without a trailing comma the
  parentheses are redundant, so this was ``in`` against a single value, not
  a one-element tuple. The trailing comma restores the intended tuple
  membership test.
  """
  return self.getInferenceType() in (InferenceType.TemporalClassification,)
def _multiStepCompute(self, rawInput):
  """Compute multi-step predictions for one record.

  The active pattern (patternNZ) is taken from the deepest available
  region: the TP's active-cell state if present, else the SP's bottom-up
  output, else the raw sensor encoding.

  rawInput: the raw input record dict for this timestep.
  Returns the inference dict built by _handleCLAClassifierMultiStep().
  Raises RuntimeError when no region can supply a pattern.
  """
  patternNZ = None
  if self._getTPRegion() is not None:
    tp = self._getTPRegion()
    # Flattened nonzero indices of the TP's current active-cell state.
    tpOutput = tp.getSelf()._tfdr.infActiveState['t']
    patternNZ = tpOutput.reshape(-1).nonzero()[0]
  elif self._getSPRegion() is not None:
    sp = self._getSPRegion()
    spOutput = sp.getOutputData('bottomUpOut')
    patternNZ = spOutput.nonzero()[0]
  elif self._getSensorRegion() is not None:
    sensor = self._getSensorRegion()
    sensorOutput = sensor.getOutputData('dataOut')
    patternNZ = sensorOutput.nonzero()[0]
  else:
    # Bug fix: the adjacent string literals previously concatenated to
    # "withoutTP" (missing space).
    raise RuntimeError("Attempted to make multistep prediction without "
                       "TP, SP, or Sensor regions")
  inputTSRecordIdx = rawInput.get('_timestampRecordIdx')
  return self._handleCLAClassifierMultiStep(
      patternNZ=patternNZ,
      inputTSRecordIdx=inputTSRecordIdx,
      rawInput=rawInput)
def _classifcationCompute(self):
  """Run the (legacy) temporal classification path and return a dict with
  the winning category and per-category confidences.

  NOTE: the method name retains its historical misspelling; callers
  reference it as-is.
  """
  classifier = self._getClassifierRegion()
  classifier.setParameter('inferenceMode', True)
  classifier.setParameter('learningMode', self.isLearningEnabled())
  classifier.prepareInputs()
  classifier.compute()

  # categoriesOut holds one score per category; the argmax is the index of
  # the winning category.
  classificationDist = classifier.getOutputData('categoriesOut')
  winningCategory = classificationDist.argmax()
  probabilities = classifier.getOutputData('categoryProbabilitiesOut')
  numCategories = classifier.getParameter('activeOutputCount')
  classConfidences = dict(zip(xrange(numCategories), probabilities))

  return {
    InferenceElement.classification: winningCategory,
    InferenceElement.classConfidences: {0: classConfidences},
  }
def _reconstructionCompute(self):
  """Produce next-step predictions via top-down reconstruction.

  Runs a top-down pass TP -> SP -> sensor and decodes the predicted row.
  Returns {} when inference is disabled.
  """
  if not self.isInferenceEnabled():
    return {}

  sp = self._getSPRegion()
  sensor = self._getSensorRegion()

  # Top-down flow, deepest region first: TP, then SP, then sensor.
  self._tpTopDownCompute()
  for region in (sp, sensor):
    region.setParameter('topDownMode', True)
    region.prepareInputs()
    region.compute()

  # Use getOutputValues() rather than getOutputData() because the decoded
  # values may contain strings, which cannot pass through the Region.cpp
  # layer. Each entry of predictionRow is in the same type as the original
  # encoder input (e.g. a string for category fields).
  sensorSelf = sensor.getSelf()
  predictionRow = copy.copy(sensorSelf.getOutputValues('temporalTopDownOut'))
  predictionFieldEncodings = sensorSelf.getOutputValues('temporalTopDownEncodings')

  return {
    InferenceElement.prediction: tuple(predictionRow),
    InferenceElement.encodings: tuple(predictionFieldEncodings),
  }
def _anomalyCompute(self, computeTPTopDown):
  """
  Compute the anomaly score for this iteration, if required.

  computeTPTopDown: If True, first perform a top-down compute pass through
      the TP region (via _tpTopDownCompute()) before reading outputs.

  Returns a (possibly empty) dict of InferenceElement -> value entries.
  """
  inferenceType = self.getInferenceType()
  inferences = {}
  if inferenceType == InferenceType.NontemporalAnomaly:
    sp = self._getSPRegion()
    score = sp.getOutputData("anomalyScore")[0] #TODO move from SP to Anomaly ?
    inferences[InferenceElement.anomalyScore] = score
  # -----------------------------------------------------------------------
  # Temporal Anomaly Score
  if inferenceType == InferenceType.TemporalAnomaly:
    sp = self._getSPRegion()
    tp = self._getTPRegion()
    sensor = self._getSensorRegion()
    if computeTPTopDown:
      self._tpTopDownCompute()
    # Active columns come from the SP when present, otherwise straight from
    # the sensor encoding.
    if sp is not None:
      activeColumns = sp.getOutputData("bottomUpOut").nonzero()[0]
    else:
      activeColumns = sensor.getOutputData('dataOut').nonzero()[0]
    # Calculate the anomaly score using the active columns
    # and previous predicted columns
    inferences[InferenceElement.anomalyScore] = (
        self._anomalyInst.computeAnomalyScore(activeColumns,[]))
    # Calculate the classifier's output and use the result as the anomaly
    # label. Stores as string of results.
    # TODO: make labels work with non-SP models
    if sp is not None:
      self._getAnomalyClassifier().setParameter(
          "activeColumnCount", len(activeColumns))
      self._getAnomalyClassifier().prepareInputs()
      self._getAnomalyClassifier().compute()
      labels = self._getAnomalyClassifier().getSelf().getLabelResults()
      inferences[InferenceElement.anomalyLabel] = "%s" % labels
  return inferences
def _handleCLAClassifierMultiStep(self, patternNZ,
                                  inputTSRecordIdx,
                                  rawInput):
  """ Handle the CLA Classifier compute logic when implementing multi-step
  prediction. This is where the patternNZ is associated with one of the
  other fields from the dataset 0 to N steps in the future. This method is
  used by each type of network (encoder only, SP only, SP +TP) to handle the
  compute logic through the CLA Classifier. It fills in the inference dict with
  the results of the compute.

  Parameters:
  -------------------------------------------------------------------
  patternNZ: The input the CLA Classifier as a list of active input indices
  inputTSRecordIdx: The index of the record as computed from the timestamp
      and aggregation interval. This normally increments by 1
      each time unless there are missing records. If there is no
      aggregation interval or timestamp in the data, this will be
      None.
  rawInput: The raw input to the sensor, as a dict.
  """
  classifier = self._getClassifierRegion()
  if not self._hasCL or classifier is None:
    # No classifier so return an empty dict for inferences.
    return {}

  sensor = self._getSensorRegion()
  minLikelihoodThreshold = self._minLikelihoodThreshold
  maxPredictionsPerStep = self._maxPredictionsPerStep
  inferenceArgs = self.getInferenceArgs()
  needLearning = self.isLearningEnabled()
  inferences = {}

  predictedFieldName = inferenceArgs.get('predictedField', None)

  # Get the classifier input encoder, if we don't have it already
  # (lazily resolved once on the first record and cached on self).
  if self._classifierInputEncoder is None:
    if predictedFieldName is None:
      raise RuntimeError("This experiment description is missing "
                         "the 'predictedField' in its config, which is required "
                         "for multi-step prediction inference.")
    # This is getting index of predicted field if being fed to CLA.
    self._predictedFieldName = predictedFieldName
    encoderList = sensor.getSelf().encoder.getEncoderList()
    self._numFields = len(encoderList)
    fieldNames = sensor.getSelf().encoder.getScalarNames()
    if predictedFieldName in fieldNames:
      self._predictedFieldIdx = fieldNames.index(predictedFieldName)
    else:
      # Predicted field was not fed into the network, only to the classifier
      self._predictedFieldIdx = None
    # In a multi-step model, the classifier input encoder is separate from
    # the other encoders and always disabled from going into the bottom of
    # the network.
    if sensor.getSelf().disabledEncoder is not None:
      encoderList = sensor.getSelf().disabledEncoder.getEncoderList()
    else:
      encoderList = []
    if len(encoderList) >= 1:
      fieldNames = sensor.getSelf().disabledEncoder.getScalarNames()
      self._classifierInputEncoder = encoderList[fieldNames.index(
                                                      predictedFieldName)]
    else:
      # Legacy multi-step networks don't have a separate encoder for the
      # classifier, so use the one that goes into the bottom of the network
      encoderList = sensor.getSelf().encoder.getEncoderList()
      self._classifierInputEncoder = encoderList[self._predictedFieldIdx]

  # Get the actual value and the bucket index for this sample. The
  # predicted field may not be enabled for input to the network, so we
  # explicitly encode it outside of the sensor
  # TODO: All this logic could be simpler if in the encoder itself
  absoluteValue = rawInput[predictedFieldName]
  bucketIdx = self._classifierInputEncoder.getBucketIndices(absoluteValue)[0]

  # Convert the absolute values to deltas if necessary
  # The bucket index should be handled correctly by the underlying delta encoder
  if self._classifierInputEncoder.isDelta():
    # Make the delta before any values have been seen 0 so that we do not mess up the
    # range for the adaptive scalar encoder.
    if not hasattr(self,"_ms_prevVal"):
      self._ms_prevVal = absoluteValue
    prevValue = self._ms_prevVal
    self._ms_prevVal = absoluteValue
    actualValue = absoluteValue - prevValue
  else:
    actualValue = absoluteValue

  # A NaN input marks a missing value; replace it with the sentinel the
  # classifier understands.
  if isinstance(actualValue, float) and math.isnan(actualValue):
    actualValue = SENTINEL_VALUE_FOR_MISSING_DATA

  # Pass this information to the classifier's custom compute method
  # so that it can assign the current classification to possibly
  # multiple patterns from the past and current, and also provide
  # the expected classification for some time step(s) in the future.
  classifier.setParameter('inferenceMode', True)
  classifier.setParameter('learningMode', needLearning)
  classificationIn = {'bucketIdx': bucketIdx,
                      'actValue': actualValue}

  # Handle missing records
  if inputTSRecordIdx is not None:
    recordNum = inputTSRecordIdx
  else:
    recordNum = self.__numRunCalls

  clResults = classifier.getSelf().customCompute(recordNum=recordNum,
                                                 patternNZ=patternNZ,
                                                 classification=classificationIn)

  # ---------------------------------------------------------------
  # Get the prediction for every step ahead learned by the classifier
  predictionSteps = classifier.getParameter('steps')
  predictionSteps = [int(x) for x in predictionSteps.split(',')]

  # We will return the results in this dict. The top level keys
  # are the step number, the values are the relative likelihoods for
  # each classification value in that time step, represented as
  # another dict where the keys are the classification values and
  # the values are the relative likelihoods.
  inferences[InferenceElement.multiStepPredictions] = dict()
  inferences[InferenceElement.multiStepBestPredictions] = dict()

  # ======================================================================
  # Plug in the predictions for each requested time step.
  for steps in predictionSteps:
    # From the clResults, compute the predicted actual value. The
    # CLAClassifier classifies the bucket index and returns a list of
    # relative likelihoods for each bucket. Let's find the max one
    # and then look up the actual value from that bucket index
    likelihoodsVec = clResults[steps]
    bucketValues = clResults['actualValues']

    # Create a dict of value:likelihood pairs. We can't simply use
    # dict(zip(bucketValues, likelihoodsVec)) because there might be
    # duplicate bucketValues (this happens early on in the model when
    # it doesn't have actual values for each bucket so it returns
    # multiple buckets with the same default actual value).
    likelihoodsDict = dict()
    bestActValue = None
    bestProb = None
    for (actValue, prob) in zip(bucketValues, likelihoodsVec):
      if actValue in likelihoodsDict:
        likelihoodsDict[actValue] += prob
      else:
        likelihoodsDict[actValue] = prob
      # Keep track of best
      if bestProb is None or likelihoodsDict[actValue] > bestProb:
        bestProb = likelihoodsDict[actValue]
        bestActValue = actValue

    # Remove entries with 0 likelihood or likelihood less than
    # minLikelihoodThreshold, but don't leave an empty dict.
    likelihoodsDict = CLAModel._removeUnlikelyPredictions(
        likelihoodsDict, minLikelihoodThreshold, maxPredictionsPerStep)

    # ---------------------------------------------------------------------
    # If we have a delta encoder, we have to shift our predicted output value
    # by the sum of the deltas
    if self._classifierInputEncoder.isDelta():
      # Get the prediction history for this number of timesteps.
      # The prediction history is a store of the previous best predicted values.
      # This is used to get the final shift from the current absolute value.
      if not hasattr(self, '_ms_predHistories'):
        self._ms_predHistories = dict()
      predHistories = self._ms_predHistories
      if not steps in predHistories:
        predHistories[steps] = deque()
      predHistory = predHistories[steps]

      # Find the sum of the deltas for the steps and use this to generate
      # an offset from the current absolute value
      sumDelta = sum(predHistory)
      offsetDict = dict()
      for (k, v) in likelihoodsDict.iteritems():
        if k is not None:
          # Reconstruct the absolute value based on the current actual value,
          # the best predicted values from the previous iterations,
          # and the current predicted delta
          offsetDict[absoluteValue+float(k)+sumDelta] = v

      # Push the current best delta to the history buffer for reconstructing the final delta
      if bestActValue is not None:
        predHistory.append(bestActValue)
      # If we don't need any more values in the predictionHistory, pop off
      # the earliest one.
      if len(predHistory) >= steps:
        predHistory.popleft()

      # Provide the offsetDict as the return value
      if len(offsetDict)>0:
        inferences[InferenceElement.multiStepPredictions][steps] = \
            offsetDict
      else:
        inferences[InferenceElement.multiStepPredictions][steps] = \
            likelihoodsDict
      if bestActValue is None:
        inferences[InferenceElement.multiStepBestPredictions][steps] = \
            None
      else:
        inferences[InferenceElement.multiStepBestPredictions][steps] = \
            absoluteValue + sumDelta + bestActValue

    # ---------------------------------------------------------------------
    # Normal case, no delta encoder. Just plug in all our multi-step predictions
    # with likelihoods as well as our best prediction
    else:
      # The multiStepPredictions element holds the probabilities for each
      # bucket
      inferences[InferenceElement.multiStepPredictions][steps] = \
          likelihoodsDict
      inferences[InferenceElement.multiStepBestPredictions][steps] = \
          bestActValue
  return inferences
#############################################################################
@classmethod
def _removeUnlikelyPredictions(cls, likelihoodsDict, minLikelihoodThreshold,
                               maxPredictionsPerStep):
  """Remove entries with 0 likelihood or likelihood less than
  minLikelihoodThreshold, but don't leave an empty dict.

  likelihoodsDict: dict mapping predicted values to their likelihoods;
      pruned in place, and the (possibly rebuilt) dict is returned
  minLikelihoodThreshold: entries with likelihood below this are removed,
      except that the single most-likely entry always survives
  maxPredictionsPerStep: keep at most this many of the highest-likelihood
      entries in the returned dict

  Returns: dict of the surviving (value -> likelihood) entries
  """
  maxVal = (None, None)
  # Iterate over a snapshot of the items: the loop body deletes keys from
  # likelihoodsDict, which would raise RuntimeError on a live .items() view
  # in Python 3 (the previous .iteritems() call was Python-2-only anyway).
  for (k, v) in list(likelihoodsDict.items()):
    if len(likelihoodsDict) <= 1:
      # Never delete the last remaining entry.
      break
    if maxVal[0] is None or v >= maxVal[1]:
      # Found a new running maximum; the previous maximum only stays in the
      # dict if it met the threshold on its own.
      if maxVal[0] is not None and maxVal[1] < minLikelihoodThreshold:
        del likelihoodsDict[maxVal[0]]
      maxVal = (k, v)
    elif v < minLikelihoodThreshold:
      del likelihoodsDict[k]
  # Limit the number of predictions to the most likely ones.
  likelihoodsDict = dict(sorted(likelihoodsDict.items(),
                                key=itemgetter(1),
                                reverse=True)[:maxPredictionsPerStep])
  return likelihoodsDict
def getRuntimeStats(self):
  """ [virtual method override] Get runtime statistics specific to this
  model, e.g. activeCellOverlapAvg.

  Returns: dict mapping statistic names to their values
  """
  stats = {"numRunCalls": self.__numRunCalls}

  # Gather temporal-memory statistics from every registered collector.
  # When the model has no TP, this stays an empty dict.
  temporalStats = {}
  if self._hasTP:
    for collector in self._netInfo.statsCollectors:
      temporalStats.update(collector.getStats())
  stats[InferenceType.getLabel(InferenceType.TemporalNextStep)] = temporalStats

  return stats
def getFieldInfo(self, includeClassifierOnlyField=False):
  """ [virtual method override]
  Returns the sequence of FieldMetaInfo objects describing this model's
  output fields.  These can differ from the FieldMetaInfo objects supplied
  at initialization, because some input fields are transcoded into
  meta-fields (e.g. a datetime becomes dayOfWeek, timeOfDay, ...).

  includeClassifierOnlyField: when True, fields produced by the
      classifier-only encoder (if one exists) are appended to the result

  Returns: tuple of FieldMetaInfo objects (see description above)
  """
  mainEncoder = self._getEncoder()
  names = list(mainEncoder.getScalarNames())
  types = list(mainEncoder.getDecoderOutputFieldTypes())
  assert len(names) == len(types)

  # Optionally append the fields contributed by the classifier-only encoder.
  clEncoder = self._getClassifierOnlyEncoder()
  if includeClassifierOnlyField and clEncoder is not None:
    extraNames = clEncoder.getScalarNames()
    extraTypes = clEncoder.getDecoderOutputFieldTypes()
    assert len(extraNames) == len(extraTypes)
    names.extend(extraNames)
    types.extend(extraTypes)

  # Every generated field carries FieldMetaSpecial.none.
  return tuple(FieldMetaInfo._make((name, fieldType, FieldMetaSpecial.none))
               for name, fieldType in zip(names, types))
def _getLogger(self):
  """Protected accessor through which the Model base class reaches the
  logger object created by this subclass.

  Returns: a logging.Logger instance; should never be None.
  """
  return self.__logger
def _getSPRegion(self):
  """Return the network's spatial pooler ('SP') region, or None when the
  network was built without one.
  """
  regions = self._netInfo.net.regions
  return regions.get('SP', None)
def _getTPRegion(self):
  """Return the network's temporal pooler ('TP') region, or None when the
  network was built without one.
  """
  regions = self._netInfo.net.regions
  return regions.get('TP', None)
def _getSensorRegion(self):
  """Return the network's 'sensor' region.

  Unlike the SP/TP accessors this indexes directly, so a missing sensor
  region raises KeyError rather than returning None.
  """
  netRegions = self._netInfo.net.regions
  return netRegions['sensor']
def _getClassifierRegion(self):
"""
Returns reference to the network's Classifier region
"""
if (self._netInfo.net is not None and
"Classifier" in self._netInfo.net.regions):
return self._netInfo.net.regions["Classifier"]
else: