Skip to content
Browse files

RF: Kill Harvestable

Was never fully usable/transparent -- now dead
  • Loading branch information...
1 parent 8b10715 commit ed4e830987468c689b111439f7f104dff90ee94d @hanke committed Mar 29, 2012
Showing with 2 additions and 183 deletions.
  1. +0 −12 doc/source/devguide.rst
  2. +0 −143 mvpa2/base/state.py
  3. +2 −11 mvpa2/clfs/meta.py
  4. +0 −17 mvpa2/tests/test_clf.py
View
12 doc/source/devguide.rst
@@ -543,18 +543,6 @@ Things to implement for the next release (Release goals)
Numeric parameters can be part of .params Collection now, so they are
joined together.
-* Provide sufficient documentation about internal variable naming to make
- Harvester/Harvesting functionality usable. Currently the user is supposed
- to know, how a particular *local* variable is called to be able to harvest
- e.g. `feature_ids` of classifiers over cross-validation folds::
-
- class.HARVESTABLE={'blah' : ' some description'}
-
- Add information on HARVESTABLE and ConditionalAttribute
- Collectable -> Attribute
-
- base.attributes
-
* Restructure code base (incl. renaming and moving pieces)
Let's use the following list to come up with a nice structure for all
View
143 mvpa2/base/state.py
@@ -1025,146 +1025,3 @@ def __repr__(self, prefixes=None, fullname=False):
descr = property(lambda self: self.__descr,
doc="Description of the object if any")
-
-
-
-class Harvestable(ClassWithCollections):
- """Classes inherited from this class intend to collect attributes
- within internal processing.
-
- Subclassing Harvestable we gain ability to collect any internal
- data from the processing which is especially important if an
- object performs something in loop and discards some intermediate
- possibly interesting results (like in case of
- CrossValidatedTransferError and ca of the trained classifier
- or TransferError).
-
- """
-
- harvested = ConditionalAttribute(enabled=False, doc=
- """Store specified attributes of classifiers at each split""")
-
- _KNOWN_COPY_METHODS = [ None, 'copy', 'deepcopy' ]
-
-
- def __init__(self, harvest_attribs=None, copy_attribs='copy', **kwargs):
- """Initialize state of harvestable
-
- Parameters
- harvest_attribs : list of (str or dict)
- What attributes of call to store and return within
- harvested conditional attribute. If an item is a dictionary,
- following keys are used ['name', 'copy'].
- copy_attribs : None or str, optional
- Default copying. If None -- no copying, 'copy'
- - shallow copying, 'deepcopy' -- deepcopying.
-
- """
- ClassWithCollections.__init__(self, **kwargs)
-
- self.__attribs = harvest_attribs
- self.__copy_attribs = copy_attribs
-
- self._set_attribs(harvest_attribs)
-
-
- def _set_attribs(self, attribs):
- """Set attributes to harvest
-
- Each attribute in self.__attribs must have following fields
- - name : functional (or arbitrary if 'obj' or 'attr' is set)
- description of the thing to harvest,
- e.g. 'transerror.clf.training_time'
- - obj : name of the object to harvest from (if empty,
- 'self' is assumed),
- e.g. 'transerror'
- - attr : attribute of 'obj' to harvest,
- e.g. 'clf.training_time'
- - copy : None, 'copy' or 'deepcopy' - way to copy attribute
- """
- if attribs:
- # force the state
- self.ca.enable('harvested')
- self.__attribs = []
- for i, attrib in enumerate(attribs):
- if isinstance(attrib, dict):
- if not 'name' in attrib:
- raise ValueError, \
- "Harvestable: attribute must be a string or " + \
- "a dictionary with 'name'"
- else:
- attrib = {'name': attrib}
-
- # assign default method to copy
- if not 'copy' in attrib:
- attrib['copy'] = self.__copy_attribs
-
- # check copy method
- if not attrib['copy'] in self._KNOWN_COPY_METHODS:
- raise ValueError, "Unknown method %s. Known are %s" % \
- (attrib['copy'], self._KNOWN_COPY_METHODS)
-
- if not ('obj' in attrib or 'attr' in attrib):
- # Process the item to harvest
- # split into obj, attr. If obj is empty, then assume self
- split = attrib['name'].split('.', 1)
- if len(split)==1:
- obj, attr = split[0], None
- else:
- obj, attr = split
- attrib.update({'obj':obj, 'attr':attr})
-
- if attrib['obj'] == '':
- attrib['obj'] = 'self'
-
- # TODO: may be enabling of the ca??
-
- self.__attribs.append(attrib) # place value back
- else:
- # just to make sure it is not None or 0
- self.__attribs = []
-
-
- def _harvest(self, vars):
- """The harvesting function: must obtain dictionary of variables
- from the caller.
-
- Parameters
- ----------
- vars : dict
- Dictionary of available data. Most often locals() could be
- passed as `vars`. Note that private attributes to be harvested
- should first be bound to a local variable
-
- Returns
- -------
- nothing
- """
-
- if not self.ca.is_enabled('harvested') or len(self.__attribs)==0:
- return
-
- if not self.ca.is_set('harvested'):
- self.ca.harvested = dict([(a['name'], [])
- for a in self.__attribs])
-
- for attrib in self.__attribs:
- attrv = vars[attrib['obj']]
-
- # access particular attribute if needed
- if not attrib['attr'] is None:
- attrv = eval('attrv.%s' % attrib['attr'])
-
- # copy the value if needed
- attrv = {'copy':copy.copy,
- 'deepcopy':copy.deepcopy,
- None:lambda x:x}[attrib['copy']](attrv)
-
- self.ca.harvested[attrib['name']].append(attrv)
-
-
- harvest_attribs = property(fget=lambda self:self.__attribs,
- fset=_set_attribs)
-
-
-
View
13 mvpa2/clfs/meta.py
@@ -32,8 +32,7 @@
from mvpa2.datasets.miscfx import get_samples_by_attr
from mvpa2.misc.attrmap import AttributeMap
from mvpa2.base.dochelpers import _str
-from mvpa2.base.state import ConditionalAttribute, ClassWithCollections, \
- Harvestable
+from mvpa2.base.state import ConditionalAttribute, ClassWithCollections
from mvpa2.clfs.base import Classifier
from mvpa2.clfs.distance import cartesian_distance
@@ -53,14 +52,13 @@
from mvpa2.base import debug
-class BoostedClassifier(Classifier, Harvestable):
+class BoostedClassifier(Classifier):
"""Classifier containing the farm of other classifiers.
Should rarely be used directly. Use one of its children instead
"""
# should not be needed if we have prediction_estimates upstairs
- # raw_predictions should be handled as Harvestable???
raw_predictions = ConditionalAttribute(enabled=False,
doc="Predictions obtained from each classifier")
@@ -69,7 +67,6 @@ class BoostedClassifier(Classifier, Harvestable):
def __init__(self, clfs=None, propagate_ca=True,
- harvest_attribs=None, copy_attribs='copy',
**kwargs):
"""Initialize the instance.
@@ -90,7 +87,6 @@ def __init__(self, clfs=None, propagate_ca=True,
clfs = []
Classifier.__init__(self, **kwargs)
- Harvestable.__init__(self, harvest_attribs, copy_attribs)
self.__clfs = None
"""Pylint friendly definition of __clfs"""
@@ -124,9 +120,6 @@ def _posttrain(self, dataset):
Harvest over the trained classifiers if it was asked to do so
"""
Classifier._posttrain(self, dataset)
- if self.ca.is_enabled('harvested'):
- for clf in self.__clfs:
- self._harvest(locals())
if self.params.retrainable:
self.__changedData_isset = False
@@ -1138,8 +1131,6 @@ class SplitClassifier(CombinedClassifier):
all: map sets of labels into 2 categories...
"""
- # TODO: unify with CrossValidatedTransferError which now uses
- # harvest_attribs to expose gathered attributes
stats = ConditionalAttribute(enabled=False,
doc="Resultant confusion whenever classifier trained " +
"on 1 part and tested on 2nd part of each split")
View
17 mvpa2/tests/test_clf.py
@@ -438,23 +438,6 @@ def test_split_classifier_extended(self, clf_):
# msg="Should classify correctly")
-
- def test_harvesting(self):
- """Basic testing of harvesting based on SplitClassifier
- """
- ds = self.data_bin_1
- clf = SplitClassifier(clf=SameSignClassifier(),
- enable_ca=['stats', 'training_stats'],
- harvest_attribs=['clf.ca.training_time'],
- descr="DESCR")
- clf.train(ds) # train the beast
- # Number of harvested items should be equal to number of chunks
- self.assertEqual(
- len(clf.ca.harvested['clf.ca.training_time']), len(ds.UC))
- # if we can blame multiple inheritance and ClassWithCollections.__init__
- self.assertEqual(clf.descr, "DESCR")
-
-
def test_mapped_classifier(self):
samples = np.array([ [ 0, 0, -1], [ 1, 0, 1],
[-1, -1, 1], [-1, 0, 1],

0 comments on commit ed4e830

Please sign in to comment.
Something went wrong with that request. Please try again.