This repository was archived by the owner on Jul 10, 2021. It is now read-only.

Support for memmap as input type #181

@iarroyof

Hi, I have a problem while trying to use Regressor. I'm loading dense data from a file with X_train = numpy.loadtxt(); each row of the file is a dense vector, and there are 525 rows with 300 dimensions each. I configured a RandomizedSearchCV called rs.
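
Roughly, the setup looks like the sketch below; the file paths and the parameter grid are placeholders, not my exact script:

    import numpy
    from sklearn.grid_search import RandomizedSearchCV
    from sknn.mlp import Regressor, Layer

    # Dense data, one 300-dimensional row vector per line (placeholder paths).
    X_train = numpy.loadtxt("X_train.txt")   # shape (525, 300)
    y_train = numpy.loadtxt("y_train.txt")   # shape (525,)

    nn = Regressor(layers=[Layer("Rectifier", units=10),
                           Layer("Rectifier", units=10),
                           Layer("Linear")],
                   batch_size=1)

    # Placeholder search space; the real one also samples layer types,
    # learning rules, momentum, etc. (see the parameters in the traceback).
    params = {"hidden0__units": range(4, 64),
              "hidden1__units": range(4, 64),
              "learning_rate": [0.01, 0.05, 0.1]}

    rs = RandomizedSearchCV(nn, param_distributions=params, n_iter=10,
                            n_jobs=16)
    rs.fit(X_train, y_train)   # crashes as shown below

The array shapes and the full traceback: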

X_train: (525, 300)
y_train: (525,)
X_test: (225, 300)
Traceback (most recent call last):
  File "nn.py", line 56, in <module>
    rs.fit(X_train, y_train)
  File "/usr/local/lib/python2.7/dist-packages/sklearn/grid_search.py", line 996, in fit
    return self._fit(X, y, sampled_params)
  File "/usr/local/lib/python2.7/dist-packages/sklearn/grid_search.py", line 553, in _fit
    for parameters in parameter_iterable
  File "/usr/local/lib/python2.7/dist-packages/sklearn/externals/joblib/parallel.py", line 812, in __call__
    self.retrieve()
  File "/usr/local/lib/python2.7/dist-packages/sklearn/externals/joblib/parallel.py", line 762, in retrieve
    raise exception
sklearn.externals.joblib.my_exceptions.JoblibAttributeError: JoblibAttributeError
___________________________________________________________________________
Multiprocessing exception:
...........................................................................
/almac/ignacio/nlp-pipeline/nn.py in <module>()
     51     exit()
     52 
     53 crash = True
     54 while(crash):
     55     try:
---> 56         rs.fit(X_train, y_train)
     57         crash = False
     58     except RuntimeError:
     59         sys.stderr.write("----------- [Crashed by RunTimeERROR] --------------------- \n")
     60         crash = True

...........................................................................
/usr/local/lib/python2.7/dist-packages/sklearn/grid_search.py in fit(self=RandomizedSearchCV(cv=None, error_score='raise',...e, refit=True,
          scoring=None, verbose=0), X=array([[ -7.19239362e+05,  -2.57225021e+05,  -5....758269e+00,   6.37787139e-02,  -6.47838211e-02]]), y=array([ 2.6 ,  2.6 ,  3.8 ,  4.2 ,  0.8 ,  1.4 ,... 4.2 ,  0.  ,  2.25,
        2.6 ,  2.  ,  3.  ]))
    991 
    992         """
    993         sampled_params = ParameterSampler(self.param_distributions,
    994                                           self.n_iter,
    995                                           random_state=self.random_state)
--> 996         return self._fit(X, y, sampled_params)
        self._fit = <bound method RandomizedSearchCV._fit of Randomi..., refit=True,
          scoring=None, verbose=0)>
        X = array([[ -7.19239362e+05,  -2.57225021e+05,  -5....758269e+00,   6.37787139e-02,  -6.47838211e-02]])
        y = array([ 2.6 ,  2.6 ,  3.8 ,  4.2 ,  0.8 ,  1.4 ,... 4.2 ,  0.  ,  2.25,
        2.6 ,  2.  ,  3.  ])
        sampled_params = <sklearn.grid_search.ParameterSampler object>
    997 
    998 
    999 
   1000 

...........................................................................
/usr/local/lib/python2.7/dist-packages/sklearn/grid_search.py in _fit(self=RandomizedSearchCV(cv=None, error_score='raise',...e, refit=True,
          scoring=None, verbose=0), X=array([[ -7.19239362e+05,  -2.57225021e+05,  -5....758269e+00,   6.37787139e-02,  -6.47838211e-02]]), y=array([ 2.6 ,  2.6 ,  3.8 ,  4.2 ,  0.8 ,  1.4 ,... 4.2 ,  0.  ,  2.25,
        2.6 ,  2.  ,  3.  ]), parameter_iterable=<sklearn.grid_search.ParameterSampler object>)
    548         )(
    549             delayed(_fit_and_score)(clone(base_estimator), X, y, self.scorer_,
    550                                     train, test, self.verbose, parameters,
    551                                     self.fit_params, return_parameters=True,
    552                                     error_score=self.error_score)
--> 553                 for parameters in parameter_iterable
        parameters = undefined
        parameter_iterable = <sklearn.grid_search.ParameterSampler object>
    554                 for train, test in cv)
    555 
    556         # Out is a list of triplet: score, estimator, n_test_samples
    557         n_fits = len(out)

...........................................................................
/usr/local/lib/python2.7/dist-packages/sklearn/externals/joblib/parallel.py in __call__(self=Parallel(n_jobs=16), iterable=<generator object <genexpr>>)
    807             if pre_dispatch == "all" or n_jobs == 1:
    808                 # The iterable was consumed all at once by the above for loop.
    809                 # No need to wait for async callbacks to trigger to
    810                 # consumption.
    811                 self._iterating = False
--> 812             self.retrieve()
        self.retrieve = <bound method Parallel.retrieve of Parallel(n_jobs=16)>
    813             # Make sure that we get a last message telling us we are done
    814             elapsed_time = time.time() - self._start_time
    815             self._print('Done %3i out of %3i | elapsed: %s finished',
    816                         (len(self._output), len(self._output),

---------------------------------------------------------------------------
Sub-process traceback:
---------------------------------------------------------------------------
AttributeError                                     Sun Feb 21 19:22:11 2016
PID: 8977                                     Python 2.7.6: /usr/bin/python
...........................................................................
/usr/local/lib/python2.7/dist-packages/sklearn/externals/joblib/parallel.pyc in __call__(self=<sklearn.externals.joblib.parallel.BatchedCalls object>)
     67     def __init__(self, iterator_slice):
     68         self.items = list(iterator_slice)
     69         self._size = len(self.items)
     70 
     71     def __call__(self):
---> 72         return [func(*args, **kwargs) for func, args, kwargs in self.items]
     73 
     74     def __len__(self):
     75         return self._size
     76 

...........................................................................
/usr/local/lib/python2.7/dist-packages/sklearn/cross_validation.pyc in _fit_and_score(estimator=Regressor(batch_size=1, callback=None, debug=Fal...e, warning=None, weight_decay=None, weights=None), X=memmap([[ -7.19239362e+05,  -2.57225021e+05,  -5...758269e+00,   6.37787139e-02,  -6.47838211e-02]]), y=array([ 2.6 ,  2.6 ,  3.8 ,  4.2 ,  0.8 ,  1.4 ,... 4.2 ,  0.  ,  2.25,
        2.6 ,  2.  ,  3.  ]), scorer=<function _passthrough_scorer>, train=array([175, 176, 177, 178, 179, 180, 181, 182, 1...15, 516, 517, 518, 519, 520, 521, 522, 523, 524]), test=array([  0,   1,   2,   3,   4,   5,   6,   7,  ..., 167, 168,
       169, 170, 171, 172, 173, 174]), verbose=0, parameters={'hidden0__type': 'Rectifier', 'hidden0__units': 5, 'hidden1__type': 'Rectifier', 'hidden1__units': 37, 'learning_momentum': 1.2204617397379565, 'learning_rate': 0.09480671449399372, 'learning_rule': 'rmsprop', 'output__type': 'Linear', 'regularize': None}, fit_params={}, return_train_score=False, return_parameters=True, error_score='raise')
   1526 
   1527     try:
   1528         if y_train is None:
   1529             estimator.fit(X_train, **fit_params)
   1530         else:
-> 1531             estimator.fit(X_train, y_train, **fit_params)
   1532 
   1533     except Exception as e:
   1534         if error_score == 'raise':
   1535             raise

...........................................................................
/usr/local/lib/python2.7/dist-packages/sknn/mlp.pyc in fit(self=Regressor(batch_size=1, callback=None, debug=Fal...e, warning=None, weight_decay=None, weights=None), X=memmap([[ -1.71586427e+07,   2.45456060e+06,   6...758269e+00,   6.37787139e-02,  -6.47838211e-02]]), y=array([ 4.4 ,  2.2 ,  3.4 ,  4.  ,  2.6 ,  4.2 ,... 4.8 ,  4.2 ,  0.  ,  2.25,  2.6 ,  2.  ,  3.  ]), w=None)
    288         """
    289 
    290         if self.valid_set is not None:
    291             self.valid_set = self._reshape(*self.valid_set)
    292 
--> 293         return super(Regressor, self)._fit(X, y, w)
    294 
    295     def predict(self, X):
    296         """Calculate predictions for specified inputs.
    297 

...........................................................................
/usr/local/lib/python2.7/dist-packages/sknn/mlp.pyc in _fit(self=Regressor(batch_size=1, callback=None, debug=Fal...e, warning=None, weight_decay=None, weights=None), X=memmap([[ -1.71586427e+07,   2.45456060e+06,   6...758269e+00,   6.37787139e-02,  -6.47838211e-02]]), y=array([[ 4.4 ],
       [ 2.2 ],
       [ 3.4 ],
...
       [ 2.6 ],
       [ 2.  ],
       [ 3.  ]]), w=None)
    227         if self.verbose:
    228             log.debug("\nEpoch       Training Error       Validation Error       Time"
    229                       "\n------------------------------------------------------------")
    230 
    231         try:
--> 232             self._train(X, y, w)
    233         except RuntimeError as e:
    234             log.error("\n{}{}{}\n\n{}\n".format(
    235                 ansi.RED,
    236                 "A runtime exception was caught during training. This likely occurred due to\n"

...........................................................................
/usr/local/lib/python2.7/dist-packages/sknn/mlp.pyc in _train(self=Regressor(batch_size=1, callback=None, debug=Fal...e, warning=None, weight_decay=None, weights=None), X=memmap([[ -1.71586427e+07,   2.45456060e+06,   6...758269e+00,   6.37787139e-02,  -6.47838211e-02]]), y=array([[ 4.4 ],
       [ 2.2 ],
       [ 3.4 ],
...
       [ 2.6 ],
       [ 2.  ],
       [ 3.  ]]), w=None)
    146         for i in itertools.count(1):
    147             start_time = time.time()
    148             self._do_callback('on_epoch_start', locals())
    149 
    150             is_best_train = False
--> 151             avg_train_error = self._backend._train_impl(X, y, w)
    152             if avg_train_error is not None:
    153                 if math.isnan(avg_train_error):
    154                     raise RuntimeError("Training diverged and returned NaN.")
    155                 

...........................................................................
/usr/local/lib/python2.7/dist-packages/sknn/backend/lasagne/mlp.pyc in _train_impl(self=<sknn.backend.lasagne.mlp.MultiLayerPerceptronBackend object>, X=memmap([[ -1.71586427e+07,   2.45456060e+06,   6...758269e+00,   6.37787139e-02,  -6.47838211e-02]]), y=array([[ 4.4 ],
       [ 2.2 ],
       [ 3.4 ],
...
       [ 2.6 ],
       [ 2.  ],
       [ 3.  ]]), w=None)
    300 
    301         self._print('\r')
    302         return loss / count
    303 
    304     def _train_impl(self, X, y, w=None):
--> 305         return self._batch_impl(X, y, w, self.trainer, mode='train', output='.', shuffle=True)
    306 
    307     def _valid_impl(self, X, y, w=None):
    308         return self._batch_impl(X, y, w, self.validator, mode='valid', output=' ', shuffle=False)
    309 

...........................................................................
/usr/local/lib/python2.7/dist-packages/sknn/backend/lasagne/mlp.pyc in _batch_impl(self=<sknn.backend.lasagne.mlp.MultiLayerPerceptronBackend object>, X=memmap([[ -1.71586427e+07,   2.45456060e+06,   6...758269e+00,   6.37787139e-02,  -6.47838211e-02]]), y=array([[ 4.4 ],
       [ 2.2 ],
       [ 3.4 ],
...
       [ 2.6 ],
       [ 2.  ],
       [ 3.  ]]), w=None, processor=<theano.compile.function_module.Function object>, mode=u'train', output=u'.', shuffle=True)
    281             sys.stdout.flush()
    282 
    283     def _batch_impl(self, X, y, w, processor, mode, output, shuffle):
    284         progress, batches = 0, X.shape[0] / self.batch_size
    285         loss, count = 0.0, 0
--> 286         for Xb, yb, wb, _ in self._iterate_data(self.batch_size, X, y, w, shuffle):
    287             self._do_callback('on_batch_start', locals())
    288 
    289             if mode == 'train':
    290                 loss += processor(Xb, yb, wb if wb is not None else 1.0)

...........................................................................
/usr/local/lib/python2.7/dist-packages/sknn/backend/lasagne/mlp.pyc in _iterate_data(self=<sknn.backend.lasagne.mlp.MultiLayerPerceptronBackend object>, batch_size=1, X=memmap([[ -1.71586427e+07,   2.45456060e+06,   6...758269e+00,   6.37787139e-02,  -6.47838211e-02]]), y=array([[ 4.4 ],
       [ 2.2 ],
       [ 3.4 ],
...
       [ 2.6 ],
       [ 2.  ],
       [ 3.  ]]), w=None, shuffle=True)
    270         if shuffle:
    271             numpy.random.shuffle(indices)
    272 
    273         for index in range(0, total_size, batch_size):
    274             excerpt = indices[index:index + batch_size]
--> 275             Xb, yb, wb = cast(X, excerpt), cast(y, excerpt), cast(w, excerpt)
    276             yield Xb, yb, wb, excerpt
    277 
    278     def _print(self, text):
    279         if self.verbose:

...........................................................................
/usr/local/lib/python2.7/dist-packages/sknn/backend/lasagne/mlp.pyc in cast(array=memmap([[  6.39665702e+06,  -1.92380979e+05,   6...693613e+00,   5.43417984e+00,  -1.42057322e+00]]), indices=array([262]))
    258             if array is None:
    259                 return None
    260 
    261             array = array[indices]
    262             if type(array) != numpy.ndarray:
--> 263                 array = array.todense()
    264             if array.dtype != theano.config.floatX:
    265                 array = array.astype(theano.config.floatX)
    266             return array
    267 
AttributeError: 'memmap' object has no attribute 'todense'

Please help!
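
If I read the cast helper in sknn/backend/lasagne/mlp.py correctly (the last frame above), the problem is that joblib memory-maps large input arrays when dispatching to worker processes (n_jobs=16 here), so the workers receive numpy.memmap views instead of plain arrays. memmap is a subclass of numpy.ndarray, so the exact-type check type(array) != numpy.ndarray is True for it, and the code falls into the sparse-matrix branch and calls .todense(). A minimal sketch of a fix, assuming that branch only exists to densify scipy.sparse inputs:

    import scipy.sparse
    import theano

    def cast(array, indices):
        if array is None:
            return None
        array = array[indices]
        # Only genuinely sparse matrices need densifying; numpy.memmap is
        # an ndarray subclass and can be fed to Theano directly.
        if scipy.sparse.issparse(array):
            array = array.todense()
        if array.dtype != theano.config.floatX:
            array = array.astype(theano.config.floatX)
        return array

Until something like that lands, passing n_jobs=1 to RandomizedSearchCV should sidestep the crash, since joblib then runs in-process and never replaces the inputs with memmaps.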
