This repository was archived by the owner on Jul 10, 2021. It is now read-only.

Support for memmap as input type #181

@iarroyof

Hi, I have a problem while trying to use Regressor. I'm loading dense data from a file with X_train = numpy.loadtxt(); each row of the file is a dense vector, and there are 525 rows with 300 dimensions each. I configured a RandomizedSearchCV called rs.
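
Roughly, the setup looks like the sketch below; the file paths and the parameter grid are placeholders, not my exact script:

    import numpy
    from sklearn.grid_search import RandomizedSearchCV
    from sknn.mlp import Regressor, Layer

    # Dense data, one 300-dimensional row vector per line (placeholder paths).
    X_train = numpy.loadtxt("X_train.txt")   # shape (525, 300)
    y_train = numpy.loadtxt("y_train.txt")   # shape (525,)

    nn = Regressor(layers=[Layer("Rectifier", units=10),
                           Layer("Rectifier", units=10),
                           Layer("Linear")],
                   batch_size=1)

    # Placeholder search space; the real one also samples layer types,
    # learning rules, momentum, etc. (see the parameters in the traceback).
    params = {"hidden0__units": range(4, 64),
              "hidden1__units": range(4, 64),
              "learning_rate": [0.01, 0.05, 0.1]}

    rs = RandomizedSearchCV(nn, param_distributions=params, n_iter=10,
                            n_jobs=16)
    rs.fit(X_train, y_train)   # crashes as shown below

The array shapes and the full traceback: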

X_train: (525, 300)
y_train: (525,)
X_test: (225, 300)
Traceback (most recent call last):
  File "nn.py", line 56, in <module>
    rs.fit(X_train, y_train)
  File "/usr/local/lib/python2.7/dist-packages/sklearn/grid_search.py", line 996, in fit
    return self._fit(X, y, sampled_params)
  File "/usr/local/lib/python2.7/dist-packages/sklearn/grid_search.py", line 553, in _fit
    for parameters in parameter_iterable
  File "/usr/local/lib/python2.7/dist-packages/sklearn/externals/joblib/parallel.py", line 812, in __call__
    self.retrieve()
  File "/usr/local/lib/python2.7/dist-packages/sklearn/externals/joblib/parallel.py", line 762, in retrieve
    raise exception
sklearn.externals.joblib.my_exceptions.JoblibAttributeError: JoblibAttributeError
___________________________________________________________________________
Multiprocessing exception:
...........................................................................
/almac/ignacio/nlp-pipeline/nn.py in <module>()
     51     exit()
     52 
     53 crash = True
     54 while(crash):
     55     try:
---> 56         rs.fit(X_train, y_train)
     57         crash = False
     58     except RuntimeError:
     59         sys.stderr.write("----------- [Crashed by RunTimeERROR] --------------------- \n")
     60         crash = True

...........................................................................
/usr/local/lib/python2.7/dist-packages/sklearn/grid_search.py in fit(self=RandomizedSearchCV(cv=None, error_score='raise',...e, refit=True,
          scoring=None, verbose=0), X=array([[ -7.19239362e+05,  -2.57225021e+05,  -5....758269e+00,   6.37787139e-02,  -6.47838211e-02]]), y=array([ 2.6 ,  2.6 ,  3.8 ,  4.2 ,  0.8 ,  1.4 ,... 4.2 ,  0.  ,  2.25,
        2.6 ,  2.  ,  3.  ]))
    991 
    992         """
    993         sampled_params = ParameterSampler(self.param_distributions,
    994                                           self.n_iter,
    995                                           random_state=self.random_state)
--> 996         return self._fit(X, y, sampled_params)
        self._fit = <bound method RandomizedSearchCV._fit of Randomi..., refit=True,
          scoring=None, verbose=0)>
        X = array([[ -7.19239362e+05,  -2.57225021e+05,  -5....758269e+00,   6.37787139e-02,  -6.47838211e-02]])
        y = array([ 2.6 ,  2.6 ,  3.8 ,  4.2 ,  0.8 ,  1.4 ,... 4.2 ,  0.  ,  2.25,
        2.6 ,  2.  ,  3.  ])
        sampled_params = <sklearn.grid_search.ParameterSampler object>
    997 
    998 
    999 
   1000 

...........................................................................
/usr/local/lib/python2.7/dist-packages/sklearn/grid_search.py in _fit(self=RandomizedSearchCV(cv=None, error_score='raise',...e, refit=True,
          scoring=None, verbose=0), X=array([[ -7.19239362e+05,  -2.57225021e+05,  -5....758269e+00,   6.37787139e-02,  -6.47838211e-02]]), y=array([ 2.6 ,  2.6 ,  3.8 ,  4.2 ,  0.8 ,  1.4 ,... 4.2 ,  0.  ,  2.25,
        2.6 ,  2.  ,  3.  ]), parameter_iterable=<sklearn.grid_search.ParameterSampler object>)
    548         )(
    549             delayed(_fit_and_score)(clone(base_estimator), X, y, self.scorer_,
    550                                     train, test, self.verbose, parameters,
    551                                     self.fit_params, return_parameters=True,
    552                                     error_score=self.error_score)
--> 553                 for parameters in parameter_iterable
        parameters = undefined
        parameter_iterable = <sklearn.grid_search.ParameterSampler object>
    554                 for train, test in cv)
    555 
    556         # Out is a list of triplet: score, estimator, n_test_samples
    557         n_fits = len(out)

...........................................................................
/usr/local/lib/python2.7/dist-packages/sklearn/externals/joblib/parallel.py in __call__(self=Parallel(n_jobs=16), iterable=<generator object <genexpr>>)
    807             if pre_dispatch == "all" or n_jobs == 1:
    808                 # The iterable was consumed all at once by the above for loop.
    809                 # No need to wait for async callbacks to trigger to
    810                 # consumption.
    811                 self._iterating = False
--> 812             self.retrieve()
        self.retrieve = <bound method Parallel.retrieve of Parallel(n_jobs=16)>
    813             # Make sure that we get a last message telling us we are done
    814             elapsed_time = time.time() - self._start_time
    815             self._print('Done %3i out of %3i | elapsed: %s finished',
    816                         (len(self._output), len(self._output),

---------------------------------------------------------------------------
Sub-process traceback:
---------------------------------------------------------------------------
AttributeError                                     Sun Feb 21 19:22:11 2016
PID: 8977                                     Python 2.7.6: /usr/bin/python
...........................................................................
/usr/local/lib/python2.7/dist-packages/sklearn/externals/joblib/parallel.pyc in __call__(self=<sklearn.externals.joblib.parallel.BatchedCalls object>)
     67     def __init__(self, iterator_slice):
     68         self.items = list(iterator_slice)
     69         self._size = len(self.items)
     70 
     71     def __call__(self):
---> 72         return [func(*args, **kwargs) for func, args, kwargs in self.items]
     73 
     74     def __len__(self):
     75         return self._size
     76 

...........................................................................
/usr/local/lib/python2.7/dist-packages/sklearn/cross_validation.pyc in _fit_and_score(estimator=Regressor(batch_size=1, callback=None, debug=Fal...e, warning=None, weight_decay=None, weights=None), X=memmap([[ -7.19239362e+05,  -2.57225021e+05,  -5...758269e+00,   6.37787139e-02,  -6.47838211e-02]]), y=array([ 2.6 ,  2.6 ,  3.8 ,  4.2 ,  0.8 ,  1.4 ,... 4.2 ,  0.  ,  2.25,
        2.6 ,  2.  ,  3.  ]), scorer=<function _passthrough_scorer>, train=array([175, 176, 177, 178, 179, 180, 181, 182, 1...15, 516, 517, 518, 519, 520, 521, 522, 523, 524]), test=array([  0,   1,   2,   3,   4,   5,   6,   7,  ..., 167, 168,
       169, 170, 171, 172, 173, 174]), verbose=0, parameters={'hidden0__type': 'Rectifier', 'hidden0__units': 5, 'hidden1__type': 'Rectifier', 'hidden1__units': 37, 'learning_momentum': 1.2204617397379565, 'learning_rate': 0.09480671449399372, 'learning_rule': 'rmsprop', 'output__type': 'Linear', 'regularize': None}, fit_params={}, return_train_score=False, return_parameters=True, error_score='raise')
   1526 
   1527     try:
   1528         if y_train is None:
   1529             estimator.fit(X_train, **fit_params)
   1530         else:
-> 1531             estimator.fit(X_train, y_train, **fit_params)
   1532 
   1533     except Exception as e:
   1534         if error_score == 'raise':
   1535             raise

...........................................................................
/usr/local/lib/python2.7/dist-packages/sknn/mlp.pyc in fit(self=Regressor(batch_size=1, callback=None, debug=Fal...e, warning=None, weight_decay=None, weights=None), X=memmap([[ -1.71586427e+07,   2.45456060e+06,   6...758269e+00,   6.37787139e-02,  -6.47838211e-02]]), y=array([ 4.4 ,  2.2 ,  3.4 ,  4.  ,  2.6 ,  4.2 ,... 4.8 ,  4.2 ,  0.  ,  2.25,  2.6 ,  2.  ,  3.  ]), w=None)
    288         """
    289 
    290         if self.valid_set is not None:
    291             self.valid_set = self._reshape(*self.valid_set)
    292 
--> 293         return super(Regressor, self)._fit(X, y, w)
    294 
    295     def predict(self, X):
    296         """Calculate predictions for specified inputs.
    297 

...........................................................................
/usr/local/lib/python2.7/dist-packages/sknn/mlp.pyc in _fit(self=Regressor(batch_size=1, callback=None, debug=Fal...e, warning=None, weight_decay=None, weights=None), X=memmap([[ -1.71586427e+07,   2.45456060e+06,   6...758269e+00,   6.37787139e-02,  -6.47838211e-02]]), y=array([[ 4.4 ],
       [ 2.2 ],
       [ 3.4 ],
...
       [ 2.6 ],
       [ 2.  ],
       [ 3.  ]]), w=None)
    227         if self.verbose:
    228             log.debug("\nEpoch       Training Error       Validation Error       Time"
    229                       "\n------------------------------------------------------------")
    230 
    231         try:
--> 232             self._train(X, y, w)
    233         except RuntimeError as e:
    234             log.error("\n{}{}{}\n\n{}\n".format(
    235                 ansi.RED,
    236                 "A runtime exception was caught during training. This likely occurred due to\n"

...........................................................................
/usr/local/lib/python2.7/dist-packages/sknn/mlp.pyc in _train(self=Regressor(batch_size=1, callback=None, debug=Fal...e, warning=None, weight_decay=None, weights=None), X=memmap([[ -1.71586427e+07,   2.45456060e+06,   6...758269e+00,   6.37787139e-02,  -6.47838211e-02]]), y=array([[ 4.4 ],
       [ 2.2 ],
       [ 3.4 ],
...
       [ 2.6 ],
       [ 2.  ],
       [ 3.  ]]), w=None)
    146         for i in itertools.count(1):
    147             start_time = time.time()
    148             self._do_callback('on_epoch_start', locals())
    149 
    150             is_best_train = False
--> 151             avg_train_error = self._backend._train_impl(X, y, w)
    152             if avg_train_error is not None:
    153                 if math.isnan(avg_train_error):
    154                     raise RuntimeError("Training diverged and returned NaN.")
    155                 

...........................................................................
/usr/local/lib/python2.7/dist-packages/sknn/backend/lasagne/mlp.pyc in _train_impl(self=<sknn.backend.lasagne.mlp.MultiLayerPerceptronBackend object>, X=memmap([[ -1.71586427e+07,   2.45456060e+06,   6...758269e+00,   6.37787139e-02,  -6.47838211e-02]]), y=array([[ 4.4 ],
       [ 2.2 ],
       [ 3.4 ],
...
       [ 2.6 ],
       [ 2.  ],
       [ 3.  ]]), w=None)
    300 
    301         self._print('\r')
    302         return loss / count
    303 
    304     def _train_impl(self, X, y, w=None):
--> 305         return self._batch_impl(X, y, w, self.trainer, mode='train', output='.', shuffle=True)
    306 
    307     def _valid_impl(self, X, y, w=None):
    308         return self._batch_impl(X, y, w, self.validator, mode='valid', output=' ', shuffle=False)
    309 

...........................................................................
/usr/local/lib/python2.7/dist-packages/sknn/backend/lasagne/mlp.pyc in _batch_impl(self=<sknn.backend.lasagne.mlp.MultiLayerPerceptronBackend object>, X=memmap([[ -1.71586427e+07,   2.45456060e+06,   6...758269e+00,   6.37787139e-02,  -6.47838211e-02]]), y=array([[ 4.4 ],
       [ 2.2 ],
       [ 3.4 ],
...
       [ 2.6 ],
       [ 2.  ],
       [ 3.  ]]), w=None, processor=<theano.compile.function_module.Function object>, mode=u'train', output=u'.', shuffle=True)
    281             sys.stdout.flush()
    282 
    283     def _batch_impl(self, X, y, w, processor, mode, output, shuffle):
    284         progress, batches = 0, X.shape[0] / self.batch_size
    285         loss, count = 0.0, 0
--> 286         for Xb, yb, wb, _ in self._iterate_data(self.batch_size, X, y, w, shuffle):
    287             self._do_callback('on_batch_start', locals())
    288 
    289             if mode == 'train':
    290                 loss += processor(Xb, yb, wb if wb is not None else 1.0)

...........................................................................
/usr/local/lib/python2.7/dist-packages/sknn/backend/lasagne/mlp.pyc in _iterate_data(self=<sknn.backend.lasagne.mlp.MultiLayerPerceptronBackend object>, batch_size=1, X=memmap([[ -1.71586427e+07,   2.45456060e+06,   6...758269e+00,   6.37787139e-02,  -6.47838211e-02]]), y=array([[ 4.4 ],
       [ 2.2 ],
       [ 3.4 ],
...
       [ 2.6 ],
       [ 2.  ],
       [ 3.  ]]), w=None, shuffle=True)
    270         if shuffle:
    271             numpy.random.shuffle(indices)
    272 
    273         for index in range(0, total_size, batch_size):
    274             excerpt = indices[index:index + batch_size]
--> 275             Xb, yb, wb = cast(X, excerpt), cast(y, excerpt), cast(w, excerpt)
    276             yield Xb, yb, wb, excerpt
    277 
    278     def _print(self, text):
    279         if self.verbose:

...........................................................................
/usr/local/lib/python2.7/dist-packages/sknn/backend/lasagne/mlp.pyc in cast(array=memmap([[  6.39665702e+06,  -1.92380979e+05,   6...693613e+00,   5.43417984e+00,  -1.42057322e+00]]), indices=array([262]))
    258             if array is None:
    259                 return None
    260 
    261             array = array[indices]
    262             if type(array) != numpy.ndarray:
--> 263                 array = array.todense()
    264             if array.dtype != theano.config.floatX:
    265                 array = array.astype(theano.config.floatX)
    266             return array
    267 
AttributeError: 'memmap' object has no attribute 'todense'

Please help!
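
If I read the cast helper in sknn/backend/lasagne/mlp.py correctly (the last frame above), the problem is that joblib memory-maps large input arrays when dispatching to worker processes (n_jobs=16 here), so the workers receive numpy.memmap views instead of plain arrays. memmap is a subclass of numpy.ndarray, so the exact-type check type(array) != numpy.ndarray is True for it, and the code falls into the sparse-matrix branch and calls .todense(). A minimal sketch of a fix, assuming that branch only exists to densify scipy.sparse inputs:

    import scipy.sparse
    import theano

    def cast(array, indices):
        if array is None:
            return None
        array = array[indices]
        # Only genuinely sparse matrices need densifying; numpy.memmap is
        # an ndarray subclass and can be fed to Theano directly.
        if scipy.sparse.issparse(array):
            array = array.todense()
        if array.dtype != theano.config.floatX:
            array = array.astype(theano.config.floatX)
        return array

Until something like that lands, passing n_jobs=1 to RandomizedSearchCV should sidestep the crash, since joblib then runs in-process and never replaces the inputs with memmaps.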
