This repository was archived by the owner on Jul 10, 2021. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 218
This repository was archived by the owner on Jul 10, 2021. It is now read-only.
Support for memmap as input type #181
Copy link
Copy link
Closed
Labels
Description
Hi, I have a problem while trying to use Regressor. I'm loading dense data from a file with the X_train=numpy.loadtxt() method. This file contains one dense vector per row. There are 525 rows with 300 dimensions each. I configured a RandomizedSearchCV instance called rs.
X_train: (525, 300)
y_train: (525,)
X_test: (225, 300)
Traceback (most recent call last):
File "nn.py", line 56, in <module>
rs.fit(X_train, y_train)
File "/usr/local/lib/python2.7/dist-packages/sklearn/grid_search.py", line 996, in fit
return self._fit(X, y, sampled_params)
File "/usr/local/lib/python2.7/dist-packages/sklearn/grid_search.py", line 553, in _fit
for parameters in parameter_iterable
File "/usr/local/lib/python2.7/dist-packages/sklearn/externals/joblib/parallel.py", line 812, in __call__
self.retrieve()
File "/usr/local/lib/python2.7/dist-packages/sklearn/externals/joblib/parallel.py", line 762, in retrieve
raise exception
sklearn.externals.joblib.my_exceptions.JoblibAttributeError: JoblibAttributeError
___________________________________________________________________________
Multiprocessing exception:
...........................................................................
/almac/ignacio/nlp-pipeline/nn.py in <module>()
51 exit()
52
53 crash = True
54 while(crash):
55 try:
---> 56 rs.fit(X_train, y_train)
57 crash = False
58 except RuntimeError:
59 sys.stderr.write("----------- [Crashed by RunTimeERROR] --------------------- \n")
60 crash = True
...........................................................................
/usr/local/lib/python2.7/dist-packages/sklearn/grid_search.py in fit(self=RandomizedSearchCV(cv=None, error_score='raise',...e, refit=True,
scoring=None, verbose=0), X=array([[ -7.19239362e+05, -2.57225021e+05, -5....758269e+00, 6.37787139e-02, -6.47838211e-02]]), y=array([ 2.6 , 2.6 , 3.8 , 4.2 , 0.8 , 1.4 ,... 4.2 , 0. , 2.25,
2.6 , 2. , 3. ]))
991
992 """
993 sampled_params = ParameterSampler(self.param_distributions,
994 self.n_iter,
995 random_state=self.random_state)
--> 996 return self._fit(X, y, sampled_params)
self._fit = <bound method RandomizedSearchCV._fit of Randomi..., refit=True,
scoring=None, verbose=0)>
X = array([[ -7.19239362e+05, -2.57225021e+05, -5....758269e+00, 6.37787139e-02, -6.47838211e-02]])
y = array([ 2.6 , 2.6 , 3.8 , 4.2 , 0.8 , 1.4 ,... 4.2 , 0. , 2.25,
2.6 , 2. , 3. ])
sampled_params = <sklearn.grid_search.ParameterSampler object>
997
998
999
1000
...........................................................................
/usr/local/lib/python2.7/dist-packages/sklearn/grid_search.py in _fit(self=RandomizedSearchCV(cv=None, error_score='raise',...e, refit=True,
scoring=None, verbose=0), X=array([[ -7.19239362e+05, -2.57225021e+05, -5....758269e+00, 6.37787139e-02, -6.47838211e-02]]), y=array([ 2.6 , 2.6 , 3.8 , 4.2 , 0.8 , 1.4 ,... 4.2 , 0. , 2.25,
2.6 , 2. , 3. ]), parameter_iterable=<sklearn.grid_search.ParameterSampler object>)
548 )(
549 delayed(_fit_and_score)(clone(base_estimator), X, y, self.scorer_,
550 train, test, self.verbose, parameters,
551 self.fit_params, return_parameters=True,
552 error_score=self.error_score)
--> 553 for parameters in parameter_iterable
parameters = undefined
parameter_iterable = <sklearn.grid_search.ParameterSampler object>
554 for train, test in cv)
555
556 # Out is a list of triplet: score, estimator, n_test_samples
557 n_fits = len(out)
...........................................................................
/usr/local/lib/python2.7/dist-packages/sklearn/externals/joblib/parallel.py in __call__(self=Parallel(n_jobs=16), iterable=<generator object <genexpr>>)
807 if pre_dispatch == "all" or n_jobs == 1:
808 # The iterable was consumed all at once by the above for loop.
809 # No need to wait for async callbacks to trigger to
810 # consumption.
811 self._iterating = False
--> 812 self.retrieve()
self.retrieve = <bound method Parallel.retrieve of Parallel(n_jobs=16)>
813 # Make sure that we get a last message telling us we are done
814 elapsed_time = time.time() - self._start_time
815 self._print('Done %3i out of %3i | elapsed: %s finished',
816 (len(self._output), len(self._output),
---------------------------------------------------------------------------
Sub-process traceback:
---------------------------------------------------------------------------
AttributeError Sun Feb 21 19:22:11 2016
PID: 8977 Python 2.7.6: /usr/bin/python
...........................................................................
/usr/local/lib/python2.7/dist-packages/sklearn/externals/joblib/parallel.pyc in __call__(self=<sklearn.externals.joblib.parallel.BatchedCalls object>)
67 def __init__(self, iterator_slice):
68 self.items = list(iterator_slice)
69 self._size = len(self.items)
70
71 def __call__(self):
---> 72 return [func(*args, **kwargs) for func, args, kwargs in self.items]
73
74 def __len__(self):
75 return self._size
76
...........................................................................
/usr/local/lib/python2.7/dist-packages/sklearn/cross_validation.pyc in _fit_and_score(estimator=Regressor(batch_size=1, callback=None, debug=Fal...e, warning=None, weight_decay=None, weights=None), X=memmap([[ -7.19239362e+05, -2.57225021e+05, -5...758269e+00, 6.37787139e-02, -6.47838211e-02]]), y=array([ 2.6 , 2.6 , 3.8 , 4.2 , 0.8 , 1.4 ,... 4.2 , 0. , 2.25,
2.6 , 2. , 3. ]), scorer=<function _passthrough_scorer>, train=array([175, 176, 177, 178, 179, 180, 181, 182, 1...15, 516, 517, 518, 519, 520, 521, 522, 523, 524]), test=array([ 0, 1, 2, 3, 4, 5, 6, 7, ..., 167, 168,
169, 170, 171, 172, 173, 174]), verbose=0, parameters={'hidden0__type': 'Rectifier', 'hidden0__units': 5, 'hidden1__type': 'Rectifier', 'hidden1__units': 37, 'learning_momentum': 1.2204617397379565, 'learning_rate': 0.09480671449399372, 'learning_rule': 'rmsprop', 'output__type': 'Linear', 'regularize': None}, fit_params={}, return_train_score=False, return_parameters=True, error_score='raise')
1526
1527 try:
1528 if y_train is None:
1529 estimator.fit(X_train, **fit_params)
1530 else:
-> 1531 estimator.fit(X_train, y_train, **fit_params)
1532
1533 except Exception as e:
1534 if error_score == 'raise':
1535 raise
...........................................................................
/usr/local/lib/python2.7/dist-packages/sknn/mlp.pyc in fit(self=Regressor(batch_size=1, callback=None, debug=Fal...e, warning=None, weight_decay=None, weights=None), X=memmap([[ -1.71586427e+07, 2.45456060e+06, 6...758269e+00, 6.37787139e-02, -6.47838211e-02]]), y=array([ 4.4 , 2.2 , 3.4 , 4. , 2.6 , 4.2 ,... 4.8 , 4.2 , 0. , 2.25, 2.6 , 2. , 3. ]), w=None)
288 """
289
290 if self.valid_set is not None:
291 self.valid_set = self._reshape(*self.valid_set)
292
--> 293 return super(Regressor, self)._fit(X, y, w)
294
295 def predict(self, X):
296 """Calculate predictions for specified inputs.
297
...........................................................................
/usr/local/lib/python2.7/dist-packages/sknn/mlp.pyc in _fit(self=Regressor(batch_size=1, callback=None, debug=Fal...e, warning=None, weight_decay=None, weights=None), X=memmap([[ -1.71586427e+07, 2.45456060e+06, 6...758269e+00, 6.37787139e-02, -6.47838211e-02]]), y=array([[ 4.4 ],
[ 2.2 ],
[ 3.4 ],
...
[ 2.6 ],
[ 2. ],
[ 3. ]]), w=None)
227 if self.verbose:
228 log.debug("\nEpoch Training Error Validation Error Time"
229 "\n------------------------------------------------------------")
230
231 try:
--> 232 self._train(X, y, w)
233 except RuntimeError as e:
234 log.error("\n{}{}{}\n\n{}\n".format(
235 ansi.RED,
236 "A runtime exception was caught during training. This likely occurred due to\n"
...........................................................................
/usr/local/lib/python2.7/dist-packages/sknn/mlp.pyc in _train(self=Regressor(batch_size=1, callback=None, debug=Fal...e, warning=None, weight_decay=None, weights=None), X=memmap([[ -1.71586427e+07, 2.45456060e+06, 6...758269e+00, 6.37787139e-02, -6.47838211e-02]]), y=array([[ 4.4 ],
[ 2.2 ],
[ 3.4 ],
...
[ 2.6 ],
[ 2. ],
[ 3. ]]), w=None)
146 for i in itertools.count(1):
147 start_time = time.time()
148 self._do_callback('on_epoch_start', locals())
149
150 is_best_train = False
--> 151 avg_train_error = self._backend._train_impl(X, y, w)
152 if avg_train_error is not None:
153 if math.isnan(avg_train_error):
154 raise RuntimeError("Training diverged and returned NaN.")
155
...........................................................................
/usr/local/lib/python2.7/dist-packages/sknn/backend/lasagne/mlp.pyc in _train_impl(self=<sknn.backend.lasagne.mlp.MultiLayerPerceptronBackend object>, X=memmap([[ -1.71586427e+07, 2.45456060e+06, 6...758269e+00, 6.37787139e-02, -6.47838211e-02]]), y=array([[ 4.4 ],
[ 2.2 ],
[ 3.4 ],
...
[ 2.6 ],
[ 2. ],
[ 3. ]]), w=None)
300
301 self._print('\r')
302 return loss / count
303
304 def _train_impl(self, X, y, w=None):
--> 305 return self._batch_impl(X, y, w, self.trainer, mode='train', output='.', shuffle=True)
306
307 def _valid_impl(self, X, y, w=None):
308 return self._batch_impl(X, y, w, self.validator, mode='valid', output=' ', shuffle=False)
309
...........................................................................
/usr/local/lib/python2.7/dist-packages/sknn/backend/lasagne/mlp.pyc in _batch_impl(self=<sknn.backend.lasagne.mlp.MultiLayerPerceptronBackend object>, X=memmap([[ -1.71586427e+07, 2.45456060e+06, 6...758269e+00, 6.37787139e-02, -6.47838211e-02]]), y=array([[ 4.4 ],
[ 2.2 ],
[ 3.4 ],
...
[ 2.6 ],
[ 2. ],
[ 3. ]]), w=None, processor=<theano.compile.function_module.Function object>, mode=u'train', output=u'.', shuffle=True)
281 sys.stdout.flush()
282
283 def _batch_impl(self, X, y, w, processor, mode, output, shuffle):
284 progress, batches = 0, X.shape[0] / self.batch_size
285 loss, count = 0.0, 0
--> 286 for Xb, yb, wb, _ in self._iterate_data(self.batch_size, X, y, w, shuffle):
287 self._do_callback('on_batch_start', locals())
288
289 if mode == 'train':
290 loss += processor(Xb, yb, wb if wb is not None else 1.0)
...........................................................................
/usr/local/lib/python2.7/dist-packages/sknn/backend/lasagne/mlp.pyc in _iterate_data(self=<sknn.backend.lasagne.mlp.MultiLayerPerceptronBackend object>, batch_size=1, X=memmap([[ -1.71586427e+07, 2.45456060e+06, 6...758269e+00, 6.37787139e-02, -6.47838211e-02]]), y=array([[ 4.4 ],
[ 2.2 ],
[ 3.4 ],
...
[ 2.6 ],
[ 2. ],
[ 3. ]]), w=None, shuffle=True)
270 if shuffle:
271 numpy.random.shuffle(indices)
272
273 for index in range(0, total_size, batch_size):
274 excerpt = indices[index:index + batch_size]
--> 275 Xb, yb, wb = cast(X, excerpt), cast(y, excerpt), cast(w, excerpt)
276 yield Xb, yb, wb, excerpt
277
278 def _print(self, text):
279 if self.verbose:
...........................................................................
/usr/local/lib/python2.7/dist-packages/sknn/backend/lasagne/mlp.pyc in cast(array=memmap([[ 6.39665702e+06, -1.92380979e+05, 6...693613e+00, 5.43417984e+00, -1.42057322e+00]]), indices=array([262]))
258 if array is None:
259 return None
260
261 array = array[indices]
262 if type(array) != numpy.ndarray:
--> 263 array = array.todense()
264 if array.dtype != theano.config.floatX:
265 array = array.astype(theano.config.floatX)
266 return array
267
AttributeError: 'memmap' object has no attribute 'todense'
Please help.