Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

raise "None of [{key}] are in the [{axis_name}]" when running scglue.models.fit_SCGLUE #32

Closed
LMH0066 opened this issue Jun 19, 2022 · 5 comments
Labels
bug Something isn't working

Comments

@LMH0066
Copy link

LMH0066 commented Jun 19, 2022

Hi,

I followed the steps in the documentation to process my data, but an error occurred while running scglue.models.fit_SCGLUE. I'm not sure where the index numbers came from, or how to fix the mistake. Thanks.
image

@LMH0066
Copy link
Author

LMH0066 commented Jun 19, 2022

Using breakpoints, I found the cause of the problem: if an `obsm` entry is stored as a pandas DataFrame, scglue does not recognize it and fails to fetch the data correctly (integer row indices are interpreted as column labels). I'm not sure whether all `obsm` entries are expected to be NumPy arrays, but DataFrame-typed `obsm` entries are accepted without error by anndata.
image

@Jeff1995
Copy link
Collaborator

Thanks for the report and explanation! Could you please post a full traceback so I can locate the problem in our code more easily?

@Jeff1995 Jeff1995 added the question Further information is requested label Jun 20, 2022
@LMH0066
Copy link
Author

LMH0066 commented Jun 20, 2022

Thanks for the report and explanation! Could you please post a full traceback so I can locate the problem in our code more easily?

File ~/miniconda3/envs/sclab/lib/python3.8/site-packages/scglue/models/__init__.py:91, in fit_SCGLUE(adatas, graph, model, init_kws, compile_kws, fit_kws, balance_kws)
89 pretrain = model(adatas, sorted(graph.nodes), **pretrain_init_kws)
90 pretrain.compile(**compile_kws)
---> 91 pretrain.fit(adatas, graph, **pretrain_fit_kws)
92 if "directory" in pretrain_fit_kws:
93 pretrain.save(os.path.join(pretrain_fit_kws["directory"], "pretrain.dill"))

File ~/miniconda3/envs/sclab/lib/python3.8/site-packages/scglue/models/scglue.py:1491, in SCGLUEModel.fit(self, adatas, graph, edge_weight, edge_sign, neg_samples, val_split, data_batch_size, graph_batch_size, align_burnin, safe_burnin, max_epochs, patience, reduce_lr_patience, wait_n_lrs, directory)
1488 if self.trainer.freeze_u:
1489 self.logger.info("Cell embeddings are frozen")
-> 1491 super().fit(
1492 data, graph, val_split=val_split,
1493 data_batch_size=data_batch_size, graph_batch_size=graph_batch_size,
1494 align_burnin=align_burnin, safe_burnin=safe_burnin,
1495 max_epochs=max_epochs, patience=patience,
1496 reduce_lr_patience=reduce_lr_patience, wait_n_lrs=wait_n_lrs,
1497 random_seed=self.random_seed,
1498 directory=directory
1499 )

File ~/miniconda3/envs/sclab/lib/python3.8/site-packages/scglue/models/base.py:334, in Model.fit(self, *args, **kwargs)
319 def fit(self, *args, **kwargs) -> None:
320 r"""
321 Alias of .trainer.fit.
322
(...)
332 Subclasses may override arguments for API definition.
333 """
--> 334 self.trainer.fit(*args, **kwargs)

File ~/miniconda3/envs/sclab/lib/python3.8/site-packages/scglue/models/glue.py:608, in GLUETrainer.fit(self, data, graph, val_split, data_batch_size, graph_batch_size, align_burnin, safe_burnin, max_epochs, patience, reduce_lr_patience, wait_n_lrs, random_seed, directory, plugins)
606 plugins = default_plugins + (plugins or [])
607 try:
--> 608 super().fit(
609 train_loader, val_loader=val_loader,
610 max_epochs=max_epochs, random_seed=random_seed,
611 directory=directory, plugins=plugins
612 )
613 finally:
614 data.clean()

File ~/miniconda3/envs/sclab/lib/python3.8/site-packages/scglue/models/base.py:200, in Trainer.fit(self, train_loader, val_loader, max_epochs, random_seed, directory, plugins)
198 # Start engines
199 torch.manual_seed(random_seed)
--> 200 train_engine.run(train_loader, max_epochs=max_epochs)
202 torch.cuda.empty_cache()

File ~/miniconda3/envs/sclab/lib/python3.8/site-packages/ignite/engine/engine.py:704, in Engine.run(self, data, max_epochs, epoch_length, seed)
701 raise ValueError("epoch_length should be provided if data is None")
703 self.state.dataloader = data
--> 704 return self._internal_run()

File ~/miniconda3/envs/sclab/lib/python3.8/site-packages/ignite/engine/engine.py:783, in Engine._internal_run(self)
781 self._dataloader_iter = None
782 self.logger.error(f"Engine run is terminating due to exception: {e}")
--> 783 self._handle_exception(e)
785 self._dataloader_iter = None
786 return self.state

File ~/miniconda3/envs/sclab/lib/python3.8/site-packages/ignite/engine/engine.py:464, in Engine._handle_exception(self, e)
462 def _handle_exception(self, e: BaseException) -> None:
463 if Events.EXCEPTION_RAISED in self._event_handlers:
--> 464 self._fire_event(Events.EXCEPTION_RAISED, e)
465 else:
466 raise e

File ~/miniconda3/envs/sclab/lib/python3.8/site-packages/ignite/engine/engine.py:421, in Engine._fire_event(self, event_name, *event_args, **event_kwargs)
419 kwargs.update(event_kwargs)
420 first, others = ((args[0],), args[1:]) if (args and args[0] == self) else ((), args)
--> 421 func(*first, *(event_args + others), **kwargs)

File ~/miniconda3/envs/sclab/lib/python3.8/site-packages/scglue/models/base.py:163, in Trainer.fit.<locals>._handle_exception(engine, e)
161 engine.terminate()
162 else:
--> 163 raise e

File ~/miniconda3/envs/sclab/lib/python3.8/site-packages/ignite/engine/engine.py:753, in Engine._internal_run(self)
750 if self._dataloader_iter is None:
751 self._setup_engine()
--> 753 time_taken = self._run_once_on_dataset()
754 # time is available for handlers but must be update after fire
755 self.state.times[Events.EPOCH_COMPLETED.name] = time_taken

File ~/miniconda3/envs/sclab/lib/python3.8/site-packages/ignite/engine/engine.py:854, in Engine._run_once_on_dataset(self)
852 except Exception as e:
853 self.logger.error(f"Current run is terminating due to exception: {e}")
--> 854 self._handle_exception(e)
856 return time.time() - start_time

File ~/miniconda3/envs/sclab/lib/python3.8/site-packages/ignite/engine/engine.py:464, in Engine._handle_exception(self, e)
462 def _handle_exception(self, e: BaseException) -> None:
463 if Events.EXCEPTION_RAISED in self._event_handlers:
--> 464 self._fire_event(Events.EXCEPTION_RAISED, e)
465 else:
466 raise e

File ~/miniconda3/envs/sclab/lib/python3.8/site-packages/ignite/engine/engine.py:421, in Engine._fire_event(self, event_name, *event_args, **event_kwargs)
419 kwargs.update(event_kwargs)
420 first, others = ((args[0],), args[1:]) if (args and args[0] == self) else ((), args)
--> 421 func(*first, *(event_args + others), **kwargs)

File ~/miniconda3/envs/sclab/lib/python3.8/site-packages/scglue/models/base.py:163, in Trainer.fit.<locals>._handle_exception(engine, e)
161 engine.terminate()
162 else:
--> 163 raise e

File ~/miniconda3/envs/sclab/lib/python3.8/site-packages/ignite/engine/engine.py:807, in Engine._run_once_on_dataset(self)
805 if self.last_event_name != Events.DATALOADER_STOP_ITERATION:
806 self._fire_event(Events.GET_BATCH_STARTED)
--> 807 self.state.batch = next(self._dataloader_iter)
808 self._fire_event(Events.GET_BATCH_COMPLETED)
809 iter_counter += 1

File ~/miniconda3/envs/sclab/lib/python3.8/site-packages/scglue/models/data.py:531, in ParallelDataLoader.__next__(self)
529 def __next__(self) -> List[torch.Tensor]:
530 return functools.reduce(
--> 531 operator.add, [self._next(i) for i in range(self.num_loaders)]
532 )

File ~/miniconda3/envs/sclab/lib/python3.8/site-packages/scglue/models/data.py:531, in <listcomp>(.0)
529 def __next__(self) -> List[torch.Tensor]:
530 return functools.reduce(
--> 531 operator.add, [self._next(i) for i in range(self.num_loaders)]
532 )

File ~/miniconda3/envs/sclab/lib/python3.8/site-packages/scglue/models/data.py:522, in ParallelDataLoader._next(self, i)
520 def _next(self, i: int) -> List[torch.Tensor]:
521 try:
--> 522 return next(self.iterators[i])
523 except StopIteration as e:
524 if self.cycle_flags[i]:

File ~/miniconda3/envs/sclab/lib/python3.8/site-packages/torch/utils/data/dataloader.py:530, in _BaseDataLoaderIter.__next__(self)
528 if self._sampler_iter is None:
529 self._reset()
--> 530 data = self._next_data()
531 self._num_yielded += 1
532 if self._dataset_kind == _DatasetKind.Iterable and \
533 self._IterableDataset_len_called is not None and \
534 self._num_yielded > self._IterableDataset_len_called:

File ~/miniconda3/envs/sclab/lib/python3.8/site-packages/torch/utils/data/dataloader.py:570, in _SingleProcessDataLoaderIter._next_data(self)
568 def _next_data(self):
569 index = self._next_index() # may raise StopIteration
--> 570 data = self._dataset_fetcher.fetch(index) # may raise StopIteration
571 if self._pin_memory:
572 data = _utils.pin_memory.pin_memory(data)

File ~/miniconda3/envs/sclab/lib/python3.8/site-packages/torch/utils/data/_utils/fetch.py:49, in _MapDatasetFetcher.fetch(self, possibly_batched_index)
47 def fetch(self, possibly_batched_index):
48 if self.auto_collation:
---> 49 data = [self.dataset[idx] for idx in possibly_batched_index]
50 else:
51 data = self.dataset[possibly_batched_index]

File ~/miniconda3/envs/sclab/lib/python3.8/site-packages/torch/utils/data/_utils/fetch.py:49, in <listcomp>(.0)
47 def fetch(self, possibly_batched_index):
48 if self.auto_collation:
---> 49 data = [self.dataset[idx] for idx in possibly_batched_index]
50 else:
51 data = self.dataset[possibly_batched_index]

File ~/miniconda3/envs/sclab/lib/python3.8/site-packages/scglue/models/scglue.py:160, in AnnDataset.__getitem__(self, index)
158 shuffle_idx = self.shuffle_idx[s].T
159 shuffle_pmsk = self.shuffle_pmsk[s]
--> 160 items = [
161 torch.as_tensor(self._index_array(data, idx))
162 for extracted_data in self.extracted_data
163 for idx, data in zip(shuffle_idx, extracted_data)
164 ]
165 items.append(torch.as_tensor(shuffle_pmsk))
166 return items

File ~/miniconda3/envs/sclab/lib/python3.8/site-packages/scglue/models/scglue.py:161, in <listcomp>(.0)
158 shuffle_idx = self.shuffle_idx[s].T
159 shuffle_pmsk = self.shuffle_pmsk[s]
160 items = [
--> 161 torch.as_tensor(self._index_array(data, idx))
162 for extracted_data in self.extracted_data
163 for idx, data in zip(shuffle_idx, extracted_data)
164 ]
165 items.append(torch.as_tensor(shuffle_pmsk))
166 return items

File ~/miniconda3/envs/sclab/lib/python3.8/site-packages/scglue/models/scglue.py:176, in AnnDataset._index_array(arr, idx)
174 arr = arr[sorted_idx][rank] # Convert to sequantial access and back
175 else:
--> 176 arr = arr[idx]
177 return arr.toarray() if scipy.sparse.issparse(arr) else arr

File ~/miniconda3/envs/sclab/lib/python3.8/site-packages/pandas/core/frame.py:3511, in DataFrame.__getitem__(self, key)
3509 if is_iterator(key):
3510 key = list(key)
-> 3511 indexer = self.columns._get_indexer_strict(key, "columns")[1]
3513 # take() does not accept boolean indexers
3514 if getattr(indexer, "dtype", None) == bool:

File ~/miniconda3/envs/sclab/lib/python3.8/site-packages/pandas/core/indexes/base.py:5782, in Index._get_indexer_strict(self, key, axis_name)
5779 else:
5780 keyarr, indexer, new_indexer = self._reindex_non_unique(keyarr)
-> 5782 self._raise_if_missing(keyarr, indexer, axis_name)
5784 keyarr = self.take(indexer)
5785 if isinstance(key, Index):
5786 # GH 42790 - Preserve name from an Index

File ~/miniconda3/envs/sclab/lib/python3.8/site-packages/pandas/core/indexes/base.py:5842, in Index._raise_if_missing(self, key, indexer, axis_name)
5840 if use_interval_msg:
5841 key = list(key)
-> 5842 raise KeyError(f"None of [{key}] are in the [{axis_name}]")
5844 not_found = list(ensure_index(key)[missing_mask.nonzero()[0]].unique())
5845 raise KeyError(f"{not_found} not in index")

KeyError: "None of [Int64Index([ 522, 1696, 1553, 2176, 1928, 1895, 176, 2006, 315, 571, 1167,\n 959, 819, 299, 692, 1164, 1175, 997, 1038, 750, 766, 44,\n 545, 1370, 2189, 1810, 1191, 1487, 1167, 1710, 601, 2030],\n dtype='int64')] are in the [columns]"

@Jeff1995 Jeff1995 added bug Something isn't working and removed question Further information is requested labels Jun 20, 2022
@Jeff1995
Copy link
Collaborator

Jeff1995 commented Jun 21, 2022

The bug should have been fixed in the v0.2.3 release. Could you help verify? Thanks a lot!

@LMH0066
Copy link
Author

LMH0066 commented Jun 23, 2022

The bug should have been fixed in the v0.2.3 release. Could you help verify? Thanks a lot!

It worked correctly!

@LMH0066 LMH0066 closed this as completed Jun 23, 2022
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
bug Something isn't working
Projects
None yet
Development

No branches or pull requests

2 participants