New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Accumulate gradient #507
Comments
Hi! Is there an error if you set |
I had the same issue, not sure what exactly fixed it, but try to run the same with catalyst 19.11.1 |
Hi! Thanks for your reply. When I set the step to 1 |
Okay, this is interesting. What version of Catalyst do you use? |
I am using version 19.10.2. I installed by |
Did it work when testing this module? |
I looked in that line of code, Hope this information is helpful. If you want me to check for any other information, please feel free to let me know. |
Hi! I think I found the reason. When I pass an I will continue to figure out why |
I found the reason. It is because the order value of the OptimizerCallback is too small, as you can see in |
Maybe this is a bug. When I set catalyst/catalyst/dl/callbacks/optimizer.py Line 139 in af3572f
|
Me too, but when I tried to reproduce this issue with segmentation tutorial, I couldn't.
|
@wmmxk try to add |
could you reproduce the issue with |
should be already fixed :) |
I was trying to use the accumulate gradient feature but ran into an error. The training works without the
OptimizerCallback(accumulation_steps=2)
FYI, the error message:
0/60 * Epoch (train): 0% 0/624 [00:00<?, ?it/s]
TypeError Traceback (most recent call last)
in
9 logdir=logdir,
10 num_epochs=num_epochs,
---> 11 verbose=True
12 )
~/.conda/envs/mmdet_cloud/lib/python3.6/site-packages/catalyst/dl/runner/supervised.py in train(self, model, criterion, optimizer, loaders, logdir, callbacks, scheduler, resume, num_epochs, valid_loader, main_metric, minimize_metric, verbose, state_kwargs, checkpoint_data, fp16, monitoring_params, check)
195 monitoring_params=monitoring_params
196 )
--> 197 self.run_experiment(experiment, check=check)
198
199 def infer(
~/.conda/envs/mmdet_cloud/lib/python3.6/site-packages/catalyst/dl/core/runner.py in run_experiment(self, experiment, check)
229 except (Exception, KeyboardInterrupt) as ex:
230 self.state.exception = ex
--> 231 self._run_event("exception")
232
233 return self
~/.conda/envs/mmdet_cloud/lib/python3.6/site-packages/catalyst/dl/core/runner.py in run_event(self, event)
100
101 if self.state is not None and hasattr(self.state, f"on{event}post"):
--> 102 getattr(self.state, f"on{event}_post")()
103
104 @AbstractMethod
~/.conda/envs/mmdet_cloud/lib/python3.6/site-packages/catalyst/dl/core/state.py in on_exception_post(self)
183 def on_exception_post(self):
184 for logger in self.loggers.values():
--> 185 logger.on_exception(self)
186
187
~/.conda/envs/mmdet_cloud/lib/python3.6/site-packages/catalyst/dl/callbacks/logging.py in on_exception(self, state)
194
195 if state.need_reraise_exception:
--> 196 raise exception
197
198
~/.conda/envs/mmdet_cloud/lib/python3.6/site-packages/catalyst/dl/core/runner.py in run_experiment(self, experiment, check)
226 try:
227 for stage in self.experiment.stages:
--> 228 self._run_stage(stage)
229 except (Exception, KeyboardInterrupt) as ex:
230 self.state.exception = ex
~/.conda/envs/mmdet_cloud/lib/python3.6/site-packages/catalyst/dl/core/runner.py in _run_stage(self, stage)
199
200 self._run_event("epoch_start")
--> 201 self._run_epoch(loaders)
202 self._run_event("epoch_end")
203
~/.conda/envs/mmdet_cloud/lib/python3.6/site-packages/catalyst/dl/core/runner.py in _run_epoch(self, loaders)
186 self._run_event("loader_start")
187 with torch.set_grad_enabled(self.state.need_backward):
--> 188 self._run_loader(loader)
189 self._run_event("loader_end")
190
~/.conda/envs/mmdet_cloud/lib/python3.6/site-packages/catalyst/dl/core/runner.py in _run_loader(self, loader)
148
149 for i, batch in enumerate(loader):
--> 150 self._run_batch(batch)
151
152 self.state.timer.reset()
~/.conda/envs/mmdet_cloud/lib/python3.6/site-packages/catalyst/dl/core/runner.py in _run_batch(self, batch)
130 self.state.timer.stop("_timers/model_time")
131 self.state.timer.stop("_timers/batch_time")
--> 132 self._run_event("batch_end")
133
134 def _run_loader(self, loader):
~/.conda/envs/mmdet_cloud/lib/python3.6/site-packages/catalyst/dl/core/runner.py in run_event(self, event)
97 if self.callbacks is not None:
98 for callback in self.callbacks.values():
---> 99 getattr(callback, f"on{event}")(self.state)
100
101 if self.state is not None and hasattr(self.state, f"on_{event}_post"):
~/.conda/envs/mmdet_cloud/lib/python3.6/site-packages/catalyst/dl/callbacks/optimizer.py in on_batch_end(self, state)
117 return
118
--> 119 loss = self._get_loss(state)
120
121 self._accumulation_counter += 1
~/.conda/envs/mmdet_cloud/lib/python3.6/site-packages/catalyst/dl/callbacks/optimizer.py in _get_loss(self, state)
91
92 def _get_loss(self, state) -> torch.Tensor:
---> 93 loss = state.get_key(key="loss", inner_key=self.loss_key)
94
95 if isinstance(loss, list):
~/.conda/envs/mmdet_cloud/lib/python3.6/site-packages/catalyst/dl/core/state.py in get_key(self, key, inner_key)
114 return getattr(self, key)
115 else:
--> 116 return getattr(self, key)[inner_key]
117
118 def set_key(self, value, key, inner_key=None):
TypeError: 'NoneType' object is not subscriptable
The text was updated successfully, but these errors were encountered: