You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
Describe the bug
When i set export_to_onnx true in yaml config, process crash on epoch end (in ModelCheckpointWithOnnx).
To Reproduce
run python -m torchok -cp ../examples/configs -cn representation_arcface_sop (before set export_to_onnx as true).
Actual behavior
Traceback (most recent call last):
File "/workdir/vpatrushev/torchOKsmall2/torchok/torchok/main.py", line 38, in entrypoint
trainer.fit(model, ckpt_path=config.resume_path)
File "/opt/conda/envs/torchok/lib/python3.9/site-packages/pytorch_lightning/trainer/trainer.py", line 770, in fit
self._call_and_handle_interrupt(
File "/opt/conda/envs/torchok/lib/python3.9/site-packages/pytorch_lightning/trainer/trainer.py", line 723, in _call_and_handle_interrupt
return trainer_fn(*args, **kwargs)
File "/opt/conda/envs/torchok/lib/python3.9/site-packages/pytorch_lightning/trainer/trainer.py", line 811, in _fit_impl
results = self._run(model, ckpt_path=self.ckpt_path)
File "/opt/conda/envs/torchok/lib/python3.9/site-packages/pytorch_lightning/trainer/trainer.py", line 1236, in _run
results = self._run_stage()
File "/opt/conda/envs/torchok/lib/python3.9/site-packages/pytorch_lightning/trainer/trainer.py", line 1323, in _run_stage
return self._run_train()
File "/opt/conda/envs/torchok/lib/python3.9/site-packages/pytorch_lightning/trainer/trainer.py", line 1353, in _run_train
self.fit_loop.run()
File "/opt/conda/envs/torchok/lib/python3.9/site-packages/pytorch_lightning/loops/base.py", line 205, in run
self.on_advance_end()
File "/opt/conda/envs/torchok/lib/python3.9/site-packages/pytorch_lightning/loops/fit_loop.py", line 297, in on_advance_end
self.trainer._call_callback_hooks("on_train_epoch_end")
File "/opt/conda/envs/torchok/lib/python3.9/site-packages/pytorch_lightning/trainer/trainer.py", line 1636, in _call_callback_hooks
fn(self, self.lightning_module, *args, **kwargs)
File "/opt/conda/envs/torchok/lib/python3.9/site-packages/pytorch_lightning/callbacks/model_checkpoint.py", line 309, in on_train_epoch_end
self._save_last_checkpoint(trainer, monitor_candidates)
File "/opt/conda/envs/torchok/lib/python3.9/site-packages/pytorch_lightning/callbacks/model_checkpoint.py", line 644, in _save_last_checkpoint
self._save_checkpoint(trainer, filepath)
File "/workdir/vpatrushev/torchOKsmall2/torchok/torchok/callbacks/model_checkpoint_with_onnx.py", line 43, in _save_checkpoint
model.to_onnx(filepath + self.ONNX_EXTENSION, (*input_tensors,), **self.onnx_params)
File "/opt/conda/envs/torchok/lib/python3.9/site-packages/torch/autograd/grad_mode.py", line 27, in decorate_context
return func(*args, **kwargs)
File "/opt/conda/envs/torchok/lib/python3.9/site-packages/pytorch_lightning/core/lightning.py", line 1888, in to_onnx
torch.onnx.export(self, input_sample, file_path, **kwargs)
File "/opt/conda/envs/torchok/lib/python3.9/site-packages/torch/onnx/init.py", line 350, in export
return utils.export(
File "/opt/conda/envs/torchok/lib/python3.9/site-packages/torch/onnx/utils.py", line 163, in export
_export(
File "/opt/conda/envs/torchok/lib/python3.9/site-packages/torch/onnx/utils.py", line 1074, in _export
graph, params_dict, torch_out = _model_to_graph(
File "/opt/conda/envs/torchok/lib/python3.9/site-packages/torch/onnx/utils.py", line 727, in _model_to_graph
graph, params, torch_out, module = _create_jit_graph(model, args)
File "/opt/conda/envs/torchok/lib/python3.9/site-packages/torch/onnx/utils.py", line 602, in _create_jit_graph
graph, torch_out = _trace_and_get_graph_from_model(model, args)
File "/opt/conda/envs/torchok/lib/python3.9/site-packages/torch/onnx/utils.py", line 517, in _trace_and_get_graph_from_model
trace_graph, torch_out, inputs_states = torch.jit._get_trace_graph(
File "/opt/conda/envs/torchok/lib/python3.9/site-packages/torch/jit/_trace.py", line 1175, in _get_trace_graph
outs = ONNXTracedModule(f, strict, _force_outplace, return_inputs, _return_inputs_states)(*args, **kwargs)
File "/opt/conda/envs/torchok/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1130, in _call_impl
return forward_call(*input, **kwargs)
File "/opt/conda/envs/torchok/lib/python3.9/site-packages/torch/jit/_trace.py", line 127, in forward
graph, out = torch._C._create_graph_by_tracing(
File "/opt/conda/envs/torchok/lib/python3.9/site-packages/torch/jit/_trace.py", line 118, in wrapper
outs.append(self.inner(*trace_inputs))
File "/opt/conda/envs/torchok/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1130, in _call_impl
return forward_call(*input, **kwargs)
File "/opt/conda/envs/torchok/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1118, in _slow_forward
result = self.forward(*input, **kwargs)
File "/workdir/vpatrushev/torchOKsmall2/torchok/torchok/tasks/classification.py", line 51, in forward
x = self.backbone(x)
File "/opt/conda/envs/torchok/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1130, in _call_impl
return forward_call(*input, **kwargs)
File "/opt/conda/envs/torchok/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1118, in _slow_forward
result = self.forward(*input, **kwargs)
File "/workdir/vpatrushev/torchOKsmall2/torchok/torchok/models/backbones/swin.py", line 250, in forward
x = self._forward_patch_emb(x)
File "/workdir/vpatrushev/torchOKsmall2/torchok/torchok/models/backbones/swin.py", line 211, in _forward_patch_emb
x = self.patch_embed(x)
File "/opt/conda/envs/torchok/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1130, in _call_impl
return forward_call(*input, **kwargs)
File "/opt/conda/envs/torchok/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1118, in _slow_forward
result = self.forward(*input, **kwargs)
File "/opt/conda/envs/torchok/lib/python3.9/site-packages/timm/models/layers/patch_embed.py", line 32, in forward
B, C, H, W = x.shape
ValueError: not enough values to unpack (expected 4, got 3)
Environment:
OS: [e.g. Ubuntu 22.04]
CUDA: [e.g. 11.6]
PyTorch: [e.g. 12.0]
PyTorch Lightning: [e.g. 1.6.5]
The text was updated successfully, but these errors were encountered:
Describe the bug
When i set
export_to_onnx
true in yaml config, process crash on epoch end (in ModelCheckpointWithOnnx).To Reproduce
run
python -m torchok -cp ../examples/configs -cn representation_arcface_sop
(before setexport_to_onnx
as true).Actual behavior
Traceback (most recent call last):
File "/workdir/vpatrushev/torchOKsmall2/torchok/torchok/main.py", line 38, in entrypoint
trainer.fit(model, ckpt_path=config.resume_path)
File "/opt/conda/envs/torchok/lib/python3.9/site-packages/pytorch_lightning/trainer/trainer.py", line 770, in fit
self._call_and_handle_interrupt(
File "/opt/conda/envs/torchok/lib/python3.9/site-packages/pytorch_lightning/trainer/trainer.py", line 723, in _call_and_handle_interrupt
return trainer_fn(*args, **kwargs)
File "/opt/conda/envs/torchok/lib/python3.9/site-packages/pytorch_lightning/trainer/trainer.py", line 811, in _fit_impl
results = self._run(model, ckpt_path=self.ckpt_path)
File "/opt/conda/envs/torchok/lib/python3.9/site-packages/pytorch_lightning/trainer/trainer.py", line 1236, in _run
results = self._run_stage()
File "/opt/conda/envs/torchok/lib/python3.9/site-packages/pytorch_lightning/trainer/trainer.py", line 1323, in _run_stage
return self._run_train()
File "/opt/conda/envs/torchok/lib/python3.9/site-packages/pytorch_lightning/trainer/trainer.py", line 1353, in _run_train
self.fit_loop.run()
File "/opt/conda/envs/torchok/lib/python3.9/site-packages/pytorch_lightning/loops/base.py", line 205, in run
self.on_advance_end()
File "/opt/conda/envs/torchok/lib/python3.9/site-packages/pytorch_lightning/loops/fit_loop.py", line 297, in on_advance_end
self.trainer._call_callback_hooks("on_train_epoch_end")
File "/opt/conda/envs/torchok/lib/python3.9/site-packages/pytorch_lightning/trainer/trainer.py", line 1636, in _call_callback_hooks
fn(self, self.lightning_module, *args, **kwargs)
File "/opt/conda/envs/torchok/lib/python3.9/site-packages/pytorch_lightning/callbacks/model_checkpoint.py", line 309, in on_train_epoch_end
self._save_last_checkpoint(trainer, monitor_candidates)
File "/opt/conda/envs/torchok/lib/python3.9/site-packages/pytorch_lightning/callbacks/model_checkpoint.py", line 644, in _save_last_checkpoint
self._save_checkpoint(trainer, filepath)
File "/workdir/vpatrushev/torchOKsmall2/torchok/torchok/callbacks/model_checkpoint_with_onnx.py", line 43, in _save_checkpoint
model.to_onnx(filepath + self.ONNX_EXTENSION, (*input_tensors,), **self.onnx_params)
File "/opt/conda/envs/torchok/lib/python3.9/site-packages/torch/autograd/grad_mode.py", line 27, in decorate_context
return func(*args, **kwargs)
File "/opt/conda/envs/torchok/lib/python3.9/site-packages/pytorch_lightning/core/lightning.py", line 1888, in to_onnx
torch.onnx.export(self, input_sample, file_path, **kwargs)
File "/opt/conda/envs/torchok/lib/python3.9/site-packages/torch/onnx/init.py", line 350, in export
return utils.export(
File "/opt/conda/envs/torchok/lib/python3.9/site-packages/torch/onnx/utils.py", line 163, in export
_export(
File "/opt/conda/envs/torchok/lib/python3.9/site-packages/torch/onnx/utils.py", line 1074, in _export
graph, params_dict, torch_out = _model_to_graph(
File "/opt/conda/envs/torchok/lib/python3.9/site-packages/torch/onnx/utils.py", line 727, in _model_to_graph
graph, params, torch_out, module = _create_jit_graph(model, args)
File "/opt/conda/envs/torchok/lib/python3.9/site-packages/torch/onnx/utils.py", line 602, in _create_jit_graph
graph, torch_out = _trace_and_get_graph_from_model(model, args)
File "/opt/conda/envs/torchok/lib/python3.9/site-packages/torch/onnx/utils.py", line 517, in _trace_and_get_graph_from_model
trace_graph, torch_out, inputs_states = torch.jit._get_trace_graph(
File "/opt/conda/envs/torchok/lib/python3.9/site-packages/torch/jit/_trace.py", line 1175, in _get_trace_graph
outs = ONNXTracedModule(f, strict, _force_outplace, return_inputs, _return_inputs_states)(*args, **kwargs)
File "/opt/conda/envs/torchok/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1130, in _call_impl
return forward_call(*input, **kwargs)
File "/opt/conda/envs/torchok/lib/python3.9/site-packages/torch/jit/_trace.py", line 127, in forward
graph, out = torch._C._create_graph_by_tracing(
File "/opt/conda/envs/torchok/lib/python3.9/site-packages/torch/jit/_trace.py", line 118, in wrapper
outs.append(self.inner(*trace_inputs))
File "/opt/conda/envs/torchok/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1130, in _call_impl
return forward_call(*input, **kwargs)
File "/opt/conda/envs/torchok/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1118, in _slow_forward
result = self.forward(*input, **kwargs)
File "/workdir/vpatrushev/torchOKsmall2/torchok/torchok/tasks/classification.py", line 51, in forward
x = self.backbone(x)
File "/opt/conda/envs/torchok/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1130, in _call_impl
return forward_call(*input, **kwargs)
File "/opt/conda/envs/torchok/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1118, in _slow_forward
result = self.forward(*input, **kwargs)
File "/workdir/vpatrushev/torchOKsmall2/torchok/torchok/models/backbones/swin.py", line 250, in forward
x = self._forward_patch_emb(x)
File "/workdir/vpatrushev/torchOKsmall2/torchok/torchok/models/backbones/swin.py", line 211, in _forward_patch_emb
x = self.patch_embed(x)
File "/opt/conda/envs/torchok/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1130, in _call_impl
return forward_call(*input, **kwargs)
File "/opt/conda/envs/torchok/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1118, in _slow_forward
result = self.forward(*input, **kwargs)
File "/opt/conda/envs/torchok/lib/python3.9/site-packages/timm/models/layers/patch_embed.py", line 32, in forward
B, C, H, W = x.shape
ValueError: not enough values to unpack (expected 4, got 3)
Environment:
The text was updated successfully, but these errors were encountered: