Description
```bash
accelerate launch --config_file=examples/accelerate_configs/deepspeed_zero3.yaml examples/scripts/sentiment_tuning.py --batch_size 32 --model_name tiiuae/falcon-7b --mini_batch_size 1 --log_with wandb
```
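The crash happens at model construction, before any PPO step runs. A minimal repro sketch under the same setup (assumptions: this is run via the `accelerate launch` command above with ZeRO-3 active, and `AutoModelForCausalLMWithValueHead` is the `trl_model_class` the script resolves for causal-LM models):

```python
# Minimal repro sketch -- assumes the ZeRO-3 accelerate config above is active
# and that AutoModelForCausalLMWithValueHead is the trl_model_class the script
# resolves for causal-LM models.
from trl import AutoModelForCausalLMWithValueHead

# Raises the RuntimeError below: under ZeRO-3 the freshly built model only
# holds partitioned, zero-sized parameters, so the checkpoint shapes mismatch.
model = AutoModelForCausalLMWithValueHead.from_pretrained("tiiuae/falcon-7b")
```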
Trace
```text
Traceback (most recent call last):
  File "examples/scripts/sentiment_tuning.py", line 154, in <module>
    model = trl_model_class.from_pretrained(
  File "/fsx/costa/trl/trl/models/modeling_base.py", line 199, in from_pretrained
    pretrained_model = cls.transformers_parent_class.from_pretrained(
  File "/admin/home/costa/.pyenv/versions/3.8.11/envs/trl/lib/python3.8/site-packages/transformers/models/auto/auto_factory.py", line 479, in from_pretrained
    return model_class.from_pretrained(
  File "/admin/home/costa/.pyenv/versions/3.8.11/envs/trl/lib/python3.8/site-packages/transformers/modeling_utils.py", line 2881, in from_pretrained
    ) = cls._load_pretrained_model(
  File "/admin/home/costa/.pyenv/versions/3.8.11/envs/trl/lib/python3.8/site-packages/transformers/modeling_utils.py", line 3278, in _load_pretrained_model
    raise RuntimeError(f"Error(s) in loading state_dict for {model.__class__.__name__}:\n\t{error_msg}")
RuntimeError: Error(s) in loading state_dict for RWForCausalLM:
    size mismatch for transformer.h.0.self_attention.query_key_value.weight: copying a param with shape torch.Size([4672, 4544]) from checkpoint, the shape in current model is torch.Size([0]).
    size mismatch for transformer.h.0.self_attention.dense.weight: copying a param with shape torch.Size([4544, 4544]) from checkpoint, the shape in current model is torch.Size([0]).
    size mismatch for transformer.h.0.mlp.dense_h_to_4h.weight: copying a param with shape torch.Size([18176, 4544]) from checkpoint, the shape in current model is torch.Size([0]).
    size mismatch for transformer.h.0.mlp.dense_4h_to_h.weight: copying a param with shape torch.Size([4544, 18176]) from checkpoint, the shape in current model is torch.Size([0]).
    size mismatch for transformer.h.1.self_attention.query_key_value.weight: copying a param with shape torch.Size([4672, 4544]) from checkpoint, the shape in current model is torch.Size([0]).
    size mismatch for transformer.h.1.self_attention.dense.weight: copying a param with shape torch.Size([4544, 4544]) from checkpoint, the shape in current model is torch.Size([0]).
    size mismatch for transformer.h.1.mlp.dense_h_to_4h.weight: copying a param with shape torch.Size([18176, 4544]) from checkpoint, the shape in current model is torch.Size([0]).
    ...
```
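For context on the `torch.Size([0])` shapes: ZeRO-3 partitions parameters across ranks at construction time, so each rank locally holds an empty placeholder tensor until the shards are gathered, which is consistent with the loader here copying full checkpoint weights into an already-partitioned model. A minimal sketch of that partitioning behavior (assumptions: deepspeed is installed, the snippet runs under a distributed launcher, and the config dict is illustrative rather than the contents of `deepspeed_zero3.yaml`):

```python
# Sketch of why the "current model" reports torch.Size([0]) under ZeRO-3.
import torch
import deepspeed

# Illustrative ZeRO-3 config (assumption: not the real deepspeed_zero3.yaml).
ds_config = {
    "train_micro_batch_size_per_gpu": 1,
    "zero_optimization": {"stage": 3},
}

# Parameters created inside deepspeed.zero.Init are partitioned immediately;
# the local tensor is emptied and each rank keeps only its own shard.
with deepspeed.zero.Init(config_dict_or_path=ds_config):
    # Same dimensions as falcon-7b's query_key_value projection in the trace.
    layer = torch.nn.Linear(4544, 4672, bias=False)

print(layer.weight.shape)  # torch.Size([0]) -- matches the error above
```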