I find I cannot load from a fine-tuned LoRA checkpoint #418

Closed
YerongLi opened this issue Aug 7, 2024 · 5 comments
YerongLi commented Aug 7, 2024

import torch
from transformers import AutoModel, AutoTokenizer, AutoConfig

# Load the base model and tokenizer
model_path = "/home/yerong2/models/internlm-xcomposer2d5-7b"
model = AutoModel.from_pretrained(model_path, trust_remote_code=True).half().cuda()
tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)

# Load the adapter/finetuned weights
checkpoint_path = "lorra_finetune/src/finetune/output/finetune_lora/checkpoint-2"
adapter_weights = torch.load(f"{checkpoint_path}/adapter_model.bin")

# Try to load the adapter weights directly into the base model
model.load_state_dict(adapter_weights, strict=True)

# Verify that the model has loaded the weights
print("Model successfully loaded with finetuned weights.")

model.tokenizer = tokenizer

query = 'Image1 <ImageHere>; Image2 <ImageHere>; Image3 <ImageHere>; I want to buy a car from the three given cars, analyze their advantages and weaknesses one by one'
image = ['../examples/cars1.jpg',
        '../examples/cars2.jpg',
        '../examples/cars3.jpg',]
with torch.autocast(device_type='cuda', dtype=torch.float16):
    response, his = model.chat(tokenizer, query, image, do_sample=False, num_beams=3, use_meta=True)
print(response)

This reports the following error:

[rank1]: Traceback (most recent call last):
[rank1]:   File "/home/yerong2/representation-engineering/lorra_finetune/src/mllm_lorra.py", line 580, in <module>
[rank1]:     train()
[rank1]:   File "/home/yerong2/representation-engineering/lorra_finetune/src/mllm_lorra.py", line 491, in train
[rank1]:     model.load_state_dict(adapter_weights, strict=True)
[rank1]:   File "/home/yerong2/local/miniconda3/envs/mllm/lib/python3.11/site-packages/torch/nn/modules/module.py", line 2189, in load_state_dict
[rank1]:     raise RuntimeError('Error(s) in loading state_dict for {}:\n\t{}'.format(
[rank1]: RuntimeError: Error(s) in loading state_dict for InternLMXComposer2ForCausalLM:
[rank1]:        Missing key(s) in state_dict: "plora_glb_GN", "plora_sub_GN", "model.tok_embeddings.weight", "model.layers.0.attention.wqkv.weight", "model.layers.0.attention.wqkv.Plora_A.weight", "model.layers.0.attention.wqkv.Plora_B.weight", "model.layers.0.attention.wqkv.lora_sft_A.weight", "model.layers.0.attention.wqkv.lora_sft_B.weight", "model.layers.0.attention.wqkv.lora_dpo_A.weight", "model.layers.0.attention.wqkv.lora_dpo_B.weight", "model.layers.0.attention.wqkv.lora_web_A.weight", "model.layers.0.attention.wqkv.lora_web_B.weight", "model.layers.0.attention.wo.weight", "model.layers.0.attention.wo.Plora_A.weight", "model.layers.0.attention.wo.Plora_B.weight", "model.layers.0.attention.wo.lora_sft_A.weight", "model.layers.0.attention.wo.lora_sft_B.weight", "model.layers.0.attention.wo.lora_dpo_A.weight", "model.layers.0.attention.wo.lora_dpo_B.weight", "model.layers.0.attention.wo.lora_web_A.weight", "model.layers.0.attention.wo.lora_web_B.weight", "model.layers.0.feed_forward.w1.weight", "model.layers.0.feed_forward.w1.Plora_A.weight", "model.layers.0.feed_forward.w1.Plora_B.weight", "model.layers.0.feed_forward.w1.lora_sft_A.weight", "model.layers.0.feed_forward.w1.lora_sft_B.weight", "model.layers.0.feed_forward.w1.lora_dpo_A.weight", "model.layers.0.feed_forward.w1.lora_dpo_B.weight", "model.layers.0.feed_forward.w1.lora_web_A.weight", "model.layers.0.feed_forward.w1.lora_web_B.weight", "model.layers.0.feed_forward.w3.weight", "model.layers.0.feed_forward.w3.Plora_A.weight", "model.layers.0.feed_forward.w3.Plora_B.weight", "model.layers.0.feed_forward.w3.lora_sft_A.weight", "model.layers.0.feed_forward.w3.lora_sft_B.weight", "model.layers.0.feed_forward.w3.lora_dpo_A.weight", "model.layers.0.feed_forward.w3.lora_dpo_B.weight", "model.layers.0.feed_forward.w3.lora_web_A.weight", "model.layers.0.feed_forward.w3.lora_web_B.weight", "model.layers.0.feed_forward.w2.weight", "model.layers.0.feed_forward.w2.Plora_A.weight", "model.layers.0.feed_forward.w2.Plora_B.weight", "model.layers.0.feed_forward.w2.lora_sft_A.weight", "model.layers.0.feed_forward.w2.lora_sft_B.weight", "model.layers.0.feed_forward.w2.lora_dpo_A.weight", "model.layers.0.feed_forward.w2.lora_dpo_B.weight", "m
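The missing keys here suggest that adapter_model.bin contains only the LoRA tensors, while load_state_dict(strict=True) expects every base-model weight to be present. A minimal sketch to confirm this, reusing model and checkpoint_path from the snippet above:

import torch

# Inspect what the checkpoint actually contains: with a LoRA-only
# checkpoint the keys cover just the adapter matrices, not the base model.
adapter_weights = torch.load(f"{checkpoint_path}/adapter_model.bin", map_location="cpu")
print(len(adapter_weights), "tensors in checkpoint")
print(list(adapter_weights.keys())[:5])

# strict=False reports the mismatch instead of raising: missing_keys lists
# every base-model weight that is absent from the adapter file.
missing, unexpected = model.load_state_dict(adapter_weights, strict=False)
print(f"{len(missing)} missing keys, {len(unexpected)} unexpected keys")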
YerongLi commented Aug 7, 2024

Using PeftModel.from_pretrained seems to work:

import torch
from transformers import AutoModel, AutoTokenizer, AutoConfig
from peft import PeftModel    

# Load the base model and tokenizer
model_path = "/home/yerong2/models/internlm-xcomposer2d5-7b"
model = AutoModel.from_pretrained(model_path, trust_remote_code=True).half().cuda()
tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)

# Load the adapter/finetuned weights
checkpoint_path = "lorra_finetune/math/best"
# adapter_weights = torch.load(f"{checkpoint_path}/adapter_model.bin")
model = PeftModel.from_pretrained(model, checkpoint_path)
model = model.merge_and_unload()
# Verify that the model has loaded the weights
print("Model successfully loaded with finetuned weights.")

model.tokenizer = tokenizer

query = 'Image1 <ImageHere>; Image2 <ImageHere>; Image3 <ImageHere>; I want to buy a car from the three given cars, analyze their advantages and weaknesses one by one'
image = ['./examples/cars1.jpg',
        './examples/cars2.jpg',
        './examples/cars3.jpg',]
with torch.autocast(device_type='cuda', dtype=torch.float16):
    response, his = model.chat(tokenizer, query, image, do_sample=False, num_beams=3, use_meta=True)
print(response)

YerongLi closed this as completed Aug 7, 2024
YerongLi (Author) commented, quoting the snippet above:
Actually, this merge is not effective; I think it just loaded the original model from HF.
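One way to check whether merge_and_unload actually changed anything is to snapshot the base weights before attaching the adapter and diff them after the merge; if nothing moved, the adapter was effectively a no-op (for example, LoRA B matrices still zero, as at initialization). A rough sketch, assuming model and checkpoint_path as above:

import torch
from peft import PeftModel

# Snapshot the base weights (on CPU to avoid doubling GPU memory).
before = {n: p.detach().cpu().clone() for n, p in model.named_parameters()}

model = PeftModel.from_pretrained(model, checkpoint_path)
model = model.merge_and_unload()

# Count how many parameters the merge actually moved; zero changed
# parameters means the adapter contributed nothing.
changed = [n for n, p in model.named_parameters()
           if n in before and not torch.equal(before[n], p.detach().cpu())]
print(f"{len(changed)} parameters changed by the merge")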

YerongLi reopened this Aug 16, 2024
yuhangzang (Collaborator) commented:

You may read the documentation and use merge_peft_adapter.py to merge the LoRA weights.
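For reference, a merge utility of this kind typically boils down to the following; the paths and output directory here are illustrative, and the repository's merge_peft_adapter.py may differ in detail:

import torch
from transformers import AutoModel, AutoTokenizer
from peft import PeftModel

# Illustrative stand-ins, not the script's actual arguments.
base_path = "/home/yerong2/models/internlm-xcomposer2d5-7b"
adapter_path = "lorra_finetune/math/best"
output_path = "merged_model"

model = AutoModel.from_pretrained(base_path, trust_remote_code=True,
                                  torch_dtype=torch.float16)
model = PeftModel.from_pretrained(model, adapter_path)
model = model.merge_and_unload()  # fold the LoRA deltas into the base weights

model.save_pretrained(output_path)
AutoTokenizer.from_pretrained(base_path, trust_remote_code=True).save_pretrained(output_path)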

YerongLi (Author) commented:

#423: I found that with multiple GPUs, DeepSpeed + LoRA saves an invalid checkpoint for some reason.
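A quick way to spot such an invalid checkpoint is to load adapter_model.bin on its own and look for empty or all-zero tensors, a common symptom when partitioned weights are saved without first being gathered (the path below is illustrative):

import torch

# Illustrative path; point this at the checkpoint written by the
# multi-GPU deepspeed + LoRA run.
state = torch.load("output/finetune_lora/checkpoint-2/adapter_model.bin",
                   map_location="cpu")

# Empty shapes or all-zero tensors suggest the weights were saved while
# still sharded across ranks (e.g., ZeRO-3 without gathering).
for name, tensor in state.items():
    if tensor.numel() == 0 or torch.count_nonzero(tensor) == 0:
        print("suspect tensor:", name, tuple(tensor.shape))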
