-
Notifications
You must be signed in to change notification settings - Fork 6.5k
Closed
Closed
Copy link
Description
I want to save the HiDream model in int4 (only text encoder 3 and the transformer, one at a time).
This code saves the other three components (TE1, TE2 and VAE) but does not save text_encoder_3, even after waiting for 30 minutes. If I do not supply TE1, TE2, TE4 and VAE, it throws an error.
Any suggestions, please?
import torch
from transformers import PreTrainedTokenizerFast, LlamaForCausalLM, T5EncoderModel
from optimum.quanto import freeze, qint4, quantize, quantization_map
from diffusers import (
UniPCMultistepScheduler,
HiDreamImagePipeline,
HiDreamImageTransformer2DModel,
)
base_repo = "HiDream-ai/HiDream-I1-Full"
output_dir = "HiDream-I1-Full-int4"
llama_repo = "meta-llama/Meta-Llama-3.1-8B-Instruct"

# Keyword arguments shared by every from_pretrained() call that loads weights.
load_kwargs = {"torch_dtype": torch.bfloat16, "low_cpu_mem_usage": True}


def _quantize_int4(model):
    """Quantize the weights of *model* to int4 with quanto, then freeze it."""
    quantize(model, weights=qint4)
    freeze(model)


# Tokenizer and text encoder 4 both come from the Llama repository.
tokenizer_4 = PreTrainedTokenizerFast.from_pretrained(llama_repo)
text_encoder_4 = LlamaForCausalLM.from_pretrained(
    llama_repo,
    output_hidden_states=True,
    output_attentions=True,
    **load_kwargs,
)
_quantize_int4(text_encoder_4)

# Text encoder 3 is the T5 encoder stored in the base repo's subfolder.
text_encoder_3 = T5EncoderModel.from_pretrained(
    base_repo,
    subfolder="text_encoder_3",
    **load_kwargs,
)
_quantize_int4(text_encoder_3)

# Scheduler that replaces the pipeline's own scheduler once it is loaded.
scheduler = UniPCMultistepScheduler(
    flow_shift=3.0,
    prediction_type="flow_prediction",
    use_flow_sigmas=True,
)

# Assemble the pipeline: the two quantized text encoders and the Llama
# tokenizer are passed in explicitly, the transformer is passed as None, and
# every remaining component is loaded from the base repo.
pipe = HiDreamImagePipeline.from_pretrained(
    base_repo,
    text_encoder_3=text_encoder_3,
    tokenizer_4=tokenizer_4,
    text_encoder_4=text_encoder_4,
    transformer=None,
    **load_kwargs,
)
pipe.scheduler = scheduler

# Write the assembled pipeline to disk.
pipe.save_pretrained(output_dir)
print(f"Model successfully saved to {output_dir} with quantized text_encoder_3")

# Optional sanity check: the reload below is disabled, so nothing is actually
# verified unless the line is uncommented.
print("Verifying saved model...")
# loaded_pipe = HiDreamImagePipeline.from_pretrained(output_dir, torch_dtype=torch.bfloat16)
print("Model loaded successfully!")

It just stays there:
(sddw-dev) C:\aiOWN\diffuser_webui>python HiDream-I1-Full-int4_SAVE.py
C:\Users\nitin\miniconda3\envs\sddw-dev\Lib\site-packages\transformers\generation\configuration_utils.py:817: UserWarning: `return_dict_in_generate` is NOT set to `True`, but `output_attentions` is. When `return_dict_in_generate` is not `True`, `output_attentions` is ignored.
warnings.warn(
C:\Users\nitin\miniconda3\envs\sddw-dev\Lib\site-packages\transformers\generation\configuration_utils.py:817: UserWarning: `return_dict_in_generate` is NOT set to `True`, but `output_hidden_states` is. When `return_dict_in_generate` is not `True`, `output_hidden_states` is ignored.
warnings.warn(
Loading checkpoint shards: 100%|████████████████████████████████████| 4/4 [00:00<00:00, 18.41it/s]
Loading checkpoint shards: 100%|████████████████████████████████████| 2/2 [00:02<00:00, 1.39s/it]
Loading pipeline components...: 100%|█████████████████████████████| 10/10 [00:07<00:00, 1.34it/s]
Metadata
Metadata
Assignees
Labels
No labels

