## Setup and Imports

In [None]:
!pip install -Uq diffusers
!pip install -Uq transformers
!pip install -Uq bitsandbytes

In [None]:
from transformers import T5EncoderModel
from transformers import BitsAndBytesConfig as TransformersBitsAndBytesConfig

from diffusers import CogVideoXTransformer3DModel, AutoencoderKLCogVideoX
from diffusers import BitsAndBytesConfig as DiffusersBitsAndBytesConfig
import torch

In [None]:
from huggingface_hub import notebook_login

In [None]:
# Log in to the Hugging Face Hub from inside the notebook. The last cell creates
# a repo and uploads folders, so the token presumably needs write access — confirm.
notebook_login()

## Text Encoder

In [None]:
# Hub repository that holds the original CogVideoX checkpoint.
model_id = "THUDM/CogVideoX-5b"

# 4-bit NF4 settings for the transformers-side loader; compute runs in float16.
nf4_config = TransformersBitsAndBytesConfig(
    load_in_4bit=True, bnb_4bit_quant_type="nf4", bnb_4bit_compute_dtype=torch.float16
)

# Load the T5 encoder from the checkpoint's `text_encoder` subfolder with the
# quantization settings above.
text_encoder = T5EncoderModel.from_pretrained(
    model_id,
    subfolder="text_encoder",
    torch_dtype=torch.float16,
    quantization_config=nf4_config,
)

In [None]:
# Write the loaded text encoder to a local "text_encoder" directory; the upload
# cell at the end of the notebook pushes this folder to the Hub.
text_encoder.save_pretrained("text_encoder")

## Diffusion Transformer and VAE

In [None]:
# Same source checkpoint as the text encoder cell.
model_id = "THUDM/CogVideoX-5b"

# 4-bit NF4 settings, this time using the diffusers-side config class.
nf4_config = DiffusersBitsAndBytesConfig(
    load_in_4bit=True, bnb_4bit_quant_type="nf4", bnb_4bit_compute_dtype=torch.float16
)

# Both components are loaded with identical dtype and quantization arguments.
shared_load_kwargs = {
    "quantization_config": nf4_config,
    "torch_dtype": torch.float16,
}

transformer = CogVideoXTransformer3DModel.from_pretrained(
    model_id, subfolder="transformer", **shared_load_kwargs
)

# NOTE(review): the VAE receives the NF4 config as well; whether bitsandbytes
# actually quantizes any of its layers is not visible here — confirm it is intended.
vae = AutoencoderKLCogVideoX.from_pretrained(
    model_id, subfolder="vae", **shared_load_kwargs
)

In [None]:
# Persist each loaded component into a local directory of the same name; these
# directory names are what the upload cell passes as `folder_path`.
for folder_name, component in {"transformer": transformer, "vae": vae}.items():
    component.save_pretrained(folder_name)

## Upload the model to the Hub

In [None]:
from huggingface_hub import create_repo, upload_folder

# Create the private model repo that will hold the saved components.
# exist_ok=True keeps this cell re-runnable: without it, running the cell a
# second time fails because the repository already exists.
# NOTE(review): the namespace is hard-coded; change it to an account or org the
# logged-in token can write to before running.
repo_id = create_repo(
    repo_id="ariG23498/CogVideoX-5b-nf4-pkg",
    private=True,
    repo_type="model",
    exist_ok=True,
).repo_id

# Each local folder written by save_pretrained is uploaded to a subfolder of the
# same name in the repo.
for component_dir in ("text_encoder", "transformer", "vae"):
    upload_folder(
        repo_id=repo_id,
        folder_path=component_dir,
        path_in_repo=component_dir,
    )