# Quantization and Pruning for OpenVLA-7B
This notebook loads the OpenVLA-7B model with 4-bit quantization, applies global unstructured pruning to its linear layers, and saves the resulting model and tokenizer.

In [None]:
# Install dependencies (if running in a fresh environment)
!pip install transformers accelerate bitsandbytes optimum

In [None]:
import torch
from transformers import (
    AutoModelForCausalLM,
    AutoModelForVision2Seq,
    AutoTokenizer,
    BitsAndBytesConfig,
)

In [None]:
# Hugging Face Hub repository id of the OpenVLA-7B checkpoint (canonical
# lowercase spelling).
model_name = 'openvla/openvla-7b'

# Configure 4-bit quantization using bitsandbytes: NF4 weights, double
# quantization of the quantization constants, bfloat16 for compute.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type='nf4',
    bnb_4bit_compute_dtype=torch.bfloat16
)

# OpenVLA ships its model class as custom code in the Hub repository and
# registers it under AutoModelForVision2Seq, so AutoModelForCausalLM cannot
# resolve it. `trust_remote_code=True` executes Python downloaded from that
# repository -- only enable it for sources you trust.
# NOTE(review): the remote modeling code may import packages beyond the ones
# installed at the top of this notebook -- confirm against the model card.
model = AutoModelForVision2Seq.from_pretrained(
    model_name,
    device_map='auto',
    quantization_config=bnb_config,
    trust_remote_code=True,
)
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)

In [None]:
# Apply global unstructured L1 pruning to the model's linear layers.
import torch.nn.utils.prune as prune

PRUNE_AMOUNT = 0.2  # fraction of connections zeroed across all selected layers

# Select only linear layers whose weight is still a floating-point tensor.
# bitsandbytes 4-bit layers subclass torch.nn.Linear but store their weights as
# packed integer data; ranking those bytes by magnitude is meaningless and the
# pruning reparametrization cannot be applied to them, so they are skipped.
# NOTE(review): when the model was loaded in 4-bit, most linear layers fall in
# the skipped group; prune a full-precision copy before quantizing if those
# layers should be pruned as well.
parameters_to_prune = [
    (module, 'weight')
    for _, module in model.named_modules()
    if isinstance(module, torch.nn.Linear) and module.weight.is_floating_point()
]
if not parameters_to_prune:
    raise RuntimeError(
        'No floating-point torch.nn.Linear weights found to prune; '
        'was every linear layer quantized?'
    )

prune.global_unstructured(
    parameters_to_prune,
    pruning_method=prune.L1Unstructured,
    amount=PRUNE_AMOUNT,
)

# Make the pruning permanent: fold each mask into its weight and drop the
# `weight_orig` / `weight_mask` reparametrization, so that a later
# save_pretrained() writes a plain `weight` tensor for every layer.
for module, param_name in parameters_to_prune:
    prune.remove(module, param_name)

In [None]:
# Persist the pruned, quantized model together with its tokenizer.
save_path = 'openvla7b_pruned_quantized'
# Both objects expose save_pretrained(); the model is written first, then the
# tokenizer, into the same output directory.
for artifact in (model, tokenizer):
    artifact.save_pretrained(save_path)
print(f'Model saved to {save_path}')