In [None]:
from transformers import pipeline
from PIL import Image
import requests
from datasets import load_dataset
import torch
import uuid
from transformers import AutoProcessor, LlavaForConditionalGeneration, Trainer, TrainingArguments
from transformers import set_seed
from transformers import pipeline
import numpy as np

# Set a seed for reproducibility
set_seed(42)

# Directory used further down both as the Trainer's output_dir and as the
# target of the final save_pretrained() calls.
PATH_TO_SAVE = "/content/drive/MyDrive/"

# Checkpoint identifier shared by the model and the processor so the two
# always come from the same checkpoint.
model_id = "llava-hf/llava-1.5-7b-hf"
model = LlavaForConditionalGeneration.from_pretrained(
    model_id,
    # Weights are requested in half precision.
    # NOTE(review): the same half-precision model is later passed to the
    # Trainer -- confirm that training in float16 weights is intended.
    torch_dtype=torch.float16,
    low_cpu_mem_usage=True,
)

# The processor is called on (text, image) inputs in preprocess_data, and its
# `.tokenizer` attribute is what the Trainer and the pipeline receive.
processor = AutoProcessor.from_pretrained(model_id)

def _first_exchange(conversation):
    """Return ``(prompt, answer)`` text for the first two turns of a conversation."""
    # A list-of-dicts column can be handed back by `datasets` either as a list
    # of dicts or as a dict of lists; accept both layouts.
    if isinstance(conversation, dict):
        values = list(conversation["value"])
    else:
        values = [turn["value"] for turn in conversation]
    if len(values) < 2:
        raise ValueError(
            "each conversation needs a prompt turn and a reply turn, "
            f"got {len(values)} turn(s)")
    # The prompt template below adds exactly one image placeholder, so a
    # placeholder already embedded in the raw text would be duplicated.
    prompt = values[0].replace("<image>", "").strip()
    return prompt, values[1]


def _as_rgb_image(item):
    """Return ``item`` as an RGB image, opening it first when it is a path string."""
    if isinstance(item, str):
        # Paths are opened as given, so relative paths resolve against the
        # current working directory.
        with Image.open(item) as handle:
            return handle.convert("RGB")
    return item.convert("RGB") if hasattr(item, "convert") else item


def preprocess_data(examples, image_column="image"):
    """Encode a batch of raw examples into model inputs plus training labels.

    Written for ``dataset.map(..., batched=True)``: ``examples`` maps every
    column name to a list holding one entry per example, and the returned
    tensors keep that leading batch dimension.

    Args:
        examples: Batch containing a ``"conversations"`` column and an image
            column. Each conversation is read as a sequence of turns that
            carry a ``"value"`` string; the first turn is used as the user
            prompt and the second as the assistant reply. Later turns are
            ignored.
            NOTE(review): this turn layout is assumed -- the dataset schema
            is not visible in this file, confirm it.
        image_column: Name of the column holding each example's image, either
            already decoded or as a path string.
            NOTE(review): the default ``"image"`` is a guess -- confirm the
            real column name.

    Returns:
        The processor's encoding of the batch with an extra ``"labels"``
        entry: a copy of ``input_ids`` in which padding and image-placeholder
        positions are set to -100.

    Raises:
        KeyError: If ``image_column`` is not a column of ``examples``.
        ValueError: If a conversation has fewer than two turns.
    """
    try:
        raw_images = examples[image_column]
    except KeyError as err:
        raise KeyError(
            f"column {image_column!r} not found; pass image_column=... with "
            "the name of the dataset's image column") from err
    images = [_as_rgb_image(item) for item in raw_images]

    # Prompt and reply go into ONE sequence: a causal LM is trained on
    # labels that are aligned position-by-position with input_ids.
    eos_token = processor.tokenizer.eos_token
    texts = []
    for conversation in examples["conversations"]:
        prompt, answer = _first_exchange(conversation)
        texts.append(f'USER: <image>\n{prompt}\nASSISTANT: {answer}{eos_token}')

    # Keyword arguments: the positional order of text/images is not the same
    # in every processor version.
    encoding = processor(
        text=texts, images=images,
        padding=True, truncation=True, return_tensors="pt")

    # No squeeze(): with batched=True every output must keep one row per
    # example, including when the batch has a single row.
    labels = encoding["input_ids"].clone()
    labels[encoding["attention_mask"] == 0] = -100  # no loss on padding
    image_token_id = processor.tokenizer.convert_tokens_to_ids("<image>")
    labels[labels == image_token_id] = -100  # no loss on image placeholders

    encoding["labels"] = labels
    return encoding

# Load the raw examples and encode them once up front. Every raw column is
# dropped after encoding: with remove_unused_columns=False below, any column
# still in the dataset would be forwarded to the collator and the model.
dataset = load_dataset('/content/drive/MyDrive', split='train')
processed_dataset = dataset.map(
    preprocess_data, batched=True, remove_columns=dataset.column_names)

# Define training arguments
training_args = TrainingArguments(
    output_dir=PATH_TO_SAVE,
    per_device_train_batch_size=1,
    num_train_epochs=1,
    save_steps=200,
    logging_steps=50,
    learning_rate=5e-5,
    save_total_limit=2,
    remove_unused_columns=False,
    push_to_hub=False,
)

# Train the original floating-point model. Dynamic quantization packs the
# Linear weights into int8 buffers for inference, which leaves nothing for the
# optimizer to update, so it is applied after training (see below).
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=processed_dataset,
    tokenizer=processor.tokenizer,
)

# Fine-tune
trainer.train()

# Evaluate (optional): Trainer.evaluate() raises when no evaluation dataset
# is configured, so only run it if one was passed to the Trainer.
results = None
if trainer.eval_dataset is not None:
    results = trainer.evaluate()
    print(results)

# Save the fine-tuned floating-point weights and the processor. The quantized
# copy is derived from these weights and can be rebuilt by running
# quantize_dynamic again on a reloaded checkpoint.
# NOTE(review): save_pretrained() on the dynamically quantized module is
# expected to fail because its packed parameters are not plain tensors --
# confirm before trying to persist the quantized copy directly.
model.save_pretrained(PATH_TO_SAVE)
processor.save_pretrained(PATH_TO_SAVE)

# Optionally, push to the Hugging Face Model Hub -- only when it was enabled
# in the training arguments (it is disabled above).
if training_args.push_to_hub:
    trainer.push_to_hub()

# Quantization (post-training, for inference). The int8 dynamic kernels run on
# CPU with float32 activations, hence the move to CPU and the cast.
# NOTE(review): .float() converts `model` in place and quantize_dynamic copies
# it, so this step needs host RAM for the float32 weights -- confirm it fits.
quantized_model = torch.quantization.quantize_dynamic(
    model.to("cpu").float().eval(), {torch.nn.Linear}, dtype=torch.qint8
)

# Example of generating responses using the quantized model
# NOTE(review): the prompt contains an <image> placeholder but this pipeline
# is given text only -- confirm that a text-only smoke test is what is wanted.
text_generator = pipeline(
    'text-generation', model=quantized_model, tokenizer=processor.tokenizer)
generated_response = text_generator(
    "USER: <image>\nSome input text\nASSISTANT:")
print("Generated Response:", generated_response[0]['generated_text'])


Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Map:   0%|          | 0/112 [00:00<?, ? examples/s]

TypeError: list indices must be integers or slices, not str

In [None]:
# Install the `accelerate` package from inside the notebook (the leading `!`
# hands the line to the shell).
# NOTE(review): presumably required by the Trainer cell above -- if so, run
# this cell first.
!pip install accelerate