In [13]:
# Importar las librerías necesarias
import os
from io import BytesIO

import pandas as pd
import requests
import torch
from accelerate import init_empty_weights, load_checkpoint_and_dispatch
from huggingface_hub import login
from PIL import Image
from transformers import ChameleonProcessor, ChameleonForConditionalGeneration, AdamW, get_scheduler, AutoConfig

from app.config.environments import TOKEN_HUGGINGFACE, MOCK_DATA_FOLDER


# Make sure the mock-data folder exists. `exist_ok=True` replaces the separate
# existence check (no check-then-create race) and keeps the cell safe to re-run.
os.makedirs(MOCK_DATA_FOLDER, exist_ok=True)

In [14]:
# Helper to load an image from a URL
def load_image(url, timeout=30):
    """Download the image at ``url`` and open it with PIL.

    Args:
        url: HTTP(S) address of the image file.
        timeout: Seconds ``requests.get`` waits for the server before giving up.

    Returns:
        The ``PIL.Image.Image`` opened from the downloaded bytes.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.RequestException: On connection errors or when the timeout expires.
    """
    response = requests.get(url, timeout=timeout)
    # Fail loudly on an error status instead of handing an error page to PIL.
    response.raise_for_status()
    # `response.content` is the complete, content-decoded body, so the image no
    # longer depends on a live (and possibly compressed) socket stream.
    return Image.open(BytesIO(response.content))

In [16]:
# Build the mock dataset and store it as a CSV file.
# NOTE: this is placeholder data and will be replaced.
data = {
    "text": [
        "Un perro corriendo en el parque.",
        "Un gato saltando desde un árbol.",
        "Un niño jugando con una pelota.",
        "Un coche rojo estacionado en la calle.",
        "Una hermosa puesta de sol.",
        "Una taza de café en una mesa.",
        "Un grupo de personas en una fiesta."
    ],
    "image_url": [
        "https://cdn.pixabay.com/photo/2022/09/11/15/06/dog-7447075_1280.jpg",
        "https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcS11VtfGtvMCf5fk33z6SSMk0KLUdtg1_OG7g&s",
        "https://st2.depositphotos.com/2751239/7710/i/450/depositphotos_77108351-stock-photo-two-cute-little-kids-playing.jpg",
        "https://img.freepik.com/fotos-premium/coche-rojo-esta-estacionado-calle-frente-edificio_1089043-92402.jpg",
        "https://i.pinimg.com/236x/d4/8a/ea/d48aea840f5de4bcf2eb4325607b277f.jpg",
        "https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcS6fjlbTD3cmnHvy1KqXZD5b19_9nhWuEsTxg&s",
        "https://img.freepik.com/fotos-premium/grupo-personas-fiesta-gorro-fiesta_670382-21629.jpg",
    ]
}

# Build the path once with os.path.join so the write and the read below cannot
# drift apart and a trailing separator in MOCK_DATA_FOLDER is handled correctly.
mock_csv_path = os.path.join(MOCK_DATA_FOLDER, 'mock_data.csv')

pd.DataFrame(data).to_csv(mock_csv_path, index=False)

# Read the data back from the CSV; `df` is what the following cells iterate over.
df = pd.read_csv(mock_csv_path)

In [17]:
# Log in to the Hugging Face Hub with the token imported from the project's
# environment configuration (the token value itself is never written in this notebook).
login(token=TOKEN_HUGGINGFACE)

The token has not been saved to the git credentials helper. Pass `add_to_git_credential=True` in this function directly or `--add-to-git-credential` if using via `huggingface-cli` if you want to set the git credential as well.
Token is valid (permission: write).
Your token has been saved to C:\Users\felip\.cache\huggingface\token
Login successful


In [19]:
# Hub id of the checkpoint, defined once so the processor and the model are
# always loaded from the same repository.
MODEL_ID = "facebook/chameleon-7b"

# Earlier experiments (pinning device_map="cuda", and manually building an empty
# model with accelerate's init_empty_weights + load_checkpoint_and_dispatch) were
# replaced by the single from_pretrained call below.

# Initialise the processor
processor = ChameleonProcessor.from_pretrained(MODEL_ID)

# Load the model weights in bfloat16 and let `device_map="auto"` place them.
# NOTE: `device_map` requires the `accelerate` package to be usable from this
# kernel; the recorded run of this cell failed with
# "ImportError: ... requires Accelerate: `pip install accelerate`".
model = ChameleonForConditionalGeneration.from_pretrained(
    MODEL_ID,
    torch_dtype=torch.bfloat16,
    device_map="auto",
)


Some kwargs in processor config are unused and will not have any effect: image_token, image_seq_length. 


ImportError: Using `low_cpu_mem_usage=True` or a `device_map` requires Accelerate: `pip install accelerate`

In [None]:
# Helper to download, preprocess and tokenize one (text, image) sample
def process_data(text, image_url):
    """Turn one text / image-URL pair into model-ready inputs.

    Args:
        text: Prompt or caption. If it does not already contain the processor's
            image placeholder, the placeholder is appended so the processor can
            expand it into the image token span.
        image_url: URL of the image, fetched with ``load_image``.

    Returns:
        The processor output moved to ``model.device``, with its floating-point
        tensors cast to ``model.dtype``.
    """
    image = load_image(image_url)

    # The processor only inserts image tokens where the placeholder appears in the
    # text; without it the image is never wired into the token sequence.
    # Callers who need the image at another position can place the placeholder
    # in `text` themselves.
    image_token = getattr(processor, "image_token", "<image>")
    if image_token not in text:
        text = f"{text}{image_token}"

    # Keyword arguments keep the call independent of the processor's positional order.
    inputs = processor(text=text, images=image, return_tensors="pt")

    # Match the model's dtype (bfloat16 here); pixel values would otherwise stay
    # float32. Integer tensors such as input_ids are only moved, not cast.
    return inputs.to(model.device, dtype=model.dtype)

In [None]:
# Training hyper-parameters
num_epochs = 3
learning_rate = 5e-5

# torch.optim.AdamW replaces the deprecated transformers.AdamW. `eps` and
# `weight_decay` are pinned to the values the transformers implementation used
# by default, so the optimisation itself is unchanged.
optimizer = torch.optim.AdamW(
    model.parameters(), lr=learning_rate, eps=1e-6, weight_decay=0.0
)
num_training_steps = len(df) * num_epochs
lr_scheduler = get_scheduler(
    name="linear", optimizer=optimizer, num_warmup_steps=0, num_training_steps=num_training_steps
)

# Download and preprocess every sample once instead of once per epoch.
# This keeps all inputs in memory, which is fine for the small mock dataset;
# switch back to per-step processing (or a DataLoader) for a large dataset.
samples = [
    (idx, process_data(row['text'], row['image_url']))
    for idx, row in df.iterrows()
]

model.train()
for epoch in range(num_epochs):
    for idx, inputs in samples:
        optimizer.zero_grad()
        # Causal-LM fine-tuning: the input ids double as the labels.
        outputs = model(**inputs, labels=inputs["input_ids"])
        loss = outputs.loss
        loss.backward()
        optimizer.step()
        lr_scheduler.step()

        print(f'Epoch {epoch}, Step {idx}, Loss: {loss.item()}')

# Save the fine-tuned model and its processor
model.save_pretrained('finetuned_chameleon')
processor.save_pretrained('finetuned_processor')

In [None]:
# Evaluation: generate up to 50 new tokens for every sample and print the
# decoded sequence next to the input text.
model.eval()
# Inference only, so gradient tracking is switched off for the whole loop.
with torch.no_grad():
    for text, image_url in zip(df['text'], df['image_url']):
        inputs = process_data(text, image_url)
        outputs = model.generate(**inputs, max_new_tokens=50)

        print(f'Input: {text}')
        print(f'Output: {processor.decode(outputs[0], skip_special_tokens=True)}\n')