In [None]:
import torch
from lavis.models import load_model_and_preprocess

In [19]:
# Record the installed PyTorch build in the notebook output for reproducibility.
print(torch.__version__)


2.4.1+cu121


### Carga del Modelo

In [20]:
# Pick the GPU when one is visible, otherwise fall back to the CPU.
# `device` is always a torch.device (never a bare string), so later cells can
# rely on a single type regardless of the hardware the notebook runs on.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Device available: ", device)

Device available:  cuda


In [23]:
# Show which device string is handed to the loader, then load the model in
# evaluation mode together with its visual processors. The third return value
# is discarded.
print(str(device))
model, vis_processors, _ = load_model_and_preprocess(
    name="blip2_t5",
    model_type="pretrain_flant5xxl",
    is_eval=True,
    device=str(device),
)

cuda


Loading checkpoint shards:   0%|          | 0/5 [00:01<?, ?it/s]

  checkpoint = torch.load(cached_file, map_location="cpu")


### Carga de las preguntas

In [None]:
from orchestartors_delete.QuestionDataOrchestrator import QuestionDataOrchestrator
from data_controler.MongoDB_data_controler.MongoDBControler import MongoDBController

# Connect to the MongoDB instance on localhost and build a loader bound to the
# 'categorization_data' database.
mongodb_controller = MongoDBController()
mongodb_connection = mongodb_controller.build_connection(host='localhost', port=27017)
mongodb_loader = mongodb_controller.build_loader(mongodb_connection, db_name='categorization_data')

# Text files with the questions to import, one file per category.
text_file_paths = [
    "data/modeling/questions_txt/Represented_activities_categorization.txt",
    "data/modeling/questions_txt/Subjet_categorization.txt",
    "data/modeling/questions_txt/Context_categorization.txt",
]

# Push the questions of every file through the orchestrator.
orchestrator = QuestionDataOrchestrator(mongodb_loader)
for file_path in text_file_paths:
    orchestrator.update_questions_from_text(file_path)

In [None]:
# Load the questions stored under the "Subjet" key.
# NOTE(review): the key spelling matches the questions file name used earlier;
# presumably it is the spelling stored in the database - confirm before renaming.
subject = (
    QuestionDataOrchestrator(mongodb_loader)
    .load_data("Subjet")
    .initialize_data()
)

In [None]:
# Load the questions stored under the "Activitie" key.
# NOTE(review): presumably this is the exact key used in the database - confirm.
activity = (
    QuestionDataOrchestrator(mongodb_loader)
    .load_data("Activitie")
    .initialize_data()
)

In [None]:
# Load the questions stored under the "Context" key.
context = (
    QuestionDataOrchestrator(mongodb_loader)
    .load_data("Context")
    .initialize_data()
)

### Conexión con la base de datos de imágenes

In [None]:
from dotenv import load_dotenv
import os

# Hugging Face repository that holds the image dataset.
repo_name = "Yorchz/mi-dataset-de-images"

# Load variables from a local .env file, then read the access token from the
# environment so it is never hard-coded in the notebook.
load_dotenv()
token = os.environ.get("HF_TOKEN")  # None when HF_TOKEN is not set

### Carga del dataset

In [None]:
from data_controler.HuggingFace_data_controler.HuggingFaceController import HuggingFaceController

# Build the Hugging Face connection from the access token.
huggingface_controller = HuggingFaceController()
huggingface_connection = huggingface_controller.build_connection(token)

# NOTE(review): `huggingface_connection` is not passed to build_loader (unlike
# the MongoDB controller usage) - presumably the controller keeps it
# internally; confirm against HuggingFaceController.
dataset = (
    huggingface_controller
    .build_loader(repo_name)
    .load_images()
)

### Creamos el csv para guardar los datos

In [None]:
import os

import pandas as pd

# Column layout of the results file: the image label followed by one column
# per question asked to the model.
headers = ['Imagen', 'Numero_personas', 'Relevancia', 'Sexo', 'Localidad', 'Edad', 'Tipo_grupo', 'Tipo_actividad', 'Actividad']

# Make sure the output directory exists; to_csv does not create missing
# folders and would otherwise fail on a fresh checkout.
os.makedirs('pruebas', exist_ok=True)

# Start from an empty, header-only CSV (any previous results are overwritten).
df = pd.DataFrame(columns=headers)
df.to_csv('pruebas/data1.csv', index=False)

In [None]:
def add_rows(data, path='pruebas/data1.csv', columns=None):
    """Append rows to the results CSV.

    Rows are written in append mode, so the existing file is never re-read or
    rewritten; the cost of a call depends only on the rows being added, and
    rows already on disk are left untouched.

    Args:
        data: Iterable of rows, each with one value per column.
        path: CSV file to append to. Defaults to the notebook's results file.
        columns: Column names for ``data``. Defaults to the notebook-level
            ``headers`` list.
    """
    import os

    if columns is None:
        columns = headers
    new_rows = pd.DataFrame(data, columns=columns)

    # Emit the header line only when the file is being started (missing or
    # empty); otherwise append data lines below the existing content.
    write_header = not os.path.exists(path) or os.path.getsize(path) == 0
    new_rows.to_csv(path, mode='a', header=write_header, index=False)

### Realizamos preguntas y subida de las respuestas

In [None]:
# One prompt per question: question1..question6 of `subject` followed by
# question1..question2 of `activity`, in that order (eight prompts in total).
prompts = [
    f"Question: {getattr(source, f'question{i}')}? Answer:"
    for source, n_questions in ((subject, 6), (activity, 2))
    for i in range(1, n_questions + 1)
]

# Walk the training split once and collect images and labels side by side,
# instead of iterating over it twice (each pass presumably fetches/decodes
# every image again).
images, labels = [], []
for item in dataset['train']:
    images.append(item['image'])
    labels.append(item['label'])

In [None]:
def model_generate(image_tensor, prompt, model):
    """Run ``model.generate`` on a single image tensor with one prompt.

    Args:
        image_tensor: Tensor for one image, without a batch dimension.
        prompt: Prompt passed to the model unchanged.
        model: Object exposing ``generate(samples)``.

    Returns:
        Whatever ``model.generate`` returns for the single-image batch.
    """
    # Add a leading batch dimension and move the batch to the notebook-level
    # `device` before handing it to the model.
    batch = image_tensor.unsqueeze(0)
    samples = {"image": batch.to(device), "prompt": prompt}
    return model.generate(samples)

In [None]:
import torchvision.transforms as transforms
import pandas as pd

# Use the evaluation image processor that load_model_and_preprocess returned
# together with the model, so resizing and normalization match what the
# checkpoint expects. The previous hand-written torchvision pipeline used
# ImageNet statistics, which are not guaranteed to match the model's own
# preprocessing.
transform = vis_processors["eval"]

for i, (image, label) in enumerate(zip(images, labels)):
    # Force 3-channel RGB so grayscale / RGBA images do not break the
    # per-channel normalization, then turn the image into a tensor.
    image_tensor = transform(image.convert("RGB"))
    print(f"Image nº{i+1} processed")

    # Ask every prompt about this image; the row starts with the image label.
    row = [label]
    for prompt in prompts:
        answer = model_generate(image_tensor, prompt, model)
        # generate() is called with a single-image batch; when it returns a
        # sequence, keep the single answer itself so the CSV cell holds plain
        # text rather than the repr of a list.
        if isinstance(answer, (list, tuple)):
            answer = answer[0] if answer else ""
        row.append(answer)

    # Persist the row immediately so progress survives an interrupted run.
    add_rows([row])

### Intentos individuales para afinar las preguntas una a una 

In [None]:
# Reuse the evaluation image processor returned by load_model_and_preprocess
# instead of a second hand-written torchvision pipeline: it is the
# preprocessing paired with the loaded model, and it keeps a single definition
# of `transform` in the notebook. It is called on an image and returns the
# tensor that model_generate expects.
transform = vis_processors["eval"]

In [None]:
# Show the raw text of the first subject question before trying it on an image.
print(subject.question1)

In [None]:
# Try the first subject question on one image (index 9). The question text is
# passed as-is, without the "Question: ... Answer:" wrapper used for `prompts`.
print(
    model_generate(
        transform(images[9]),
        subject.question1,
        model,
    )
)

In [None]:
# Print the repr of one dataset image (index 15) to inspect what the loader returned.
print(images[15])