In [53]:
import json
import os
import subprocess
from abc import ABC, abstractmethod
from PIL import Image
from transformers import BlipProcessor, BlipForConditionalGeneration
import lorem

# from common.social_model import SocialModel, get_blip, get_clip, get_combined_1, get_combined_2, get_tuned_blip, get_tuned_clip
# from pipeline.data_cleaning_flow import clean_data_flow
# from pipeline.image_extraction_flow import extract_images_flow
# from pipeline.model_selection_flow import select_model_flow

In [54]:
class SocialModel(ABC):
    """Abstract interface for models that produce a text caption for an image.

    Concrete subclasses must implement :meth:`caption`.
    """

    @abstractmethod
    def caption(self, image_path: str) -> str:
        """Return a caption for the image located at ``image_path``.

        The parameter is named ``image_path`` (not ``str``) so it no longer
        shadows the builtin and matches the signature used by the concrete
        subclasses in this notebook.

        Args:
            image_path: Path of the image to caption.

        Returns:
            The caption text.
        """

class BLIP_SocialModel(SocialModel):
    """Caption model that wraps a BLIP processor/model pair from ``transformers``."""

    def __init__(self, path : str):
        """Load the processor and the conditional-generation model from ``path``.

        Args:
            path: Location handed to ``from_pretrained`` for both the
                ``BlipProcessor`` and the ``BlipForConditionalGeneration``.
        """
        self._processor = BlipProcessor.from_pretrained(path)
        self._model = BlipForConditionalGeneration.from_pretrained(path)

    def caption(self, image_path : str) -> str:
        """Generate a caption for the image stored at ``image_path``.

        Args:
            image_path: Path of the image file to open.

        Returns:
            The first generated sequence, decoded with special tokens skipped.
        """
        # Image.open keeps the underlying file open; convert() returns an
        # independent RGB copy, so the source can be closed right away instead
        # of leaking one file handle per captioned image.
        with Image.open(image_path) as source_image:
            raw_image = source_image.convert('RGB')
        inputs = self._processor(images=raw_image, return_tensors="pt")
        outputs = self._model.generate(**inputs, num_beams=1, max_length=40)
        return self._processor.decode(outputs[0], skip_special_tokens=True)
    
class Random_SocialModel(SocialModel):
    """Stand-in model whose captions come from ``lorem.sentence()``."""

    def caption(self, image_path):
        """Return a ``lorem`` sentence; ``image_path`` is accepted but not used."""
        placeholder_caption = lorem.sentence()
        return placeholder_caption
    

def get_blip():
    """Build a ``BLIP_SocialModel`` from the ``blip_image_captioning_base`` path."""
    checkpoint_path = '../../../external/trained_models/blip_image_captioning_base'
    return BLIP_SocialModel(checkpoint_path)

def get_tuned_blip():
    """Build a ``BLIP_SocialModel`` from the ``blip_image_captioning_tuned`` path."""
    checkpoint_path = '../../../external/trained_models/blip_image_captioning_tuned'
    return BLIP_SocialModel(checkpoint_path)

def get_clip():
    """Placeholder factory for the CLIP model: not implemented, returns ``None``."""
    return None

def get_tuned_clip():
    """Placeholder factory for the tuned CLIP model: not implemented, returns ``None``."""
    return None

def get_combined_1():
    """Placeholder factory for the first combined model: not implemented, returns ``None``."""
    return None

def get_combined_2():
    """Placeholder factory for the second combined model: not implemented, returns ``None``."""
    return None


In [55]:
def extract_images_flow(project_root: str = '../../..'):
    """Return the first entry of the dataset output directory, extracting if it is empty.

    If ``<project_root>/external/dataset/output`` already contains something,
    the name of its first listed entry is returned and nothing is executed.
    Otherwise ``./image_text_sync/main.py`` is run with ``project_root`` as its
    working directory, and the output directory is listed again.

    The original code listed ``../../../external/dataset/output/`` before the
    run but ``./external/dataset/output`` after it, and started the subprocess
    without a working directory, so the two halves looked at different places.
    Everything is now resolved against ``project_root``.
    NOTE(review): this assumes ``image_text_sync/main.py`` lives under the same
    root as ``external/`` -- confirm against the repository layout.

    Args:
        project_root: Directory that contains ``external/`` (and, presumably,
            ``image_text_sync/``). Defaults to the location the notebook
            already used for its cache check.

    Returns:
        The name of the first entry in the output directory, or ``None`` when
        the extraction command fails or leaves the directory empty.
    """
    output_dir = os.path.join(project_root, 'external', 'dataset', 'output')

    def _list_output():
        # A missing output directory is treated the same as an empty one.
        return os.listdir(output_dir) if os.path.isdir(output_dir) else []

    existing = _list_output()
    if existing:
        return existing[0]

    command = [
        'python',
        './image_text_sync/main.py',
        './external/dataset/data/pdfs',
        './external/dataset/data/images',
        './external/dataset/data/captions',
        './external/dataset/output/metadata.db'
    ]
    try:
        # cwd makes the './...' arguments above resolve against project_root.
        subprocess.run(command, check=True, text=True, capture_output=True, cwd=project_root)
    except subprocess.CalledProcessError as e:
        print("Error al ejecutar el comando")
        print(e.stderr)
        return None

    produced = _list_output()
    # Avoid an IndexError when the command succeeded but wrote nothing.
    return produced[0] if produced else None

In [56]:
def clean_data_flow():
    """Clean the database (placeholder: not implemented yet, does nothing)."""
    return None


In [57]:
def select_model_flow():
    """Pick the model named by ``SOCIAL_MODEL`` and write its captions to a results file.

    Reads ``SOCIAL_HYPER_ID`` (required) and ``SOCIAL_MODEL`` (optional,
    defaults to ``'0'``) from the environment, builds the matching model and
    dumps the generated captions as JSON to
    ``../../../external/results/results<SOCIAL_HYPER_ID>.json``.

    The image list is still an empty placeholder, so the file currently
    contains an empty JSON list whichever model is selected.

    Raises:
        ValueError: if ``SOCIAL_HYPER_ID`` is not set.
    """
    hyperparams_id = os.environ.get('SOCIAL_HYPER_ID')
    if hyperparams_id is None:
        raise ValueError("Hyperparameters id cannot be none")

    # Environment values are strings, so the fallback must be the string '0';
    # the previous integer 0 never matched any branch and left the model unset.
    model_id = os.environ.get('SOCIAL_MODEL', '0')

    factories = {
        '0': get_blip,
        # '1' (get_tuned_blip) was disabled in the original code and stays
        # disabled here: that id selects no model.
        '2': get_clip,  # previously assigned to model_id instead of model
        '3': get_tuned_clip,
        '4': get_combined_1,
        '5': get_combined_2,
    }
    factory = factories.get(model_id)
    # Unknown or disabled ids leave the model unset, as before.
    model = factory() if factory is not None else None

    # Extract images from json
    images = []
    generated_captions = [model.caption(image) for image in images]

    with open(f'../../../external/results/results{hyperparams_id}.json', 'w') as file:
        json.dump(generated_captions, file)


In [58]:
def pipeline():
    """Run the extract / clean / select-model flows once per parameter combination.

    Loads a list of parameter dictionaries from
    ``../../../external/combinations.json``. For each one, the batch index is
    exported as ``SOCIAL_HYPER_ID`` and every key/value pair is exported as an
    environment variable (values are stringified). Image extraction and model
    selection always run; the cleaning flow runs only when ``SOCIAL_CLEANING``
    equals ``'1'``.
    """
    with open('../../../external/combinations.json', 'r') as combos_file:
        combinations: list[dict[str, str]] = json.load(combos_file)

    for batch_no, combo in enumerate(combinations):
        print(f'Start batch {batch_no}')

        # The flows read their configuration from the environment.
        os.environ['SOCIAL_HYPER_ID'] = str(batch_no)
        for name, setting in combo.items():
            os.environ[name] = str(setting)

        print('Extract images')
        extract_images_flow()

        cleaning_enabled = os.environ.get('SOCIAL_CLEANING') == '1'
        if cleaning_enabled:
            print('Clean data')
            clean_data_flow()

        print('Select model')
        select_model_flow()
        print(f'End batch {batch_no}')

# Entry point: run the whole pipeline when this code executes as the main
# module (the output recorded below this cell shows the guard passing here).
if __name__ == '__main__':
    pipeline()


Start batch 0
Extract images
Select model
End batch 0
Start batch 1
Extract images
Select model
End batch 1
Start batch 2
Extract images
Select model
End batch 2
Start batch 3
Extract images
Select model
End batch 3
Start batch 4
Extract images
Select model
End batch 4
Start batch 5
Extract images
Select model
End batch 5
Start batch 6
Extract images
Select model
End batch 6
Start batch 7
Extract images
Clean data
Select model
End batch 7
Start batch 8
Extract images
Clean data
Select model
End batch 8
Start batch 9
Extract images
Clean data
Select model
End batch 9
Start batch 10
Extract images
Clean data
Select model
End batch 10
Start batch 11
Extract images
Clean data
Select model
End batch 11
Start batch 12
Extract images
Clean data
Select model
End batch 12
Start batch 13
Extract images
Clean data
Select model
End batch 13
Start batch 14
Extract images
Select model
End batch 14
Start batch 15
Extract images
Select model
End batch 15
Start batch 16
Extract images
Select model
End 