In [1]:
#!pip install ipywidgets

In [2]:
# https://ipywidgets.readthedocs.io/en/latest/examples/Widget%20List.html
# https://ipywidgets.readthedocs.io/en/8.0.5/_modules/ipywidgets/widgets/widget_upload.html

In [1]:
from langchain.callbacks.manager import CallbackManager
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain_community.llms import Ollama

# Settings forwarded as-is to the Ollama(...) constructor below.
# None is the value used for every option this notebook does not override.
MODEL = "bakllava" # (string) name of the Ollama model to query
BASE_URL = 'http://host.docker.internal:11434' # (string) address of the Ollama server
SYSTEM_PROMPT = "You are an obsessive and detail-oriented assistant who specializes in reviewing images and describing their content in detail, especially recognizing accurately texts and numbers." # (string) system prompt (overrides what is defined in the Modelfile)
VERBOSE = True
CACHE = False
NUM_CTX = None # (int) Sets the size of the context window used to generate the next token. (Default: 2048)
NUM_GPU = None # (int) The number of GPUs to use. On macOS it defaults to 1 to enable metal support, 0 to disable.
NUM_THREAD = None # (int) Sets the number of threads to use during computation. By default, Ollama will detect this for optimal performance. It is recommended to set this value to the number of physical CPU cores your system has (as opposed to the logical number of cores).
REPEAT_PENALTY = None # (float) Sets how strongly to penalize repetitions. A higher value (e.g., 1.5) will penalize repetitions more strongly, while a lower value (e.g., 0.9) will be more lenient. (Default: 1.1)
TEMPERATURE = None # (float) The temperature of the model. Increasing the temperature will make the model answer more creatively. (Default: 0.8)
TEMPLATE = None # (string) full prompt or prompt template (overrides what is defined in the Modelfile)
TIMEOUT = None # (int) Timeout for the request stream

llm = Ollama(
    model=MODEL,
    # `callbacks` is the supported replacement for the deprecated
    # `callback_manager` argument; the handler writes tokens to stdout
    # as they are generated.
    callbacks=[StreamingStdOutCallbackHandler()],
    base_url=BASE_URL,
    system=SYSTEM_PROMPT,
    verbose=VERBOSE,
    cache=CACHE,
    num_ctx=NUM_CTX,
    num_gpu=NUM_GPU,
    num_thread=NUM_THREAD,
    repeat_penalty=REPEAT_PENALTY,
    temperature=TEMPERATURE,
    template=TEMPLATE,
    timeout=TIMEOUT,
)

In [2]:
import base64
from io import BytesIO
import io

from IPython.display import HTML, display
from PIL import Image

def convert_to_base64(pil_image):
    """Encode an image as a base64 string of its JPEG representation.

    Args:
        pil_image: A PIL image (any object exposing ``mode``, ``convert`` and
            ``save(fp, format=...)`` works).

    Returns:
        str: The JPEG bytes, base64-encoded and decoded to UTF-8 text.
    """
    # Modes the JPEG writer can store directly. Anything else (e.g. RGBA or
    # palette images coming from PNG/GIF uploads) makes ``save`` raise
    # OSError, so those are converted to RGB first.
    jpeg_modes = ("1", "L", "RGB", "RGBX", "CMYK", "YCbCr")
    if pil_image.mode not in jpeg_modes:
        pil_image = pil_image.convert("RGB")

    buffered = BytesIO()
    pil_image.save(buffered, format="JPEG")  # You can change the format if needed
    return base64.b64encode(buffered.getvalue()).decode("utf-8")


In [4]:
# https://ipywidgets.readthedocs.io/en/latest/examples/Widget%20Events.html#registering-callbacks-to-trait-changes-in-the-kernel
# uploader.value[0].content.tobytes()
# https://ipywidgets.readthedocs.io/en/latest/_modules/ipywidgets/widgets/widget_upload.html

import ipywidgets as widgets

# Widgets for the upload step: a status label, the file picker and a preview.
label = widgets.Label(" Seleccione una imagen ")
uploader = widgets.FileUpload()

# `placeholder` was dropped: it is not a trait of widgets.Image (it never
# appeared in the widget's repr), so passing it had no effect on the preview.
imagen = widgets.Image(width=200)

def on_upload_change(change):
    """Show the newly uploaded file in the preview and update the status label.

    Args:
        change: Trait-change notification from ``uploader``; ``change.new`` is
            the new upload value, a sequence of entries with a 'content' item.
    """
    files = change.new
    if not files:
        # The value was emptied (no file selected): nothing to preview, and
        # indexing [0] would raise IndexError.
        return
    # ipywidgets 8 documents the uploaded content as a memoryview, while the
    # Image widget's value is a bytes trait; bytes() accepts both forms.
    imagen.value = bytes(files[0]['content'])
    label.value = "Imagen Cargada"

# Call the preview handler on every change of the uploader's `value` trait,
# then render the label, the file picker and the preview below the cell.
uploader.observe(handler=on_upload_change, names="value")
display(label, uploader, imagen)


Label(value=' Seleccione una imagen ')

FileUpload(value=(), description='Upload')

Image(value=b'', width='200')

In [6]:
from IPython.display import display
# Output area that the click handler prints into, plus the button that
# triggers the analysis; both are rendered right away.
output = widgets.Output()
button = widgets.Button(description="Analizar Imagen")

display(button, output)

def on_button_clicked(b):
    """Send the uploaded image to the model and print the exchange into `output`.

    Args:
        b: The button instance that was clicked (not used).
    """
    with output:
        files = uploader.value
        if not files:
            # Clicking before uploading anything used to fail with IndexError
            # on uploader.value[0]; report it to the user instead.
            print('-- NO HAY IMAGEN CARGADA --')
            return

        print('-- INICIANDO ANALISIS --')
        # Decode the uploaded bytes with PIL and re-encode them as base64 JPEG,
        # which is the form passed to the model through `images`.
        image_b64 = convert_to_base64(Image.open(io.BytesIO(files[0]['content'])))
        llm_with_image_context = llm.bind(images=[image_b64])
        # The return value is intentionally unused here; the answer is expected
        # to appear through the stdout streaming handler attached to `llm`.
        llm_with_image_context.invoke("What does this image show?")
        print('\n')
        print('-- FINALIZADO --')
        

# Run the analysis handler each time the button is clicked.
button.on_click(callback=on_button_clicked)

Button(description='Analizar Imagen', style=ButtonStyle())

Output()