## Query screenshots using OpenAI GPT-4o-mini

The image descriptions were also generated with OpenAI (see the script `generate_image_descriptions.py`). This notebook sends those descriptions to OpenAI together with a query and asks which images are most relevant to that query.

#### Define the system prompt for OpenAI

In [None]:
# Number of filenames the model is asked to return; substituted into the
# prompt template below wherever {N_TOP_IMAGES} appears.
N_TOP_IMAGES = 12
# Upper bound on how many image files are considered when the prompt is built.
N_IMAGES = 400
# Directory that is scanned for screenshots; each description is read from a
# file named "<image path>.txt".
IMAGE_DIR = "../data/my_screenshots/"

# System prompt template. It is filled in later with str.format(), which
# supplies {N_TOP_IMAGES}, {image_filenames} and {image_descriptors}; any
# literal brace added to this text would therefore have to be doubled.
initial_context = """
Your job is to find the top {N_TOP_IMAGES} best screenshots based on a query and the image descriptions. 

Bellow there a bunch of image descriptors preceded by their filename. Make sure that, in your answer
you only include the image filenames in order from most relevant to least relevant from the chosen {N_TOP_IMAGES}.

Example, given a query like this:

"Which screenshots appear to be tech products?""

You should answer like, without text before this and without numbers before the image names (THIS IS AN FORMAT EXAMPLE, THESE IMAGES DON'T EXISTS ):
IMAGE_01.PNG
IMAGE_02.jpeg
...

First there is a list of image filenames, only answer with these filenames in the order of relevance:

{image_filenames}

The following is a list of image descriptor (one for each image), you should use these to answer the query.

{image_descriptors}

"""

In [None]:
import os, random

# Collect every screenshot in IMAGE_DIR; the extension check is case-insensitive.
images_files = []
for entry in os.listdir(IMAGE_DIR):
    if entry.lower().endswith(('.jpg', '.jpeg', '.png')):
        images_files.append(os.path.join(IMAGE_DIR, entry))

# Shuffle so that the N_IMAGES cap below keeps a random subset of the files.
random.shuffle(images_files)

print("Total number of images: ", len(images_files))
print("Liminting to ", N_IMAGES)

# Only the first N_IMAGES files are considered, to keep the prompt size bounded.
# Images without a sibling "<image path>.txt" description file are skipped.
filename_lines = []
description_blocks = []
for image_path in images_files[:N_IMAGES]:
    description_path = f"{image_path}.txt"
    if not os.path.exists(description_path):
        continue
    with open(description_path, "r") as description_file:
        description_text = description_file.read()
    image_name = os.path.basename(image_path)
    filename_lines.append(f"{image_name}\n")
    description_blocks.append(f"\n\n{image_name}:\n{description_text}")

image_list_context = "".join(filename_lines)
descriptions_context = "".join(description_blocks)

# NOTE: this rebinds initial_context from the template to the filled-in prompt,
# so the cell defining the template must be re-run before running this one again.
initial_context = initial_context.format(
    N_TOP_IMAGES=N_TOP_IMAGES,
    image_filenames=image_list_context,
    image_descriptors=descriptions_context,
)
print(initial_context)
    

In [None]:
from llm_wrapper import LLM_Wrapper, LLM_Models

# Create the client for the GPT-4o-mini model with a low sampling temperature.
llm_wrapper = LLM_Wrapper(LLM_Models.OPENAI_GPT4_MINI, temperature=0.2)
# Send the filled-in prompt (filenames + descriptions) with the "system" role.
# NOTE(review): presumably the wrapper keeps the conversation history so that
# later user queries are answered against this context — confirm in llm_wrapper.
llm_wrapper.send_message(initial_context, role="system")
print("Set initial context")

In [None]:
from bokeh.plotting import output_notebook, show
import ipywidgets as widgets
from IPython.display import display, clear_output
from ipywidgets import HBox, VBox

from utils import image_and_descriptions_plot

# Configure Bokeh to render its output inside the notebook.
output_notebook()

# Grid dimensions handed to image_and_descriptions_plot by the click handler.
N_COLUMNS, N_ROWS = 4, 3

# Query box, pre-filled with an example question.
query_layout = widgets.Layout(width='100%', height='40px')
text_input = widgets.Text(
    value='Which screenshots show 3d printing models?',
    placeholder='Enter text...',
    description='Filter text:',
    disabled=False,
    style={'description_width': 'initial'},
    layout=query_layout,
)

# Button that submits the query.
filter_button = widgets.Button(
    description='Filter it',
    button_style='success',
)

# Stack the query box above the button and show both.
widgets_box = VBox([text_input, filter_button])
display(widgets_box)



def _resolve_image_paths(response, image_dir):
    """Turn the model's answer into a list of existing image file paths.

    The answer is expected to contain one image filename per line. Each line
    is stripped of surrounding whitespace *before* being joined to
    ``image_dir``. Blank lines are ignored: joining an empty name to the
    directory would yield the directory itself, which exists and would
    otherwise be treated as an image.

    Args:
        response: Raw text returned by the model.
        image_dir: Directory the filenames are resolved against.

    Returns:
        Paths of the named entries that are regular files, in the order the
        model listed them. Names that do not resolve to a file are reported
        with ``print`` and left out.
    """
    resolved = []
    for line in response.splitlines():
        name = line.strip()
        if not name:
            continue
        candidate = os.path.join(image_dir, name)
        # isfile (not exists) so that a directory is never passed on as an image.
        if os.path.isfile(candidate):
            resolved.append(candidate)
        else:
            print(f"Image {name} not found")
    return resolved


def on_button_click(b):
    """Send the query from the text box to the LLM and plot the images it names.

    Args:
        b: Argument supplied by the button's click callback; unused.
    """
    # Clear the previous results, then put the input widgets back.
    clear_output(wait=True)
    display(widgets_box)

    q_str = "Query: " + text_input.value
    assistant_res = llm_wrapper.send_message(q_str, role="user")

    image_paths = _resolve_image_paths(assistant_res, IMAGE_DIR)
    p = image_and_descriptions_plot(image_paths, N_COLUMNS, N_ROWS)
    show(p)

# Register the handler so each click on the button runs the query.
filter_button.on_click(on_button_click)
