In [11]:
import base64
import mimetypes
import os

from openai import OpenAI

In [12]:
# Root of the workshop checkout. The machine-specific path from the original
# notebook is kept as the default so existing runs behave exactly as before,
# but it can be overridden without editing the notebook by setting the
# APIWORKSHOP_ROOT environment variable before starting the kernel.
project_root_folder = os.environ.get(
    "APIWORKSHOP_ROOT",
    "/home/ivan/Helmholtz/Conferences/APIworkshopHelmholtz2025/",
)

# Dataset directory and its three split sub-directories.
dataset_folder = os.path.join(project_root_folder, "AcevedoDataSet")
testset_folder = os.path.join(dataset_folder, "test")
trainset_folder = os.path.join(dataset_folder, "train")
valset_folder = os.path.join(dataset_folder, "val")

In [2]:
# Module-level OpenAI client that the gpt_api_* helper functions in this
# notebook call. No arguments are passed, so no credentials are hard-coded
# here (presumably the SDK picks up OPENAI_API_KEY from the environment —
# confirm against the openai-python documentation).
client = OpenAI()

In [3]:
def encode_image(image_path):
    """Read a file and return its contents as base64 text.

    Args:
        image_path: Path of the file to read; it is opened in binary mode.

    Returns:
        str: The file's bytes, base64-encoded and then decoded as UTF-8.
    """
    with open(image_path, "rb") as image_file:
        raw_bytes = image_file.read()
    encoded_bytes = base64.b64encode(raw_bytes)
    return encoded_bytes.decode('utf-8')

In [4]:
def gpt_api_text_inquiry(prompt_text, vlm_name='gpt-4o', **kwargs):
    """Send one text prompt to a chat model and return its reply.

    Args:
        prompt_text: Text sent as the content of a single ``user`` message.
        vlm_name: Model identifier passed as ``model`` (e.g. "o1-preview").
        **kwargs: Extra keyword arguments forwarded unchanged to
            ``client.chat.completions.create`` (for example ``temperature``
            or ``max_tokens``). They used to be accepted but silently
            discarded. Passing ``model`` or ``messages`` here raises
            ``TypeError`` because those are already supplied.

    Returns:
        tuple: ``(answer, usage)`` where ``answer`` is the message content of
        the first choice and ``usage`` is the response's total token count.
    """
    response = client.chat.completions.create(
        model=vlm_name,
        messages=[
            {
                "role": "user",
                "content": prompt_text
            }
        ],
        **kwargs,
    )

    answer = response.choices[0].message.content

    # Total tokens (prompt + completion) as reported by the API.
    usage = response.usage.total_tokens

    return answer, usage

In [6]:
# Free-form text question used to smoke-test the text-only helper.
prompt_text = (
    "If you were old school, which videogame would you play? "
    "What machine learning model would you use to beat the game?"
)

answer, usage = gpt_api_text_inquiry(prompt_text)

print("Answer: " + answer)
print("Tokens used:", usage)

Answer: If I were old school, a classic video game I might play is "Pac-Man," which was released in 1980 and remains iconic in the gaming world. To beat the game, I could use a machine learning approach such as Reinforcement Learning (RL), specifically a Deep Q-Network (DQN).

Reinforcement Learning is well-suited for this type of game because it focuses on learning optimal actions in an environment to maximize a cumulative reward. In the context of Pac-Man, the environment comprises the maze, dots, power-pellets, ghosts, and walls. The agent (Pac-Man) would learn strategies, like when and how to avoid ghosts or when to chase them after eating a power-pellet.

A Deep Q-Network uses a neural network to approximate the Q-values (value of taking a certain action in a given state) and improves on traditional Q-learning by allowing the RL agent to make use of convoluted sensory input, such as screen pixels. Additionally, techniques like experience replay and target networks can stabilize an

In [15]:
def gpt_api_visual_inquiry(image_path, prompt_text, vlm_name='gpt-4o', **kwargs):
    """Ask a vision-capable chat model a question about one image.

    The image file is base64-encoded and embedded in the request as a data
    URL, preceded by the text prompt, in a single ``user`` message.

    Args:
        image_path: Path to the image file to send.
        prompt_text: Text part placed before the image in the message.
        vlm_name: Model identifier passed as ``model`` (e.g. "gpt-4o-mini").
        **kwargs: ``detail`` (default ``'low'``) is used as the image part's
            ``detail`` setting. Every other keyword argument is forwarded
            unchanged to ``client.chat.completions.create`` (for example
            ``temperature`` or ``max_tokens``); these used to be silently
            discarded.

    Returns:
        tuple: ``(answer, usage)`` where ``answer`` is the message content of
        the first choice and ``usage`` is the response's total token count.
    """
    # Getting the base64 string
    base64_image = encode_image(image_path)

    # 'detail' belongs to the image part, not to the API call itself, so it is
    # removed from kwargs before the remainder is forwarded.
    detail = kwargs.pop('detail', 'low')

    # Label the data URL with the file's real image type instead of always
    # claiming JPEG; unknown or non-image extensions keep the old JPEG label.
    guessed_type, _ = mimetypes.guess_type(str(image_path))
    if guessed_type and guessed_type.startswith("image/"):
        mime_type = guessed_type
    else:
        mime_type = "image/jpeg"

    response = client.chat.completions.create(
        model=vlm_name,
        messages=[
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": prompt_text,
                    },
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": f"data:{mime_type};base64,{base64_image}",
                            "detail": detail
                        },
                    },
                ],
            }
        ],
        **kwargs,
    )

    answer = response.choices[0].message.content

    # Total tokens (prompt + completion) as reported by the API.
    usage = response.usage.total_tokens

    return answer, usage

In [22]:
# Zero-shot query: a single test image and the list of allowed labels.
image_path = os.path.join(testset_folder, "image_38.jpg")  # ground truth: Platelet

# The prompt's line breaks and four-space indentation are part of the text
# sent to the model, so they are spelled out explicitly here.
prompt_text = (
    "Consider the input image. Take a moment to think. "
    "Consider what features do the cells in the image have. "
    "Which of the white blood cell types listed below is shown? \n"
    "    Write just the cell type and nothing else. "
    "Choose one of the possible labels provided below (exactly as written here):\n"
    "    Band Neutrophil\n"
    "    Basophil\n"
    "    Eosinophil\n"
    "    Erythroblast\n"
    "    Lymphocyte\n"
    "    Metamyelocyte\n"
    "    Monocyte\n"
    "    Myelocyte\n"
    "    Platelet\n"
    "    Promyelocyte\n"
    "    Segmented Neutrophil"
)

answer, usage = gpt_api_visual_inquiry(image_path, prompt_text)

print("Answer: " + answer)
print("Tokens used:", usage)

Answer: Lymphocyte
Tokens used: 217


In [24]:
def gpt_multiimage_api_visual_inquiry(image_paths, prompt_texts, vlm_name='gpt-4o', **kwargs):
    """Ask a vision-capable chat model about several images in one message.

    Prompts and images are paired by position and interleaved into a single
    ``user`` message as text, image, text, image, ... Each image file is
    base64-encoded and embedded as a data URL.

    Args:
        image_paths: Sequence of image file paths.
        prompt_texts: Sequence of text prompts, one per image; each prompt is
            placed immediately before its image.
        vlm_name: Model identifier passed as ``model`` (e.g. "gpt-4o-mini").
        **kwargs: ``detail`` (default ``'low'``) is used as the ``detail``
            setting of every image part. Every other keyword argument is
            forwarded unchanged to ``client.chat.completions.create`` (for
            example ``temperature`` or ``max_tokens``); these used to be
            silently discarded.

    Returns:
        tuple: ``(answer, usage)`` where ``answer`` is the message content of
        the first choice and ``usage`` is the response's total token count.

    Raises:
        ValueError: If ``image_paths`` and ``prompt_texts`` differ in length.
    """
    if len(image_paths) != len(prompt_texts):
        raise ValueError("The number of image paths and prompt texts must be the same.")

    # 'detail' belongs to the image parts, not to the API call itself, so it
    # is removed from kwargs before the remainder is forwarded.
    detail = kwargs.pop('detail', 'low')

    content = []
    for prompt_text, image_path in zip(prompt_texts, image_paths):
        # Getting the base64 string
        base64_image = encode_image(image_path)

        # Label the data URL with the file's real image type instead of
        # always claiming JPEG; unknown or non-image extensions keep JPEG.
        guessed_type, _ = mimetypes.guess_type(str(image_path))
        if guessed_type and guessed_type.startswith("image/"):
            mime_type = guessed_type
        else:
            mime_type = "image/jpeg"

        content.append({"type": "text", "text": prompt_text})
        content.append({
            "type": "image_url",
            "image_url": {
                "url": f"data:{mime_type};base64,{base64_image}",
                "detail": detail
            }
        })

    messages = [
        {
            "role": "user",
            "content": content
        }
    ]

    response = client.chat.completions.create(
        model=vlm_name,
        messages=messages,
        **kwargs,
    )

    answer = response.choices[0].message.content

    # Total tokens (prompt + completion) as reported by the API.
    usage = response.usage.total_tokens

    return answer, usage


In [25]:
# Few-shot examples: three labelled training images. Each prompt states the
# label of the image that follows it in the request.
train_path_0 = os.path.join(trainset_folder, "image_0.jpg")  # Myelocyte
train_path_1 = os.path.join(trainset_folder, "image_2.jpg")  # Platelet
train_path_2 = os.path.join(trainset_folder, "image_9.jpg")  # Lymphocyte

train_prompt_0 = "The cell type in this image is Myelocyte"
train_prompt_1 = "The cell type in this image is Platelet"
train_prompt_2 = "The cell type in this image is Lymphocyte"

# Query image whose label the model has to predict (ground truth: Platelet).
test_path = os.path.join(testset_folder, "image_38.jpg")

# The prompt's line breaks and four-space indentation are part of the text
# sent to the model, so they are spelled out explicitly here.
test_prompt = (
    "Consider the input image. Take a moment to think. "
    "Consider what features do the cells in the image have. "
    "Which of the white blood cell types listed below is shown? \n"
    "    Write just the cell type and nothing else. "
    "Choose one of the possible labels provided below (exactly as written here):\n"
    "    Band Neutrophil\n"
    "    Basophil\n"
    "    Eosinophil\n"
    "    Erythroblast\n"
    "    Lymphocyte\n"
    "    Metamyelocyte\n"
    "    Monocyte\n"
    "    Myelocyte\n"
    "    Platelet\n"
    "    Promyelocyte\n"
    "    Segmented Neutrophil"
)

# Labelled examples first, the unlabelled query last.
image_paths = [
    train_path_0,
    train_path_1,
    train_path_2,
    test_path,
]
prompt_texts = [
    train_prompt_0,
    train_prompt_1,
    train_prompt_2,
    test_prompt,
]

answer, usage = gpt_multiimage_api_visual_inquiry(image_paths, prompt_texts)

print("Answer: " + answer)
print("Tokens used:", usage)


Answer: Platelet
Tokens used: 500
