In [1]:
import PIL.Image
import os
import google.generativeai as genai

  from .autonotebook import tqdm as notebook_tqdm


In [18]:
# Root folder of the workshop material. To run the notebook on another machine,
# set the API_WORKSHOP_ROOT environment variable instead of editing this cell;
# when the variable is not set, the original hard-coded location is used.
project_root_folder = os.environ.get(
    "API_WORKSHOP_ROOT",
    "/home/ivan/Helmholtz/Conferences/APIworkshopHelmholtz2025/",
)
dataset_folder = os.path.join(project_root_folder, "AcevedoDataSet")

# Paths of the "test", "train" and "val" sub-folders of the dataset folder.
testset_folder = os.path.join(dataset_folder, "test")
trainset_folder = os.path.join(dataset_folder, "train")
valset_folder = os.path.join(dataset_folder, "val")

In [11]:
def gemini_api_text_inquiry(prompt_text, vlm_name='gemini-2.0-flash-exp', **kwargs):  # other model name: 'gemini-1.5-pro'
    """Send a text-only prompt to a Gemini model and return its reply.

    Args:
        prompt_text: Content passed unchanged to ``generate_content``.
        vlm_name: Model name handed to ``genai.GenerativeModel``.
        **kwargs: Accepted but not used by this function.

    Returns:
        A tuple ``(answer, usage)`` where ``answer`` is ``response.text`` and
        ``usage`` is ``response.usage_metadata.total_token_count``.
    """
    reply = genai.GenerativeModel(model_name=vlm_name).generate_content(prompt_text)

    # The tuple is evaluated left to right, so the reply text is read before
    # the usage metadata.
    return reply.text, reply.usage_metadata.total_token_count

In [12]:
# Text-only question sent to the model through the helper defined above.
prompt_text = "If you were old school, which videogame would you play? What machine learning model would you use to beat the game?"

# The helper returns the reply text together with the total token count.
answer, usage = gemini_api_text_inquiry(prompt_text)

# Show the reply and the token usage on separate lines.
print(f"Answer: {answer}", f"Tokens used: {usage}", sep="\n")

Answer: Okay, if I were going old school with a game and then using a machine learning model to conquer it, here's what I'd do:

**The Game:** **Ms. Pac-Man (Atari 2600 Version)**

*   **Why Ms. Pac-Man (Atari 2600)?**
    *   **Simple Rules, Complex Strategies:** The core gameplay is simple to understand (eat pellets, avoid ghosts), but mastering it requires intricate path planning and prediction of ghost behavior.
    *   **Limited State Space:**  Compared to modern games, the game state is relatively small (screen size, ghost positions, pellet positions, etc.), making it more amenable to training a machine learning model with reasonable resources.
    *   **Classic Arcade AI Challenge:** The original ghost AI, while iconic, is predictable and exploitable. A machine learning model can learn these patterns and surpass human performance.
    *   **Nostalgia!** Let's be honest, it's a classic.

**The Machine Learning Model:** **Deep Q-Network (DQN)**

*   **Why DQN?**
    *   **Reinforc

In [19]:
def gemini_api_visual_inquiry(image_path, prompt_text, vlm_name='gemini-2.0-flash-exp', **kwargs):  # other model name: 'gemini-1.5-pro'
    """Ask a Gemini model a question about a single image.

    Args:
        image_path: Path of the image file; opened with ``PIL.Image.open``.
        prompt_text: Text prompt sent together with the image.
        vlm_name: Model name handed to ``genai.GenerativeModel``.
        **kwargs: Accepted but not used by this function.

    Returns:
        A tuple ``(answer, usage)`` where ``answer`` is ``response.text`` and
        ``usage`` is ``response.usage_metadata.total_token_count``.
    """
    # Keep the image open only for the duration of the request so the
    # underlying file handle is released even if the request raises.
    with PIL.Image.open(image_path) as image:
        # Choose a Gemini model.
        model = genai.GenerativeModel(model_name=vlm_name)

        # The request content is the prompt followed by the image.
        response = model.generate_content([prompt_text, image])

    answer = response.text
    usage = response.usage_metadata.total_token_count

    return answer, usage

In [20]:
# Test image to classify.
image_path = os.path.join(testset_folder, "image_38.jpg") # It's a Platelet

# Classification prompt: the model must answer with exactly one of the listed labels.
prompt_text = """Consider the input image. Take a moment to think. Consider what features do the cells in the image have. Which of the white blood cell types listed below is shown? 
    Write just the cell type and nothing else. Choose one of the possible labels provided below (exactly as written here):
    Band Neutrophil
    Basophil
    Eosinophil
    Erythroblast
    Lymphocyte
    Metamyelocyte
    Monocyte
    Myelocyte
    Platelet
    Promyelocyte
    Segmented Neutrophil"""

# The helper returns the reply text together with the total token count.
answer, usage = gemini_api_visual_inquiry(image_path, prompt_text)

# Show the reply and the token usage on separate lines.
print(f"Answer: {answer}", f"Tokens used: {usage}", sep="\n")

Answer: Lymphocyte

Tokens used: 378


In [21]:
def gemini_multiimage_api_visual_inquiry(image_paths, prompt_texts, vlm_name='gemini-2.0-flash-exp', **kwargs):  # other model name: 'gemini-1.5-pro'
    """Send several (image, prompt) pairs to a Gemini model in one request.

    The request content interleaves each image with its prompt, in input
    order: ``[image_0, prompt_0, image_1, prompt_1, ...]``.

    Args:
        image_paths: Sequence of image file paths; each is opened with
            ``PIL.Image.open``.
        prompt_texts: Sequence of text prompts, one per image path.
        vlm_name: Model name handed to ``genai.GenerativeModel``.
        **kwargs: Accepted but not used by this function.

    Returns:
        A tuple ``(answer, usage)`` where ``answer`` is ``response.text`` and
        ``usage`` is ``response.usage_metadata.total_token_count``.

    Raises:
        ValueError: If ``image_paths`` and ``prompt_texts`` differ in length.
    """
    if len(image_paths) != len(prompt_texts):
        raise ValueError("The number of image paths and prompt texts must be the same.")

    messages = []
    opened_images = []

    try:
        for image_path, prompt_text in zip(image_paths, prompt_texts):
            image = PIL.Image.open(image_path)
            # Track the image as soon as it is open so it is closed even if a
            # later open or the request itself fails.
            opened_images.append(image)

            messages.append(image)
            messages.append(prompt_text)

        # Choose a Gemini model.
        model = genai.GenerativeModel(model_name=vlm_name)

        response = model.generate_content(messages)
    finally:
        # Release every file handle once the request has finished (or failed).
        for image in opened_images:
            image.close()

    answer = response.text
    usage = response.usage_metadata.total_token_count

    return answer, usage

In [22]:
# Few-shot examples: each training image is paired with a statement of its label.
train_path_0, train_prompt_0 = (
    os.path.join(trainset_folder, "image_0.jpg"),  # It's a Myelocyte
    "The cell type in this image is Myelocyte",
)
train_path_1, train_prompt_1 = (
    os.path.join(trainset_folder, "image_2.jpg"),  # It's a Platelet
    "The cell type in this image is Platelet",
)
train_path_2, train_prompt_2 = (
    os.path.join(trainset_folder, "image_9.jpg"),  # It's a Lymphocyte
    "The cell type in this image is Lymphocyte",
)

# Query: the test image followed by the classification prompt.
test_path = os.path.join(testset_folder, "image_38.jpg") # It's a Platelet
test_prompt = """Consider the input image. Take a moment to think. Consider what features do the cells in the image have. Which of the white blood cell types listed below is shown? 
    Write just the cell type and nothing else. Choose one of the possible labels provided below (exactly as written here):
    Band Neutrophil
    Basophil
    Eosinophil
    Erythroblast
    Lymphocyte
    Metamyelocyte
    Monocyte
    Myelocyte
    Platelet
    Promyelocyte
    Segmented Neutrophil"""

# Labelled examples come first and the query last; the two lists stay aligned by index.
image_paths = [train_path_0, train_path_1, train_path_2] + [test_path]
prompt_texts = [train_prompt_0, train_prompt_1, train_prompt_2] + [test_prompt]

# The helper returns the reply text together with the total token count.
answer, usage = gemini_multiimage_api_visual_inquiry(image_paths, prompt_texts)

# Show the reply and the token usage on separate lines.
print(f"Answer: {answer}", f"Tokens used: {usage}", sep="\n")

Answer: Lymphocyte
Tokens used: 1179
