In [1]:
import PIL.Image
import os
import google.generativeai as genai

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Root folder of the workshop project. The default is the original author's
# local checkout; set the API_WORKSHOP_ROOT environment variable to point the
# notebook at another location without editing this cell.
project_root_folder = os.environ.get(
    "API_WORKSHOP_ROOT",
    "/home/ivan/Helmholtz/Conferences/APIworkshopHelmholtz2025/",
)

# Dataset folder and its per-split sub-folders, all derived from the root.
dataset_folder = os.path.join(project_root_folder, "AcevedoDataSet")
testset_folder = os.path.join(dataset_folder, "test")
trainset_folder = os.path.join(dataset_folder, "train")
valset_folder = os.path.join(dataset_folder, "val")

In [3]:
def gemini_api_text_inquiry(prompt_text, vlm_name='gemini-2.0-flash-exp', **kwargs):  # alternative model: 'gemini-1.5-pro'
    """Send a text-only prompt to a Gemini model and return its reply.

    Args:
        prompt_text: Prompt passed to ``generate_content``.
        vlm_name: Name of the Gemini model to instantiate.
        **kwargs: Accepted but not used by this function.

    Returns:
        Tuple ``(answer, usage)`` where ``answer`` is ``response.text`` and
        ``usage`` is ``response.usage_metadata.total_token_count``.
    """
    reply = genai.GenerativeModel(model_name=vlm_name).generate_content(prompt_text)
    return reply.text, reply.usage_metadata.total_token_count

In [4]:
# Text-only sanity check: send one free-form question and show the reply
# together with the total token count reported for the request.
prompt_text = (
    "If you were old school, which videogame would you play? "
    "What machine learning model would you use to beat the game?"
)
answer, usage = gemini_api_text_inquiry(prompt_text)

print(f"Answer: {answer}")
print(f"Tokens used: {usage}")

Answer: Okay, if I were going old school, I'd definitely choose **Space Invaders**. It's simple, iconic, and *deceptively* challenging.

Now, for the machine learning model... I'd go with **Q-Learning**, likely with some enhancements to address the scaling issues that Space Invaders might present. Here's why, and how I'd approach it:

**Why Q-Learning?**

*   **Discrete Action Space:** Space Invaders has a relatively small and discrete action space:  move left, move right, shoot (or sometimes "do nothing").  Q-Learning thrives in environments with discrete actions.
*   **Markov Decision Process (MDP):**  Space Invaders can be framed as an MDP. The current game state gives me (the AI) all the information needed to make an optimal decision.  Future states depend only on the current state and my action.
*   **Relatively Simple Environment:**  While challenging for humans, the rules of Space Invaders are fixed and relatively straightforward.  This makes it easier to design a suitable rewar

In [6]:
def gemini_api_visual_inquiry(image_path, prompt_text, vlm_name='gemini-2.0-flash-exp', **kwargs): # alternative model: 'gemini-1.5-pro'
    """Ask a Gemini vision-language model a question about a single image.

    Args:
        image_path: Path of the image file, opened with ``PIL.Image.open``.
        prompt_text: Prompt sent together with the image.
        vlm_name: Name of the Gemini model to instantiate.
        **kwargs: Accepted but not used by this function.

    Returns:
        Tuple ``(answer, usage)`` where ``answer`` is ``response.text`` and
        ``usage`` is ``response.usage_metadata.total_token_count``.
    """
    # Open the image in a context manager so the underlying file handle is
    # released as soon as the request has been made, even if the call raises.
    with PIL.Image.open(image_path) as image:
        # Choose a Gemini model.
        model = genai.GenerativeModel(model_name=vlm_name)

        # The prompt goes first, followed by the image, as one request.
        response = model.generate_content([prompt_text, image])

    answer = response.text
    usage = response.usage_metadata.total_token_count

    return answer, usage

In [7]:
# Zero-shot classification of one test image.
# Author's ground-truth note for image_38.jpg: it's a Platelet.
image_path = os.path.join(testset_folder, "image_38.jpg")

# The prompt is assembled from adjacent literals so that the trailing space
# after "shown?" and the four-space indent of every following line stay
# explicit; the resulting text is unchanged.
prompt_text = (
    "Consider the input image. Take a moment to think. "
    "Consider what features do the cells in the image have. "
    "Which of the white blood cell types listed below is shown? \n"
    "    Write just the cell type and nothing else. "
    "Choose one of the possible labels provided below (exactly as written here):\n"
    "    Band Neutrophil\n"
    "    Basophil\n"
    "    Eosinophil\n"
    "    Erythroblast\n"
    "    Lymphocyte\n"
    "    Metamyelocyte\n"
    "    Monocyte\n"
    "    Myelocyte\n"
    "    Platelet\n"
    "    Promyelocyte\n"
    "    Segmented Neutrophil"
)

answer, usage = gemini_api_visual_inquiry(image_path, prompt_text)

print(f"Answer: {answer}")
print(f"Tokens used: {usage}")

Answer: Lymphocyte

Tokens used: 378


In [8]:
def gemini_multiimage_api_visual_inquiry(image_paths, prompt_texts, vlm_name='gemini-2.0-flash-exp', **kwargs): # alternative model: 'gemini-1.5-pro'
    """Send several image/prompt pairs to a Gemini model in one request.

    The request content is built as ``[image_0, prompt_0, image_1, prompt_1,
    ...]``, i.e. each image is immediately followed by its own prompt.

    Args:
        image_paths: Paths of the image files, each opened with
            ``PIL.Image.open``.
        prompt_texts: Prompts paired position-by-position with ``image_paths``.
        vlm_name: Name of the Gemini model to instantiate.
        **kwargs: Accepted but not used by this function.

    Returns:
        Tuple ``(answer, usage)`` where ``answer`` is ``response.text`` and
        ``usage`` is ``response.usage_metadata.total_token_count``.

    Raises:
        ValueError: If ``image_paths`` and ``prompt_texts`` differ in length.
    """
    if len(image_paths) != len(prompt_texts):
        raise ValueError("The number of image paths and prompt texts must be the same.")

    messages = []
    opened_images = []  # tracked separately so every opened file gets closed

    try:
        for image_path, prompt_text in zip(image_paths, prompt_texts):
            image = PIL.Image.open(image_path)
            opened_images.append(image)

            messages.append(image)
            messages.append(prompt_text)

        # Choose a Gemini model.
        model = genai.GenerativeModel(model_name=vlm_name)

        response = model.generate_content(messages)
    finally:
        # Release the file handles whether the request succeeded or not
        # (including when opening a later image failed part-way through).
        for image in opened_images:
            image.close()

    answer = response.text
    usage = response.usage_metadata.total_token_count

    return answer, usage

In [9]:
# Few-shot examples taken from the training split. Each image is paired with
# a statement of its label (labels as noted by the notebook author).
train_path_0 = os.path.join(trainset_folder, "image_0.jpg")  # Myelocyte
train_prompt_0 = "The cell type in this image is Myelocyte"

train_path_1 = os.path.join(trainset_folder, "image_2.jpg")  # Platelet
train_prompt_1 = "The cell type in this image is Platelet"

train_path_2 = os.path.join(trainset_folder, "image_9.jpg")  # Lymphocyte
train_prompt_2 = "The cell type in this image is Lymphocyte"

# Query image from the test split (author's note: it's a Platelet).
test_path = os.path.join(testset_folder, "image_38.jpg")

# The prompt is assembled from adjacent literals so that the trailing space
# after "shown?" and the four-space indent of every following line stay
# explicit; the resulting text is unchanged.
test_prompt = (
    "Consider the input image. Take a moment to think. "
    "Consider what features do the cells in the image have. "
    "Which of the white blood cell types listed below is shown? \n"
    "    Write just the cell type and nothing else. "
    "Choose one of the possible labels provided below (exactly as written here):\n"
    "    Band Neutrophil\n"
    "    Basophil\n"
    "    Eosinophil\n"
    "    Erythroblast\n"
    "    Lymphocyte\n"
    "    Metamyelocyte\n"
    "    Monocyte\n"
    "    Myelocyte\n"
    "    Platelet\n"
    "    Promyelocyte\n"
    "    Segmented Neutrophil"
)

# Labelled examples come first and the unlabelled query last.
image_paths = [
    train_path_0,
    train_path_1,
    train_path_2,
    test_path,
]
prompt_texts = [
    train_prompt_0,
    train_prompt_1,
    train_prompt_2,
    test_prompt,
]

answer, usage = gemini_multiimage_api_visual_inquiry(image_paths, prompt_texts)

print(f"Answer: {answer}")
print(f"Tokens used: {usage}")

Answer: Lymphocyte
Tokens used: 1179
