In [1]:
from google import genai
from google.genai import types
import os
import utils.api_keys as api_keys
import csv
import time
from tqdm import tqdm

In [2]:
# --- Configuration -----------------------------------------------------------
# Gemini client, model choice and the input/output locations for this run.
client = genai.Client(api_key=api_keys.gemini_api_key)
model = "gemini-2.5-flash-lite"
dataset = "cub"
# Results go to outputs/<model>_<dataset>_concepts.csv
output_file = os.path.join("outputs", "{}_{}_concepts.csv".format(model, dataset))
label_file = "CUB_classes.txt"
# Estimated number of tokens the model may spend on "thinking" per request.
# NOTE(review): the generation cell passes its own literal (600) instead of
# reading this value -- confirm which budget is intended before wiring it in.
thinking_budget = 400

# Formatting rules appended to every prompt so answers come back as bare bullets.
prompt_answer_rules = "\n\n**Answer rules:**\n* Use only **bullet points**.\n* Each bullet must be a **single concept**, not a sentence.\n* If a feature has multiple aspects, split them into separate bullets.\n* Do not include explanations, context, or sentences — just the raw concepts.\n\n**Example format:**\n* <onewordconcept>\n* <multi word concept>\n* <more concepts>\n\n"

# Few-shot variant of the prompt templates. This dict used to be bound to
# `prompts` and was immediately overwritten by the templates below, so it never
# had any effect; it is kept under its own name as a reference alternative.
few_shot_prompts = {
    "important": "List the most important features for recognizing something as a \"goldfish\":\n\n-bright orange color\n-a small, round body\n-a long, flowing tail\n-a small mouth\n-orange fins\n\nList the most important features for recognizing something as a \"beerglass\":\n\n-a tall, cylindrical shape\n-clear or translucent color\n-opening at the top\n-a sturdy base\n-a handle\n\nList the most important features for recognizing something as a \"{}\":",
    "superclass": "Give superclasses for the word \"tench\":\n\n-fish\n-vertebrate\n-animal\n\nGive superclasses for the word \"beer glass\":\n\n-glass\n-container\n-object\n\nGive superclasses for the word \"{}\":",
    "around": "List the things most commonly seen around a \"tench\":\n\n- a pond\n-fish\n-a net\n-a rod\n-a reel\n-a hook\n-bait\n\nList the things most commonly seen around a \"beer glass\":\n\n- beer\n-a bar\n-a coaster\n-a napkin\n-a straw\n-a lime\n-a person\n\nList the things most commonly seen around a \"{}\":",
}

# Prompt templates actually used; "{}" is replaced with the class label.
prompts = {
    "important": "List the most important features for recognizing something as a \"{}\", based on an image.",
    "superclass": "Give superclasses for the word \"{}\", the categories should be explained in simple, layman-friendly language and should highlight how this bird differs from other types of birds:",
    "around": "List the things most commonly seen around a \"{}\":",
}
# Disabled extra answer rule, kept for reference:
#\n* Acceptable forms: **adjective + noun** (e.g., “curved edge”) or a **single noun or verb** (e.g., “stripped”).

# Fallback labels, used when no label file is available.
labels = ["Black_footed_Albatross", "Cardinal", "Vermilion_Flycatcher"]



In [3]:
# Load class names from the label file when it exists; when it does not, the
# previously defined `labels` list is left untouched.
if os.path.exists(label_file):
    print("Loading labels from file:", label_file)
    labels = []
    # Explicit encoding so parsing does not depend on the platform default.
    with open(label_file, "r", encoding="utf-8") as f:
        for line in f:
            # Split on the first dot and keep the part after it.
            # Lines without a dot are ignored.
            _, dot, class_name = line.strip().partition(".")
            # Also skip lines with nothing after the dot: an empty label would
            # otherwise be sent to the model as a prompt for "".
            if dot and class_name:
                labels.append(class_name)

loading labels from file: CUB_classes.txt


In [4]:
# Ask the model every prompt template for every label and append each answer
# to the CSV as soon as it arrives.
total_token_count = 0
request_pause_s = 3  # pause between requests to avoid rate limiting

# Create the output directory up front: otherwise the first open() below fails
# only after an API request has already been made.
output_dir = os.path.dirname(output_file)
if output_dir:
    os.makedirs(output_dir, exist_ok=True)

pbar = tqdm(labels)
pbar.set_description(f"Total tokens used: {total_token_count}")
for label in pbar:
    for prompt in prompts.values():
        cur_prompt = prompt.format(label) + prompt_answer_rules

        response = client.models.generate_content(
            model=model,
            contents=cur_prompt,
            config=types.GenerateContentConfig(
                # Thinking budget docs: https://ai.google.dev/gemini-api/docs/thinking
                # NOTE(review): the configuration cell defines `thinking_budget`,
                # which this request does not read; 600 is kept because it is the
                # value this loop has been sending -- confirm the accepted range
                # for `model` before switching to the configured value.
                thinking_config=types.ThinkingConfig(thinking_budget=600)
            ),
        )

        # Usage metadata and its token count are optional in the SDK response;
        # treat a missing value as 0 instead of crashing partway through the run.
        usage = response.usage_metadata
        tokens_used = (usage.total_token_count if usage is not None else None) or 0
        total_token_count += tokens_used
        pbar.set_description(f"Total tokens used: {total_token_count}")

        # CSV instead of JSON so rows can simply be appended, which keeps the
        # partial results if the run is interrupted. UTF-8 is explicit because
        # prompts and answers contain non-ASCII characters.
        with open(output_file, "a", newline="", encoding="utf-8") as f:
            writer = csv.writer(f, quoting=csv.QUOTE_ALL)
            writer.writerow([label, cur_prompt, response.text, tokens_used])

        # Rate limits: https://ai.google.dev/gemini-api/docs/rate-limits
        time.sleep(request_pause_s)

Total tokens used: 378848: 100%|██████████| 200/200 [1:16:54<00:00, 23.07s/it]
