In [33]:
import base64
import os
import google.generativeai as genai
from dotenv import load_dotenv
import time

In [34]:

class GeminiVision():
    def __init__(self, model: str):
        load_dotenv()
        genai.configure(api_key=os.environ["GEMINI_API_KEY"])
        self.model = model
    
        
    def analyze_image(self, image_path: str, prompt: str) -> str:
        max_retries: int = 3
        initial_delay: float = 1.0

        # Create the model
        generation_config = {
            "temperature": 1,
            "top_p": 0.95,
            "top_k": 40,
            "max_output_tokens": 8192,
            "response_mime_type": "text/plain",
        }

        model = genai.GenerativeModel(
            model_name=self.model,
            generation_config=generation_config,
        )

        response = None
        last_exception = None

            
        files = [self.upload_to_gemini(image_path)]
        print(files[0])
        
        chat_session = model.start_chat(
            history=[
                {
                    "role": "user",
                    "parts": [files[0]],
                },
            ]
        )

        response = chat_session.send_message(prompt)
        # If we get here without an exception, we have a successful response
        return response

            

    def upload_to_gemini(self, path):
        """Uploads the given file to Gemini.

        See https://ai.google.dev/gemini-api/docs/prompting_with_media
        """
        file = genai.upload_file(path, mime_type="image/png" )
        print(f"Uploaded file '{file.display_name}' as: {file.uri}")
        return file

In [35]:
USER_PROMPT = """You will be provided with an image. 
    Your response should contain as much information as possible from this diagram. 
    It should contain a description of what type of image is (e.g. diagram, graph, flowchart, etc.) 
    and the data that comprises it.
    The response should be a valid JSON object as a string. 
    The JSON should necessarily have the following attributes:
    image_type, title, description
    But also if applicable the following:
    time_period, x-axis, y-axis, sources, sections, labels, ticks, key patterns
    Begin immediately with outputting the JSON object, do NOT prefix with any extra text, start straight with the json object, i.e. the first character should be {
    Do NOT suffix with any extra text, finish with the json object, i.e. the last character should be }
    """

In [36]:
gem = GeminiVision(model="gemini-2.0-flash-exp")
try:
    response = gem.analyze_image("/Users/la6387/workspace/sci-vizio-retrieval/output/extracted_images/1908.09635/page6_img2.png", USER_PROMPT)
    status = 200
except Exception as e:
    print(f"Failed to get response: {str(e)}")
    status = 500

Uploaded file 'page6_img2.png' as: https://generativelanguage.googleapis.com/v1beta/files/8tpuof5n6qri
genai.File({
    'name': 'files/8tpuof5n6qri',
    'display_name': 'page6_img2.png',
    'mime_type': 'image/png',
    'sha256_hash': 'ZTY5MzVlOGRkMWMxYTJlOGY3NjFhZjM5YmFjYTMxMjhmNmQ3OTM0MWI4NzBhMWVmOWQ4MGRjZDRkMzY3OTg3Nw==',
    'size_bytes': '230080',
    'state': 'ACTIVE',
    'uri': 'https://generativelanguage.googleapis.com/v1beta/files/8tpuof5n6qri',
    'create_time': '2025-01-09T22:33:11.096775Z',
    'expiration_time': '2025-01-11T22:33:11.046868912Z',
    'update_time': '2025-01-09T22:33:11.096775Z'})


In [37]:
response.text

'```json\n{\n  "image_type": "choropleth map",\n  "title": "Global Heatmap Visualization",\n  "description": "This choropleth map displays data across the world using a color gradient, with darker shades representing higher values and lighter shades representing lower values. The color key ranges from 1 to 642,997 indicating data magnitude.",\n    "key patterns": [\n        {\n            "color": "darkest black",\n            "data value": "642,997",\n            "region": "United States and Alaska"\n        },\n        {\n          "color": "medium gray",\n            "region": "United Kingdom"\n        },\n        {\n            "color": "light gray",\n          "region": "Australia, Japan and Canada"\n        }\n    ],\n  "labels": [\n    "United States",\n    "Alaska",\n    "Canada",\n    "United Kingdom",\n     "Australia",\n     "Japan"\n  ]\n}\n```\n'