In [16]:
import getpass
import os
import base64
import json
import re

if "GOOGLE_API_KEY" not in os.environ:
    os.environ["GOOGLE_API_KEY"] = getpass.getpass("Enter your Google AI API key: ")

In [17]:
from langchain_google_genai import ChatGoogleGenerativeAI

llm = ChatGoogleGenerativeAI(
    model="gemini-2.0-flash",
    temperature=0,
    max_tokens=None,
    timeout=None,
    max_retries=2,
)

In [18]:
messages = [
    ("system", "You are a medical assistant. Return only a JSON object with two fields: type set to 'example', and summary set to 'This is a test.' Do not include any additional text, explanations, or formatting."),
    ("human", "Generate the JSON response."),
]
ai_msg = llm.invoke(messages)
print("Response content:", ai_msg.content)

try:
    json_response = json.loads(ai_msg.content)
    print("Parsed JSON:", json_response)
except json.JSONDecodeError as e:
    print(f"Failed to parse JSON: {e}. Content was: '{ai_msg.content}'")

Response content: ```json
{
  "type": "example",
  "summary": "This is a test."
}
```
Failed to parse JSON: Expecting value: line 1 column 1 (char 0). Content was: '```json
{
  "type": "example",
  "summary": "This is a test."
}
```'


In [None]:

# Path to the image
imagePath = "/Users/jan/documents/dev/cdtmhacks/cdtm-hacks-aviChallenge/data/docs/image copy.png"

# Encode the image in base64
with open(imagePath, "rb") as image_file:
    encoded_string = base64.b64encode(image_file.read()).decode('utf-8')

# Prepare the message with text and image
message = {
    "role": "user",
    "content": [
        {
            "type": "text",
            "text": "Analyze the document in this image and return only a JSON object with two fields: 'type' containing the type of document, and 'description' containing a brief description of the document's content and the date if possible. Do not include any additional text, explanations, or formatting."
        },
        {
            "type": "image",
            "source_type": "base64",
            "data": encoded_string,
            "mime_type": "image/png",
        },
    ],
}

# Invoke the model (assuming 'llm' is your multimodal model instance)
response = llm.invoke([message])
response_text = response.text()

print(response_text)

# Extract JSON from the response
json_match = re.search(r'\{.*\}', response_text, re.DOTALL)

if json_match:
    json_str = json_match.group(0)
    try:
        json_response = json.loads(json_str)
        print("Parsed JSON:", json_response)
    except json.JSONDecodeError as e:
        print(f"Failed to parse JSON: {e}. Content was: '{json_str}'")
else:
    print("No JSON found in response. Full responsez§x:", response_text)



```json
{
  "type": "Medical Report",
  "description": "A medical report detailing the diagnosis, treatment, and plan for a 70-year-old patient with coronary artery disease and myocardial ischemia, dated September 19, 2023."
}
```
Parsed JSON: {'type': 'Medical Report', 'description': 'A medical report detailing the diagnosis, treatment, and plan for a 70-year-old patient with coronary artery disease and myocardial ischemia, dated September 19, 2023.'}
