Licensed under the Apache License, Version 2.0

# Params

In [None]:
# Notebook parameters. The trailing `#@param` markers are left untouched.
# Directory holding the images extracted from the research papers.
IMAGES_DIRECTORY_PATH = 'extracted_images'  #@param {type:"string"}
# Pickle file where the per-image classification responses are stored.
SDMC_PICKLE_FILE_PATH = 'sdmc.pkl'  #@param {type:"string"}
# Directory holding the few-shot example images used in the prompts.
FEW_SHOT_IMAGES_DIRECTORY_PATH = 'few_shot_images'  #@param {type:"string"}
# Pickle file where the parsed causal-loop-diagram results are stored.
CLD_SDMC_PARSED_PICKLE_FILE_PATH = 'cld_sdmc_parsed_dict.pkl'  #@param {type:"string"}

# Model name handed to genai.GenerativeModel.
GEMINI_MODEL = 'gemini-1.5-flash-latest'  #@param {type:"string"}
# API key handed to genai.configure; intentionally blank in the notebook.
GEMINI_API_KEY = ''  #@param {type:"string"}

# Imports

In [None]:
import os
from uuid import uuid4
from io import BytesIO

In [None]:
import google.generativeai as genai
import google.api_core.retry

In [None]:
from IPython import display

In [None]:
import os
import PIL.Image as Image

In [None]:
import json
import pandas as pd

In [None]:
import pickle
import time

In [None]:
import re

# Configure model

In [None]:
# Register the API key with the SDK. A blank GEMINI_API_KEY is passed as None
# so the SDK can fall back to the GOOGLE_API_KEY environment variable instead
# of being configured with an empty key.
genai.configure(api_key=GEMINI_API_KEY or None)

In [None]:
# Print "<name> : <display name>" for every model the SDK lists.
for available_model in genai.list_models():
  print(available_model.name, ":", available_model.display_name)

In [None]:
# Sampling settings passed to the model: temperature 0, top_p 0.95, top_k 5.
generation_config = dict(
    temperature=0,
    top_p=0.95,
    top_k=5,
)

In [None]:
# One entry per harm category, each with the same "BLOCK_NONE" threshold.
safety_settings = [
    {"category": harm_category, "threshold": "BLOCK_NONE"}
    for harm_category in (
        "HARM_CATEGORY_HARASSMENT",
        "HARM_CATEGORY_HATE_SPEECH",
        "HARM_CATEGORY_SEXUALLY_EXPLICIT",
        "HARM_CATEGORY_DANGEROUS_CONTENT",
    )
]

In [None]:
# Build the model client from the name and settings defined above.
model = genai.GenerativeModel(
    model_name=GEMINI_MODEL,
    safety_settings=safety_settings,
    generation_config=generation_config,
)

# Read extracted images from research papers

In [None]:
# `gfile` is never imported in this notebook, so list the directory with the
# standard library. os.listdir returns entries in arbitrary order; sorting
# makes repeated runs see the files in the same order.
files = sorted(os.listdir(IMAGES_DIRECTORY_PATH))

In [None]:
# Display how many entries were listed from the images directory.
len(files)

## Override files for testing

In [None]:
# files = files[:10]
# files

# Classify CLD, SFD, None

## Few-shot examples for prompting

In [None]:
# First few-shot example image for the classification prompt.
sdmc_prompt1_image_path = os.path.join(FEW_SHOT_IMAGES_DIRECTORY_PATH, 'O1123_page3_img0.png')
# PIL's Image.open only accepts mode "r" (its default); passing 'rb' raises
# ValueError, so the mode argument is omitted.
sdmc_prompt1_image = Image.open(sdmc_prompt1_image_path)

In [None]:
# Label text that accompanies the first few-shot image in the prompt list.
sdmc_prompt1 = 'causal_loop_diagram'

In [None]:
# Second few-shot example image for the classification prompt.
sdmc_prompt2_image_path = os.path.join(FEW_SHOT_IMAGES_DIRECTORY_PATH, 'P1105_page9_img0.png')
# PIL's Image.open only accepts mode "r" (its default); passing 'rb' raises
# ValueError, so the mode argument is omitted.
sdmc_prompt2_image = Image.open(sdmc_prompt2_image_path)

In [None]:
# Label text that accompanies the second few-shot image in the prompt list.
sdmc_prompt2 = 'stock_and_flow_diagram'

In [None]:
# Third few-shot example image for the classification prompt.
sdmc_prompt3_image_path = os.path.join(FEW_SHOT_IMAGES_DIRECTORY_PATH, 'P1105_page11_img0.png')
# PIL's Image.open only accepts mode "r" (its default); passing 'rb' raises
# ValueError, so the mode argument is omitted.
sdmc_prompt3_image = Image.open(sdmc_prompt3_image_path)

In [None]:
# Label text that accompanies the third few-shot image in the prompt list.
sdmc_prompt3 = 'causal_loop_diagram'

In [None]:
# Fourth few-shot example image for the classification prompt.
sdmc_prompt4_image_path = os.path.join(FEW_SHOT_IMAGES_DIRECTORY_PATH, 'P1105_page4_img0.png')
# PIL's Image.open only accepts mode "r" (its default); passing 'rb' raises
# ValueError, so the mode argument is omitted.
sdmc_prompt4_image = Image.open(sdmc_prompt4_image_path)

In [None]:
# Label text that accompanies the fourth few-shot image in the prompt list.
sdmc_prompt4 = 'none'

In [None]:
# Fifth few-shot example image for the classification prompt.
sdmc_prompt5_image_path = os.path.join(FEW_SHOT_IMAGES_DIRECTORY_PATH, 'P1114_page26_img1.png')
# PIL's Image.open only accepts mode "r" (its default); passing 'rb' raises
# ValueError, so the mode argument is omitted.
sdmc_prompt5_image = Image.open(sdmc_prompt5_image_path)

In [None]:
# Label text that accompanies the fifth few-shot image in the prompt list.
sdmc_prompt5 = 'none'

## Build and test prompt

In [None]:
# Instruction text sent together with the image to classify. It asks for one
# of three labels (causal_loop_diagram, stock_and_flow_diagram, none) plus a
# "reasoning" field. The text is sent to the model verbatim.
sdmc_target_prompt = """**Image Classification Prompt**

Please analyze the provided image and classify it based on the following categories:

* **causal_loop_diagram:** If the image primarily depicts variables connected by arrows indicating cause-and-effect relationships, often with positive (+) and negative (-) signs to denote reinforcing or balancing loops. It does not contain any elements from the stock and flow diagram (example: it will not contain even a single stock or flow element).
* **stock_and_flow_diagram:** If the image showcases stocks (represented by rectangles) and flows (represented by arrows) illustrating the accumulation and transfer of quantities or resources over time.
* **none:** If the image does not fit any of the above descriptions and does not represent a diagram commonly used in systems thinking or dynamic modeling.

**Please respond with ONLY the most accurate classification from the list above. Reply with "type" and "reasoning". Do not include any additional text.**

Example:

* If the image is a causal loop diagram, respond with
type: "causal_loop_diagram".
reasoning: <your reasoning here>."""

In [None]:
# Try the full classification prompt on one example image from the few-shot
# directory and print the model's answer.
sdmc_target_test_image_path = os.path.join(FEW_SHOT_IMAGES_DIRECTORY_PATH, 'P1116_page2_img1.png')
# PIL's Image.open only accepts mode "r" (its default); passing 'rb' raises
# ValueError, so the mode argument is omitted.
sdmc_target_test_image = Image.open(sdmc_target_test_image_path)
# Each few-shot label is followed by its image; the instruction text and the
# image to classify come last.
sdmc_test_prompt_all = [
    sdmc_prompt1, sdmc_prompt1_image,
    sdmc_prompt2, sdmc_prompt2_image,
    sdmc_prompt3, sdmc_prompt3_image,
    sdmc_prompt4, sdmc_prompt4_image,
    sdmc_prompt5, sdmc_prompt5_image,
    sdmc_target_prompt, sdmc_target_test_image
]
display.display(sdmc_target_test_image)
response = model.generate_content(sdmc_test_prompt_all)
print(response.text)

## Execute

In [None]:
# Rate limiting parameters
REQUESTS_PER_MINUTE = 140
SECONDS_PER_MINUTE = 60

last_request_time = 0  # time.time() of the most recent request attempt


def generate_content_with_rate_limit(model, prompt, request_options=None):
  """Call ``model.generate_content`` no faster than REQUESTS_PER_MINUTE.

  Args:
    model: Object exposing ``generate_content(prompt, request_options=...)``.
    prompt: Prompt forwarded unchanged to the model.
    request_options: Optional request options forwarded unchanged.

  Returns:
    The model's response, or None if the call raised an exception (the error
    is printed rather than propagated).
  """
  global last_request_time
  min_interval = SECONDS_PER_MINUTE / REQUESTS_PER_MINUTE
  # Sleep once for the rest of the interval instead of polling every 10 ms.
  # Capping at min_interval bounds the wait if the wall clock steps backwards.
  remaining = min(min_interval, min_interval - (time.time() - last_request_time))
  if remaining > 0:
    time.sleep(remaining)

  try:
    return model.generate_content(prompt, request_options=request_options)
  except Exception as e:
    print(f"Error during generation: {e}")
    return None
  finally:
    # Record the attempt even when it failed, so a run of errors is throttled
    # like successful requests instead of being retried back-to-back.
    last_request_time = time.time()

In [None]:
# Classify every image and checkpoint the raw responses to
# SDMC_PICKLE_FILE_PATH, resuming from an earlier checkpoint when one exists.
# NOTE: pickle.load executes arbitrary code; only load checkpoints written by
# this notebook.
files_sdmc_dict = {}
try:
    with open(SDMC_PICKLE_FILE_PATH, 'rb') as f:
        files_sdmc_dict = pickle.load(f)
except FileNotFoundError:
    pass  # First run: there is nothing to resume from.
except Exception as e:
    # Stay best-effort as before, but say why the checkpoint was ignored.
    print(f"Ignoring unreadable checkpoint {SDMC_PICKLE_FILE_PATH}: {e}")

# Counter for files processed in this run
processed_files_count = 0

# The retry policy is the same for every request, so build it once.
sdmc_request_options = {
    'retry': google.api_core.retry.Retry(deadline=600)
}

for file in files:
    # Resume by membership rather than by position: files that failed in an
    # earlier run are missing from the dict, so slicing `files` by
    # len(files_sdmc_dict) would skip or repeat the wrong entries.
    if file in files_sdmc_dict:
        continue

    sdmc_target_image_path = os.path.join(IMAGES_DIRECTORY_PATH, file)
    try:
        # Image.open only accepts mode "r" (its default); 'rb' raises
        # ValueError. The context manager closes the file after the request.
        with Image.open(sdmc_target_image_path) as sdmc_target_image:
            sdmc_prompt_all = [
                sdmc_prompt1, sdmc_prompt1_image,
                sdmc_prompt2, sdmc_prompt2_image,
                sdmc_prompt3, sdmc_prompt3_image,
                sdmc_prompt4, sdmc_prompt4_image,
                sdmc_prompt5, sdmc_prompt5_image,
                sdmc_target_prompt, sdmc_target_image
            ]
            response = generate_content_with_rate_limit(
                model, sdmc_prompt_all,
                request_options=sdmc_request_options
            )

        if response:
            files_sdmc_dict[file] = response.text
            processed_files_count += 1

            # Dump to pickle every 1000 files in case of a crash
            if processed_files_count % 1000 == 0:
                with open(SDMC_PICKLE_FILE_PATH, 'wb') as f:
                    pickle.dump(files_sdmc_dict, f)
                print(f"Saved {processed_files_count} files to {SDMC_PICKLE_FILE_PATH}")

    except Exception as e:
        # The file stays out of files_sdmc_dict, so the next run retries it.
        print(f"Error processing {file}: {e}")


# Save the remaining files after the loop
with open(SDMC_PICKLE_FILE_PATH, 'wb') as f:
    pickle.dump(files_sdmc_dict, f)
print(f"Saved remaining files to {SDMC_PICKLE_FILE_PATH}")

In [None]:
# Read the classification checkpoint back from disk and display it.
with open(SDMC_PICKLE_FILE_PATH, mode='rb') as sdmc_pickle_file:
  loaded_sdmc = pickle.load(sdmc_pickle_file)
loaded_sdmc

# Convert CLDs to JSON

## Few-shot prompts and images

In [None]:
# First few-shot diagram for the CLD-to-JSON prompt.
prompt1_image_path = os.path.join(FEW_SHOT_IMAGES_DIRECTORY_PATH, 'O1123_page3_img0.png')
# PIL's Image.open only accepts mode "r" (its default); passing 'rb' raises
# ValueError, so the mode argument is omitted.
prompt1_image = Image.open(prompt1_image_path)

In [None]:
# Annotated counterpart of the first few-shot diagram.
prompt1_annotated_image_path = os.path.join(FEW_SHOT_IMAGES_DIRECTORY_PATH, 'O1123_page3_img0_annotated.png')
# PIL's Image.open only accepts mode "r" (its default); passing 'rb' raises
# ValueError, so the mode argument is omitted.
prompt1_annotated_image = Image.open(prompt1_annotated_image_path)

In [None]:
# Few-shot example 1: instructions plus the expected JSON for the first
# diagram. The model's answers are later parsed with json.loads, so the
# example must itself be valid JSON: stray/missing quotes were fixed,
# trailing commas removed, and the "_" polarity typo corrected to "-".
prompt1_annotated = """INSTRUCTIONS: The following JSON describes variables, and causal links for the attached causal loop diagram.

"variables": All nodes in the diagram where one or more causal links (arrows) originate or terminate. These are the core elements that influence or are influenced by other elements in the system. I will ignore any shapes around the text, as some variables are not in a box.
"causal_links": An array of dictionaries representing all the links in the diagram, where each link consists of only one arrow. There is no link if there is no arrow. The arrow may be a solid line or dashed line. Each dictionary has: 2.1) "from": Text at the arrow's start. 2.2) "to": Text at the arrow's head. 2.3) "polarity": + or - sign, if available The polarity sign may be labeled anywhere on the causal link. If no sign is present, record it as "unknown.". I will ignore any coloring and won't make any assumptions about the polarity. 2.4) "id": The causal link's ID (generate a unique ID for each causal link).

Attached is also an annotated version of the diagram with image segmentation applied. This is an intermediate step to identify all entities before generating the JSON. A variable is a green bouding box, the beginning of a causal link is a blue bounding box, the end of a causal link is a red bounding box, polarity is a burgundy bounding circle.

  JSON OUTPUT:
    {
      "variables": ["climate impact Dutch energy system", "energy intensity society", "sustainable economic activities", "sustainable innovation", "effective climate policies", "climate intensity energy mix"],
      "causal_links": [
        {
          "id": "1234",
          "from": "energy intensity society",
          "polarity": "+",
          "to": "climate impact Dutch energy system"
        },
        {
          "id": "5678",
          "from": "sustainable economic activities",
          "polarity": "-",
          "to": "energy intensity society"
        },
        {
          "id": "9012",
          "from": "sustainable innovation",
          "polarity": "+",
          "to": "sustainable economic activities"
        },
        {
          "id": "3456",
          "from": "effective climate policies",
          "polarity": "+",
          "to": "sustainable innovation"
        },
        {
          "id": "2345",
          "from": "effective climate policies",
          "polarity": "-",
          "to": "climate intensity energy mix"
        },
        {
          "id": "4567",
          "from": "climate intensity energy mix",
          "polarity": "+",
          "to": "climate impact Dutch energy system"
        },
        {
          "id": "6789",
          "from": "effective climate policies",
          "polarity": "+",
          "to": "sustainable economic activities"
        }
      ]
    }

"""

In [None]:
# Second few-shot diagram for the CLD-to-JSON prompt.
prompt2_image_path = os.path.join(FEW_SHOT_IMAGES_DIRECTORY_PATH, 'O1123_page4_img0.png')
# PIL's Image.open only accepts mode "r" (its default); passing 'rb' raises
# ValueError, so the mode argument is omitted.
prompt2_image = Image.open(prompt2_image_path)

In [None]:
# Annotated counterpart of the second few-shot diagram.
prompt2_annotated_image_path = os.path.join(FEW_SHOT_IMAGES_DIRECTORY_PATH, 'O1123_page4_img0_annotated.png')
# PIL's Image.open only accepts mode "r" (its default); passing 'rb' raises
# ValueError, so the mode argument is omitted.
prompt2_annotated_image = Image.open(prompt2_annotated_image_path)

In [None]:
# Few-shot example 2: instructions plus the expected JSON for the second
# diagram. The model's answers are later parsed with json.loads, so the
# example must itself be valid JSON: the commas missing after several "to"
# values were added and trailing commas removed.
prompt2_annotated = """INSTRUCTIONS: The following JSON describes variables, and causal links for the attached causal loop diagram.

"variables": All nodes in the diagram where one or more causal links (arrows) originate or terminate. These are the core elements that influence or are influenced by other elements in the system. I will ignore any shapes around the text, as some variables are not in a box.
"causal_links": An array of dictionaries representing all the links in the diagram, where each link consists of only one arrow. There is no link if there is no arrow. The arrow may be a solid line or dashed line. Each dictionary has: 2.1) "from": Text at the arrow's start. 2.2) "to": Text at the arrow's head. 2.3) "polarity": + or - sign, if available The polarity sign may be labeled anywhere on the causal link. If no sign is present, record it as "unknown.". I will ignore any coloring and won't make any assumptions about the polarity. 2.4) "id": The causal link's ID (generate a unique ID for each causal link).

Attached is also an annotated version of the diagram with image segmentation applied. This is an intermediate step to identify all entities before generating the JSON. A variable is a green bouding box, the beginning of a causal link is a blue bounding box, the end of a causal link is a red bounding box, polarity is a burgundy bounding circle.

  JSON OUTPUT:
    {
      "variables": ["sustainable economic activities", "value of fossil assets", "capacity to change", "effective climate policies", "sustainable innovation", "agility of firms", "trust in government", "perceived fairness of the energy transition", "perceived urgency climate mitigation", "societal support for effective climate policies"],
      "causal_links": [
        {
          "from": "sustainable economic activities",
          "polarity": "-",
          "to": "value of fossil assets",
          "id": "1234"
        },
        {
          "from": "value of fossil assets",
          "polarity": "-",
          "to": "agility of firms",
          "id": "5678"
        },
        {
          "from": "agility of firms",
          "polarity": "+",
          "to": "sustainable economic activities",
          "id": "9012"
        },
        {
          "from": "value of fossil assets",
          "polarity": "-",
          "to": "capacity to change",
          "id": "2345"
        },
        {
          "from": "capacity to change",
          "polarity": "+",
          "to": "effective climate policies",
          "id": "4567"
        },
        {
          "from": "effective climate policies",
          "polarity": "+",
          "to": "sustainable economic activities",
          "id": "6789"
        },
        {
          "from": "effective climate policies",
          "polarity": "+",
          "to": "trust in government",
          "id": "3456"
        },
        {
          "from": "trust in government",
          "polarity": "+",
          "to": "perceived fairness of the energy transition",
          "id": "3824"
        },
        {
          "from": "perceived fairness of the energy transition",
          "polarity": "+",
          "to": "societal support for effective climate policies",
          "id": "2343"
        },
        {
          "from": "societal support for effective climate policies",
          "polarity": "+",
          "to": "effective climate policies",
          "id": "6786"
        },
        {
          "from": "perceived urgency climate mitigation",
          "polarity": "+",
          "to": "societal support for effective climate policies",
          "id": "2344"
        }
      ]
    }

"""

In [None]:
# Third few-shot diagram for the CLD-to-JSON prompt.
prompt3_image_path = os.path.join(FEW_SHOT_IMAGES_DIRECTORY_PATH, 'P1050_page4_img1.png')
# PIL's Image.open only accepts mode "r" (its default); passing mode='rb'
# raises ValueError, so the mode argument is omitted.
prompt3_image = Image.open(prompt3_image_path)

In [None]:
# Annotated counterpart of the third few-shot diagram.
prompt3_annotated_image_path = os.path.join(FEW_SHOT_IMAGES_DIRECTORY_PATH, 'P1050_page4_img1_annotated.png')
# PIL's Image.open only accepts mode "r" (its default); passing mode='rb'
# raises ValueError, so the mode argument is omitted.
prompt3_annotated_image = Image.open(prompt3_annotated_image_path)

In [None]:
# Few-shot example 3: instructions plus the expected JSON for the third
# diagram. The model's answers are later parsed with json.loads, so the
# example must itself be valid JSON: the missing opening quotes on
# "Hospital revenue" were added and trailing commas removed.
prompt3_annotated = """INSTRUCTIONS: The following JSON describes variables, and causal links for the attached causal loop diagram.

"variables": All nodes in the diagram where one or more causal links (arrows) originate or terminate. These are the core elements that influence or are influenced by other elements in the system. I will ignore any shapes around the text, as some variables are not in a box.
"causal_links": An array of dictionaries representing all the links in the diagram, where each link consists of only one arrow. There is no link if there is no arrow. The arrow may be a solid line or dashed line. Each dictionary has: 2.1) "from": Text at the arrow's start. 2.2) "to": Text at the arrow's head. 2.3) "polarity": + or - sign, if available The polarity sign may be labeled anywhere on the causal link. If no sign is present, record it as "unknown.". I will ignore any coloring and won't make any assumptions about the polarity. 2.4) "id": The causal link's ID (generate a unique ID for each causal link).

Attached is also an annotated version of the diagram with image segmentation applied. This is an intermediate step to identify all entities before generating the JSON. A variable is a green bouding box, the beginning of a causal link is a blue bounding box, the end of a causal link is a red bounding box, polarity is a burgundy bounding circle.

  JSON OUTPUT:
    {
      "variables": ["Health care provided to low income population", "Nonprofit hospitals in affluent areas", "Population access to private insurance", "Nonprofit hospitals in low income areas", "Hospital revenue", "Tax breaks from nonprofit status"],
      "causal_links": [
        {
          "id": "1234",
          "from": "Nonprofit hospitals in low income areas",
          "polarity": "+",
          "to": "Health care provided to low income population"
        },
        {
          "id": "5678",
          "from": "Health care provided to low income population",
          "polarity": "-",
          "to": "Hospital revenue"
        },
        {
          "id": "9012",
          "from": "Hospital revenue",
          "polarity": "+",
          "to": "Nonprofit hospitals in low income areas"
        },
        {
          "id": "4567",
          "from": "Nonprofit hospitals in affluent areas",
          "polarity": "-",
          "to": "Health care provided to low income population"
        },
        {
          "id": "3456",
          "from": "Hospital revenue",
          "polarity": "+",
          "to": "Nonprofit hospitals in affluent areas"
        },
        {
          "id": "3824",
          "from": "Nonprofit hospitals in affluent areas",
          "polarity": "+",
          "to": "Population access to private insurance"
        },
        {
          "id": "2343",
          "from": "Population access to private insurance",
          "polarity": "+",
          "to": "Hospital revenue"
        },
        {
          "id": "3453",
          "from": "Nonprofit hospitals in low income areas",
          "polarity": "-",
          "to": "Population access to private insurance"
        },
        {
          "id": "2949",
          "from": "Tax breaks from nonprofit status",
          "polarity": "+",
          "to": "Hospital revenue"
        }
      ]
    }

"""

### Build and test prompt

In [None]:
# Task text placed after the few-shot examples and before the target image.
# NOTE(review): it asks for reasoning after the JSON, while the parsing code
# calls json.loads on the reply (or on a ```json fenced part of it) — confirm
# the model reliably fences the JSON.
cld_sdmc_target_prompt = """
TASK: Now let's solve this step-by-step. Analyze the attached causal loop diagram, generate a segmentated image with colored bounding boxes identifying the different types of entities, and use that to provide your JSON response following the structure.
Provide reasoning, for each step in the step by step reasoning field after the JSON response. Verify that the number of elements you provide matches the number of elements you count in the diagram.
"""

In [None]:
# Try the CLD-to-JSON prompt on one example diagram and print the reply.
# PIL's Image.open only accepts mode "r" (its default); passing "rb" raises
# ValueError, so the mode argument is omitted.
target_image = Image.open(os.path.join(FEW_SHOT_IMAGES_DIRECTORY_PATH, 'S1240_page4_img0.png'))

# Each few-shot entry is (instructions + JSON, diagram, annotated diagram);
# the task text and the target diagram come last.
prompt_annotated = [
  prompt1_annotated, prompt1_image, prompt1_annotated_image,
  prompt2_annotated, prompt2_image, prompt2_annotated_image,
  prompt3_annotated, prompt3_image, prompt3_annotated_image,
  cld_sdmc_target_prompt, target_image
]

response = model.generate_content(prompt_annotated,
                                  request_options={
                                      'retry': google.api_core.retry.Retry(deadline=600)
                                  })

display.display(target_image)
print(response.text)
# Also print any inline (non-text) data returned alongside the text.
for part in response.parts:
  if "inline_data" in part:
    print(part.inline_data)

## Execute

In [None]:
# Captures the body of a ```json fenced block; DOTALL lets "." span newlines
# so the surrounding text and the multi-line JSON are both matched.
JSON_PATTERN = re.compile(r'.*```json\n(.*)\n```.*', flags=re.DOTALL)

In [None]:
# Keep only the images whose classification response contains the text
# "causal_loop_diagram".
filtered_cld_sdmc_dict = {
    image_name: answer
    for image_name, answer in files_sdmc_dict.items()
    if "causal_loop_diagram" in answer
}

In [None]:
# Convert every image classified as a causal loop diagram into JSON and store
# the parsed result (or the error text on failure) per file.
error_count = 0  # consecutive "504 Deadline Exceeded" failures
cld_sdmc_parsed_dict = {}

# Resume from an earlier run. pickle.load needs an open binary file object;
# handing it the path string raised TypeError, which the bare except hid, so
# earlier results were never reloaded.
# NOTE: pickle.load executes arbitrary code; only load files written by this
# notebook.
try:
  with open(CLD_SDMC_PARSED_PICKLE_FILE_PATH, 'rb') as f:
    cld_sdmc_parsed_dict = pickle.load(f)
except FileNotFoundError:
  pass  # First run: there is nothing to resume from.
except Exception as e:
  # Stay best-effort as before, but say why the saved results were ignored.
  print(f"Ignoring unreadable results file {CLD_SDMC_PARSED_PICKLE_FILE_PATH}: {e}")

for file in filtered_cld_sdmc_dict:
  # Entries that hold the deadline error text are retried; every other stored
  # entry (parsed dict or other error text) counts as processed.
  if file in cld_sdmc_parsed_dict and cld_sdmc_parsed_dict[file] != "504 Deadline Exceeded":
    print(f"Skipping already processed file: {file}")
    continue
  print(f"Processing file: {file}")

  target_image_path = os.path.join(IMAGES_DIRECTORY_PATH, file)
  try:
    # Image.open only accepts mode "r" (its default); "rb" raises ValueError.
    # The context manager closes the file once the request is done.
    with Image.open(target_image_path) as target_image:
      prompt_annotated = [
        prompt1_annotated, prompt1_image, prompt1_annotated_image,
        prompt2_annotated, prompt2_image, prompt2_annotated_image,
        prompt3_annotated, prompt3_image, prompt3_annotated_image,
        cld_sdmc_target_prompt, target_image
      ]

      response = model.generate_content(prompt_annotated,
                                        request_options={
                                            'retry': google.api_core.retry.Retry(deadline=600)
                                        })

    # Prefer the contents of a ```json fenced block; otherwise try to parse
    # the whole reply.
    json_string = response.text
    json_string_match = JSON_PATTERN.match(json_string)
    if json_string_match:
      json_string = json_string_match.group(1)

    data_dict = json.loads(json_string)
    cld_sdmc_parsed_dict[file] = data_dict

    error_count = 0
  except Exception as e:
    print(file)
    print(e)
    cld_sdmc_parsed_dict[file] = str(e)

    if str(e) == "504 Deadline Exceeded":
      error_count += 1
      if error_count == 10:
        print('Too many deadline exceeded errors. Waiting 30 mins before retry.')
        time.sleep(1800)
        # Carry on with the next file; previously the counter was bumped to
        # 11 here, so the loop exited right after the wait without retrying.
      elif error_count > 10:
        print('Too many deadline exceeded errors even after 30 mins backoff. Exiting')
        break

# Save parsed CLDs
with open(CLD_SDMC_PARSED_PICKLE_FILE_PATH, 'wb') as f:
    pickle.dump(cld_sdmc_parsed_dict, f)
print(f"Saved parsed CLDs to {CLD_SDMC_PARSED_PICKLE_FILE_PATH}")

In [None]:
# Read the parsed-CLD results back from disk and display them.
with open(CLD_SDMC_PARSED_PICKLE_FILE_PATH, mode='rb') as parsed_pickle_file:
  loaded_cld_sdmc = pickle.load(parsed_pickle_file)
loaded_cld_sdmc