In [1]:
from pydantic import BaseModel, Field
from typing import List, Literal
import dotenv
import base64
from anthropic import Anthropic
from pydantic_ai import Agent, RunContext
from dataclasses import dataclass
from pydantic_ai.models.anthropic import AnthropicModel

def export_json(data):
    import json
    with open('output.json', 'w', encoding='utf8') as f:
        json.dump(data, f, indent=4)

anthropic_api_key = dotenv.get_key(dotenv_path=".env", key_to_get="ANTHROPIC_API_KEY")
if anthropic_api_key is None:
    Exception("APi key not available")


class Node(BaseModel):
    """Structure of a node in the diagram"""

    id: str = Field(description="Id of the Node")
    type_of_node: Literal["process", "decision", "delay", "terminator","start"] | str = Field(
        description="The type of node"
    )
    label: str = Field(description="Label of the node")


class Edge(BaseModel):
    """Structure of a edge in the diagram"""

    from_: str = Field(description="The ID of the edge's starting node")
    to: str = Field(description="The ID of edge's end node")
    type_of_edge: Literal["dashed", "solid"] = Field( default="solid",
        description="The type of edge, visually"
    )


class Graph(BaseModel):
    """Structure of the graph representing the diagram"""

    nodes: List[Node] = Field(description="Nodes from the diagram")
    edges: List[Edge] = Field(description="Edges from the diagram")


class MultiModalLLMService:
    """Service to interact with Anthropic multimodal LLMs."""

    def __init__(self, model: str):
        self.client = Anthropic(api_key=anthropic_api_key)
        self.model = model

    async def perform_task(
        self, image_path: str, response_model: type, max_tokens: int = 1000
    ):
        """Send an image and prompt to the LLM and return structured output."""
        with open(image_path, "rb") as image_file:
            base64_image = base64.b64encode(image_file.read()).decode("utf-8")

        message_list = [
            {
                "role": "user",
                "content": [
                    {
                        "type": "image",
                        "source": {
                            "type": "base64",
                            "media_type": "image/png",
                            "data": base64_image,
                        },
                    },
                ],
            }
        ]
        response = self.client.messages.create( 
            model=self.model,
            max_tokens=max_tokens,
            messages=message_list,
        )
        return response.content[0].text


@dataclass
class DiagramDigitizerDependencies:
    llm_service: MultiModalLLMService
    diagram_path: str


diagram_digitizer_agent = Agent(
    AnthropicModel("claude-3-5-sonnet-latest", api_key=anthropic_api_key),
    deps_type=DiagramDigitizerDependencies,
    result_type=Graph,
    system_prompt="You are a data scientist and you are working on a project to extract information from a diagram in json format. Which is compatiable with knowledge graph databases. Consider their shape and translate it's purpose(process, decision, etc.) as it is important for the data extraction. only give the json format of the diagram.",
)


@diagram_digitizer_agent.tool
async def extract_diagram_info(ctx: RunContext[DiagramDigitizerDependencies]) -> Graph:
    """Tool to extract diagram information details from the image"""
    return await ctx.deps.llm_service.perform_task(
        image_path=ctx.deps.diagram_path, response_model=Graph
    )

result = None
async def main():
    global result
    deps = DiagramDigitizerDependencies(
        llm_service=MultiModalLLMService(model="claude-3-5-sonnet-20241022"),
        diagram_path="./dataset/kamizuru/6.png",
    )
    
    result = await diagram_digitizer_agent.run(
        "Extract the details from the image", deps=deps
    )
    export_json(result.data.model_dump_json())
    print(result.usage)
    print("=" * 100)


await main()

NameError: name 'export_json' is not defined

In [16]:
export_json(result.data.model_dump())

In [3]:
result.data.model_dump()

{'nodes': [{'id': '1', 'type_of_node': 'start', 'label': 'Start'},
  {'id': '2',
   'type_of_node': 'decision',
   'label': 'Target Sector < Max Sector?'},
  {'id': '3', 'type_of_node': 'process', 'label': 'Increment Target Sector'},
  {'id': '4', 'type_of_node': 'process', 'label': 'Target Sector = 0'},
  {'id': '5', 'type_of_node': 'decision', 'label': 'Update Wanted?'},
  {'id': '6', 'type_of_node': 'process', 'label': 'Increment Target Head'},
  {'id': '7', 'type_of_node': 'process', 'label': 'Target Head = 0'},
  {'id': '8', 'type_of_node': 'process', 'label': 'Increment'},
  {'id': '9', 'type_of_node': 'terminator', 'label': 'End'}],
 'edges': [{'from_': '1', 'to': '2', 'type_of_edge': 'solid'},
  {'from_': '2', 'to': '3', 'type_of_edge': 'solid'},
  {'from_': '2', 'to': '4', 'type_of_edge': 'solid'},
  {'from_': '4', 'to': '5', 'type_of_edge': 'solid'},
  {'from_': '5', 'to': '6', 'type_of_edge': 'solid'},
  {'from_': '5', 'to': '7', 'type_of_edge': 'solid'},
  {'from_': '3', 't

In [3]:
data

{'nodes': [{'id': '1', 'type_of_node': 'start', 'label': 'Start'},
  {'id': '2',
   'type_of_node': 'decision',
   'label': 'Target Sector < Max Sector?'},
  {'id': '3', 'type_of_node': 'process', 'label': 'Increment Target Sector'},
  {'id': '4', 'type_of_node': 'process', 'label': 'Target Sector = 0'},
  {'id': '5', 'type_of_node': 'decision', 'label': 'Update Wanted?'},
  {'id': '6', 'type_of_node': 'process', 'label': 'Increment Target Head'},
  {'id': '7', 'type_of_node': 'process', 'label': 'Target Head = 0'},
  {'id': '8', 'type_of_node': 'process', 'label': 'Increment'},
  {'id': '9', 'type_of_node': 'terminator', 'label': 'End'}],
 'edges': [{'from_': '1', 'to': '2', 'type_of_edge': 'solid'},
  {'from_': '2', 'to': '3', 'type_of_edge': 'solid'},
  {'from_': '2', 'to': '4', 'type_of_edge': 'solid'},
  {'from_': '4', 'to': '5', 'type_of_edge': 'solid'},
  {'from_': '5', 'to': '6', 'type_of_edge': 'solid'},
  {'from_': '5', 'to': '7', 'type_of_edge': 'solid'},
  {'from_': '3', 't

In [3]:
from neo4j import GraphDatabase

In [4]:
URI = "neo4j://localhost:7687"
AUTH = ("neo4j", "vava7878")


In [5]:
with GraphDatabase.driver(URI, auth=AUTH) as driver:
    # driver.verify_connectivity()
    records,a, b =driver.execute_query("MATCH (n) RETURN (n)")
    for record in records:
        print(record)

<Record n=<Node element_id='4:6d1f0adb-16c6-4d53-b29a-4b6a931caee3:0' labels=frozenset({'Node'}) properties={'id': 'OrderReceived', 'label': 'Order received', 'type': 'start'}>>
<Record n=<Node element_id='4:6d1f0adb-16c6-4d53-b29a-4b6a931caee3:1' labels=frozenset({'Node'}) properties={'id': 'EnterOrderInSystem', 'label': 'Enter order in system', 'type': 'process'}>>
<Record n=<Node element_id='4:6d1f0adb-16c6-4d53-b29a-4b6a931caee3:2' labels=frozenset({'Node'}) properties={'id': 'CreditCheck', 'label': 'Credit check', 'type': 'process'}>>
<Record n=<Node element_id='4:6d1f0adb-16c6-4d53-b29a-4b6a931caee3:3' labels=frozenset({'Node'}) properties={'id': 'IsCreditGood', 'label': 'Is credit good?', 'type': 'decision'}>>
<Record n=<Node element_id='4:6d1f0adb-16c6-4d53-b29a-4b6a931caee3:4' labels=frozenset({'Node'}) properties={'id': 'RefuseOrder', 'label': 'Refuse order', 'type': 'output'}>>
<Record n=<Node element_id='4:6d1f0adb-16c6-4d53-b29a-4b6a931caee3:5' labels=frozenset({'Node'}) p

In [8]:
from neo4j import GraphDatabase

def execute_neo4j_query(uri, username, password,query,  params=None, database='neo4j'):
    with GraphDatabase.driver(uri, auth=(username, password)) as driver:
        records,summary, keys = driver.execute_query(query, params, database=database)
        print(f'records: {records}, summary: {summary}, keys: {keys}')
        return records, summary, keys

In [9]:
rec, summ, keys = execute_neo4j_query(URI, "neo4j", "vava7878", "MATCH (n) RETURN (n)")

records: [<Record n=<Node element_id='4:6d1f0adb-16c6-4d53-b29a-4b6a931caee3:0' labels=frozenset({'Node'}) properties={'id': 'OrderReceived', 'label': 'Order received', 'type': 'start'}>>, <Record n=<Node element_id='4:6d1f0adb-16c6-4d53-b29a-4b6a931caee3:1' labels=frozenset({'Node'}) properties={'id': 'EnterOrderInSystem', 'label': 'Enter order in system', 'type': 'process'}>>, <Record n=<Node element_id='4:6d1f0adb-16c6-4d53-b29a-4b6a931caee3:2' labels=frozenset({'Node'}) properties={'id': 'CreditCheck', 'label': 'Credit check', 'type': 'process'}>>, <Record n=<Node element_id='4:6d1f0adb-16c6-4d53-b29a-4b6a931caee3:3' labels=frozenset({'Node'}) properties={'id': 'IsCreditGood', 'label': 'Is credit good?', 'type': 'decision'}>>, <Record n=<Node element_id='4:6d1f0adb-16c6-4d53-b29a-4b6a931caee3:4' labels=frozenset({'Node'}) properties={'id': 'RefuseOrder', 'label': 'Refuse order', 'type': 'output'}>>, <Record n=<Node element_id='4:6d1f0adb-16c6-4d53-b29a-4b6a931caee3:5' labels=frozen

In [12]:
summ.metadata

{'query': 'MATCH (n) RETURN (n)',
 'parameters': {'database': 'neo4j'},
 'server': <neo4j.api.ServerInfo at 0x72b7a8127260>,
 'database': None,
 't_first': 3,
 'fields': ['n'],
 'qid': 0,
 'statuses': [{'gql_status': '00000',
   'status_description': 'note: successful completion',
   'diagnostic_record': {'OPERATION': '',
    'OPERATION_CODE': '0',
    'CURRENT_SCHEMA': '/'}}],
 'type': 'r',
 't_last': 3,
 'db': 'neo4j'}

In [23]:
data = {
	"nodes": [
		{
			"id": "1",
			"type_of_node": "start",
			"label": "Input Image"
		},
		{
			"id": "2",
			"type_of_node": "process",
			"label": "Image Enhancement"
		},
		{
			"id": "3",
			"type_of_node": "process",
			"label": "Image Segmentation"
		},
		{
			"id": "4",
			"type_of_node": "process",
			"label": "Cropping ROI Manually"
		},
		{
			"id": "5",
			"type_of_node": "process",
			"label": "Threshold and region based technique"
		},
		{
			"id": "6",
			"type_of_node": "process",
			"label": "NCNNN"
		},
		{
			"id": "7",
			"type_of_node": "process",
			"label": "Testing"
		},
		{
			"id": "8",
			"type_of_node": "terminator",
			"label": "Evaluation"
		}
	],
	"edges": [
		{
			"from_": "1",
			"to": "2",
			"type_of_edge": "solid",
			"relationship_type": "follows"
		},
		{
			"from_": "2",
			"to": "3",
			"type_of_edge": "solid",
			"relationship_type": "follows"
		},
		{
			"from_": "3",
			"to": "4",
			"type_of_edge": "solid",
			"relationship_type": "branches"
		},
		{
			"from_": "3",
			"to": "5",
			"type_of_edge": "solid",
			"relationship_type": "branches"
		},

		{
			"from_": "4",
			"to": "6",
			"type_of_edge": "solid",
			"relationship_type": "follows"
		},
		{
			"from_": "5",
			"to": "6",
			"type_of_edge": "solid",
			"relationship_type": "follows"
		},
		{
			"from_": "6",
			"to": "7",
			"type_of_edge": "solid",
			"relationship_type": "follows"
		},
		{
			"from_": "7",
			"to": "8",
			"type_of_edge": "solid",
			"relationship_type": "follows"
		}
	]
}


In [44]:
dd = {
  "nodes":{
    "Person":[
      {
        "_uid": "abc123",
        "last name": "John",
      },
      {
        "_uid": "dcf456",
        "last name": "Bowery",
      },
    ]
  },
  "relationships":{
      "FRIENDS_WITH":[
        {
          "_from_uid": "abc123",
          "_to_uid": "dcf456",
          "since": 1997
        }
      ]
  }
}

In [45]:
for node_label, node_records in dd['nodes'].items():
    for node in node_records:
        print(f'node_label :{node_label} {node}')

node_label :Person {'_uid': 'abc123', 'last name': 'John'}
node_label :Person {'_uid': 'dcf456', 'last name': 'Bowery'}


In [46]:
def convert_nodes(nodes:dict):
    query = ''
    for node_label in nodes.keys():
        query += f"""CALL apoc.create.nodes(["{node_label}"],${node_label});"""
    return query, nodes


In [47]:
convert_nodes(dd['nodes'])

('CALL apoc.create.nodes(["Person"],$Person);',
 {'Person': [{'_uid': 'abc123', 'last name': 'John'},
   {'_uid': 'dcf456', 'last name': 'Bowery'}]})

In [48]:
execute_neo4j_query(URI, "neo4j", "vava7878", convert_nodes(dd['nodes'])[0], convert_nodes(dd['nodes'])[1])

records: [<Record node=<Node element_id='4:6d1f0adb-16c6-4d53-b29a-4b6a931caee3:2' labels=frozenset({'Person'}) properties={'_uid': 'abc123', 'last name': 'John'}>>, <Record node=<Node element_id='4:6d1f0adb-16c6-4d53-b29a-4b6a931caee3:3' labels=frozenset({'Person'}) properties={'_uid': 'dcf456', 'last name': 'Bowery'}>>], summary: <neo4j._work.summary.ResultSummary object at 0x72b76a209e20>, keys: ['node']


([<Record node=<Node element_id='4:6d1f0adb-16c6-4d53-b29a-4b6a931caee3:2' labels=frozenset({'Person'}) properties={'_uid': 'abc123', 'last name': 'John'}>>,
  <Record node=<Node element_id='4:6d1f0adb-16c6-4d53-b29a-4b6a931caee3:3' labels=frozenset({'Person'}) properties={'_uid': 'dcf456', 'last name': 'Bowery'}>>],
 <neo4j._work.summary.ResultSummary at 0x72b76a209e20>,
 ['node'])

In [29]:
node_types = set()

for node in data['nodes']:
    node_types.add(node['type_of_node'].capitalize())

In [30]:
node_types

{'Process', 'Start', 'Terminator'}

In [None]:
"""CALL apoc.create.nodes(["Process"],$Person);"""

In [31]:
for node_type in node_types:
    print(f"""CALL apoc.create.nodes(["{node_type}"],${node_type});""")

CALL apoc.create.nodes(["Start"],$Start);
CALL apoc.create.nodes(["Process"],$Process);
CALL apoc.create.nodes(["Terminator"],$Terminator);


In [49]:
driver = GraphDatabase.driver(URI, auth=AUTH)

In [71]:
data = {
  "nodes": [
    {
      "id": "1",
      "type_of_node": "start",
      "label": "Start"
    },
    {
      "id": "2",
      "type_of_node": "process",
      "label": "Initialize"
    },
    {
      "id": "3",
      "type_of_node": "delay",
      "label": "Delay"
    },
    {
      "id": "4",
      "type_of_node": "decision",
      "label": "Decision"
    },
    {
      "id": "5",
      "type_of_node": "process",
      "label": "Process"
    },
    {
      "id": "6",
      "type_of_node": "process",
      "label": "Print result"
    },
    {
      "id": "7",
      "type_of_node": "terminator",
      "label": "End"
    }
  ],
  "edges": [
    {
      "from_": "1",
      "from_type": "start",
      "to": "2",
      "to_type": "process",
      "type_of_edge": "solid",
      "relationship_value": "",
      "relationship_type": "follows"
    },
    {
      "from_": "2",
      "from_type": "process",
      "to": "4",
      "to_type": "decision",
      "type_of_edge": "solid",
      "relationship_value": "",
      "relationship_type": "follows"
    },
    {
      "from_": "4",
      "from_type": "decision",
      "to": "6",
      "to_type": "process",
      "type_of_edge": "solid",
      "relationship_value": "yes",
      "relationship_type": "branches"
    },
    {
      "from_": "4",
      "from_type": "decision",
      "to": "5",
      "to_type": "process",
      "type_of_edge": "solid",
      "relationship_value": "no",
      "relationship_type": "branches"
    },
    {
      "from_": "5",
      "from_type": "process",
      "to": "3",
      "to_type": "delay",
      "type_of_edge": "solid",
      "relationship_value": "",
      "relationship_type": "follows"
    },
    {
      "from_": "3",
      "from_type": "delay",
      "to": "2",
      "to_type": "process",
      "type_of_edge": "dashed",
      "relationship_value": "",
      "relationship_type": "depends_on"
    },
    {
      "from_": "6",
      "from_type": "process",
      "to": "7",
      "to_type": "terminator",
      "type_of_edge": "solid",
      "relationship_value": "",
      "relationship_type": "follows"
    }
  ]
}

In [None]:
# Transaction functions, first params is always a transaction object
def create_nodes(tx, nodes:dict):
    for node in nodes:
        query = (
            "MERGE (n:" + node["type_of_node"].capitalize() + " {id: $id}) "
            "SET n.type_of_node = $type_of_node, n.label = $label"
        )
        tx.run(
            query, id=node["id"], type_of_node=node["type_of_node"], label=node["label"]
        )


def create_edges(tx, edges:dict):
    for edge in edges:
        query = (
            "MATCH (a:"
            + edge["from_type"].capitalize()
            + " {id: $from_id}), (b:"
            + edge["to_type"].capitalize()
            + " {id: $to_id}) "
            "MERGE (a)-[r:"
            + edge["relationship_type"].upper()
            + " {type_of_edge: $type_of_edge, relationship_value: $relationship_value}]->(b)"
        )
        # MATCH (a:Start {id: $from_id}), (b:Process {id: $to_id}) MERGE (a)-[r:FOLLOWS {type_of_edge: $type_of_edge, relationship_value: $relationship_value}]->(b)
        print(query)
        tx.run(
            query,
            from_id=edge["from_"],
            to_id=edge["to"],
            type_of_edge=edge["type_of_edge"],
            relationship_value=edge["relationship_value"],
        )

with driver.session() as session:
    session.execute_write(create_nodes, data['nodes'])
    session.execute_write(create_edges, data['edges'])

MATCH (a:Start {id: $from_id}), (b:Process {id: $to_id}) MERGE (a)-[r:FOLLOWS {type_of_edge: $type_of_edge, relationship_value: $relationship_value}]->(b)
MATCH (a:Process {id: $from_id}), (b:Decision {id: $to_id}) MERGE (a)-[r:FOLLOWS {type_of_edge: $type_of_edge, relationship_value: $relationship_value}]->(b)
MATCH (a:Decision {id: $from_id}), (b:Process {id: $to_id}) MERGE (a)-[r:BRANCHES {type_of_edge: $type_of_edge, relationship_value: $relationship_value}]->(b)
MATCH (a:Decision {id: $from_id}), (b:Process {id: $to_id}) MERGE (a)-[r:BRANCHES {type_of_edge: $type_of_edge, relationship_value: $relationship_value}]->(b)
MATCH (a:Process {id: $from_id}), (b:Delay {id: $to_id}) MERGE (a)-[r:FOLLOWS {type_of_edge: $type_of_edge, relationship_value: $relationship_value}]->(b)
MATCH (a:Delay {id: $from_id}), (b:Process {id: $to_id}) MERGE (a)-[r:DEPENDS_ON {type_of_edge: $type_of_edge, relationship_value: $relationship_value}]->(b)
MATCH (a:Process {id: $from_id}), (b:Terminator {id: $to