In [35]:
import os
import sys
sys.path.append(os.path.abspath("../"))

from dotenv import find_dotenv, load_dotenv
from funes.agents.agent_types import Role, Persona, AutogenAgentType
from llm_foundation import logger
from llm_foundation.basic_structs import Provider, LMConfig
from pprint import pprint
import tempfile

import autogen
from autogen.coding import LocalCommandLineCodeExecutor, DockerCommandLineCodeExecutor
from autogen.agentchat import GroupChat, GroupChatManager

from langchain.tools import BaseTool
from langchain_community.graphs import Neo4jGraph
from langchain_community.vectorstores import Neo4jVector
from langchain_core.utils.function_calling import convert_to_openai_function


# Load variables from the nearest .env file into the process environment.
load_dotenv(find_dotenv())
# Indexing os.environ (instead of .get) raises KeyError right away if the key is missing.
openai_api_key = os.environ["OPENAI_API_KEY"]

# Scratch directory; its .name is passed as work_dir to the code executor in a later cell.
temp_dir = tempfile.TemporaryDirectory()

# Model settings, converted to the structure handed to the agents as llm_config.
lm_config = LMConfig(model="gpt-4o-mini", provider=Provider.Autogen)
llm_config = lm_config.to_autogen()
# NOTE(review): echoing the config puts its 'api_key' entry into the saved cell output —
# confirm no real key is ever stored with the notebook.
llm_config

{'seed': 14,
 'functions': None,
 'config_list': [{'model': 'gpt-4o-mini',
   'api_key': ''}],
 'temperature': 0.0}

In [36]:
# Load the two persona definitions from JSON files (paths are relative to the working directory).
francisco = Persona.from_json_file("Persona/Francisco.json")
neo4j_persona = Persona.from_json_file("Persona/Neo4jExpert.json")
# Print both personas to check what was loaded.
print(francisco, neo4j_persona)

Persona: Francisco
  Role: learner
  Description: A highly motivated human that is eager to learn.
  Agent System Message: A human admin. Reply TERMINATE when the task is done.
  Autogen Code Execution Config: {}
 Persona: Ian_Robinson
  Role: neo4j
  Description: An expert in graph databases and Cypher query language in particular.
  Agent System Message: You are an expert in generating the most flexible queries in Cypher language to serve the user requests. You will receive json objects that will encode nodes, keys and relationships in a Cypher knowledge graph. You will identify if the current graph has already present concepts and keys that you can reuse them to generate the partial graph structures encoding the json information, e.g. if the graph has a Person node integrate an Author as a person that has a property author in a relationship WROTE. Name all nodes and relationships (e.g. MERGE (a)-[wrote:WROTE]->(b)). Reply only with the Cypher queries and RETURN statements with the a

In [37]:
# Neo4j connection settings; each os.environ lookup raises KeyError if the variable is unset.
NEO4J_URI = os.environ["NEO4J_URI"]
NEO4J_USERNAME = os.environ["NEO4J_USERNAME"]
NEO4J_PASSWORD = os.environ["NEO4J_PASSWORD"]
# The database name is fixed here rather than read from the environment.
NEO4J_DATABASE = "neo4j"

In [38]:
# Graph handle used by every later cell that queries or inspects the database.
kg = Neo4jGraph(
    url=NEO4J_URI, username=NEO4J_USERNAME, password=NEO4J_PASSWORD, database=NEO4J_DATABASE
)

# Quick check of the object that was created.
print(type(kg))

<class 'langchain_community.graphs.neo4j_graph.Neo4jGraph'>


In [39]:
# Display the graph schema (node properties, relationship properties, relationships, metadata).
kg.get_structured_schema



{'node_props': {'Person': [{'property': 'name', 'type': 'STRING'},
   {'property': 'last_name', 'type': 'STRING'},
   {'property': 'birth_date', 'type': 'STRING'}],
  'Author': [{'property': 'name', 'type': 'STRING'}],
  'Book': [{'property': 'year', 'type': 'STRING'},
   {'property': 'isbn', 'type': 'STRING'},
   {'property': 'title', 'type': 'STRING'},
   {'property': 'url', 'type': 'STRING'}],
  'Publisher': [{'property': 'name', 'type': 'STRING'}]},
 'rel_props': {},
 'relationships': [{'start': 'Person', 'type': 'sibling', 'end': 'Person'},
  {'start': 'Person', 'type': 'WROTE', 'end': 'Book'},
  {'start': 'Author', 'type': 'WROTE', 'end': 'Book'},
  {'start': 'Book', 'type': 'PUBLISHED_BY', 'end': 'Publisher'}],
 'metadata': {'constraint': [], 'index': []}}

In [40]:
# Seed the graph with one example book, its author and its publisher.
# MERGE (rather than CREATE) keeps this cell idempotent: re-running it matches the
# existing nodes and relationships instead of inserting duplicates.
query = """
MERGE (a:Author {name: 'Sebastian Raschka'})
MERGE (b:Book {title: 'Build A Large Language Model (From Scratch)', year: '2024', isbn: '978-1633437166', url: 'https://www.manning.com/books/build-a-large-language-model-from-scratch'})
MERGE (p:Publisher {name: 'Manning'})
MERGE (a)-[:WROTE]->(b)
MERGE (b)-[:PUBLISHED_BY]->(p)
"""

# The statement has no RETURN clause, so the result is an empty list.
result = kg.query(query)
result

[]

In [41]:


from abc import ABC
from typing import List, Optional
from langchain_community.graphs.neo4j_graph import Neo4jGraph


class GraphMetadata(ABC):
    """Helper for inspecting the structured schema of a Neo4j graph.

    The schema is read once in ``__init__`` and kept on ``self.schema``; it is not
    refreshed afterwards, so build a new instance to observe later schema changes.
    """

    def __init__(self, graph: "Neo4jGraph"):
        """Store ``graph`` and snapshot its ``get_structured_schema`` value.

        Only ``graph.get_structured_schema`` and ``graph.query`` are used by this class.
        """
        self.graph = graph
        self.schema = self.graph.get_structured_schema

    @staticmethod
    def _quote_identifier(identifier: str) -> str:
        """Backtick-quote a Cypher identifier, doubling any embedded backtick."""
        return "`" + identifier.replace("`", "``") + "`"

    def get_node_names(self) -> List[str]:
        """Return the node labels listed under ``node_props`` in the schema."""
        return list(self.schema["node_props"].keys())

    def get_edge_names(self, origin: Optional[str] = None, dest: Optional[str] = None):
        """Return relationship types, optionally filtered by start and/or end label.

        Args:
            origin: keep only relationships whose ``start`` equals this label.
            dest: keep only relationships whose ``end`` equals this label.

        Returns:
            A tuple ``(types, edges)``: the ``type`` of every matching relationship
            and the matching relationship dicts themselves, in schema order.
        """
        edges = self.schema["relationships"]

        if origin is not None:
            edges = [edge for edge in edges if edge["start"] == origin]

        if dest is not None:
            edges = [edge for edge in edges if edge["end"] == dest]

        return [edge["type"] for edge in edges], edges

    def is_node_in_graph(self, node: str):
        """Return True when ``node`` is one of the labels in the cached schema."""
        return node in self.get_node_names()

    def get_node_attributes_from_node(self, node):
        """Return the property names of ``node``, or an empty list for an unknown label."""
        if not self.is_node_in_graph(node):
            return []
        return [property_info["property"] for property_info in self.schema["node_props"][node]]

    def get_node_instance(self, node: str, instance_id: str, instance_name: str):
        """Fetch the nodes labelled ``node`` whose ``instance_id`` property equals ``instance_name``.

        Args:
            node: node label; must be present in the cached schema.
            instance_id: name of the property to compare.
            instance_name: value the property must equal.

        Returns:
            Whatever ``graph.query`` returns for the lookup, keyed by the lower-cased
            label, or an empty list when the label is not in the schema.
        """
        if not self.is_node_in_graph(node):
            # Same (list) type as the query result, so callers never have to branch on type.
            return []

        query_node_id = node.lower()
        # Property names cannot be Cypher parameters, so quote the identifier instead.
        property_ref = self._quote_identifier(instance_id)
        # The value travels as a query parameter: quotes inside it cannot break out of
        # the statement or alter it.
        query = f"""
                          MATCH ({query_node_id}:{node})
                          WHERE {query_node_id}.{property_ref} = $instance_name
                          RETURN {query_node_id}
                          """
        return self.graph.query(query, params={"instance_name": instance_name})

# Shared helper instance used by the tool classes and demo calls below; it snapshots kg's schema at construction.
graph_metadata = GraphMetadata(kg)


class GetNodeNames(BaseTool):
    """Tool that lists the node labels known to a ``GraphMetadata`` helper."""

    # Explicit annotations: pydantic v2 rejects overriding an inherited field with a
    # bare, un-annotated assignment.
    name: str = "get_node_names"
    description: str = "Extract node names from a graph"
    # Schema helper the tool delegates to.
    graph: GraphMetadata

    def _run(self) -> List[str]:
        """Return the node labels reported by the wrapped ``GraphMetadata``."""
        return self.graph.get_node_names()
    
class GetRelationshipNames(BaseTool):
    """Tool that lists the relationships known to a ``GraphMetadata`` helper."""

    # Explicit annotations: pydantic v2 rejects overriding an inherited field with a
    # bare, un-annotated assignment.
    name: str = "get_relationship_names"
    description: str = "Extract relationship names from a graph"
    # Schema helper the tool delegates to.
    graph: GraphMetadata

    def _run(self):
        """Return ``(types, edges)`` for every relationship, as given by ``get_edge_names()``."""
        return self.graph.get_edge_names()
        

# Tool wrappers around the shared metadata helper; a later cell exposes them to the agents.
get_node_names = GetNodeNames(graph=graph_metadata)
get_relationship_names = GetRelationshipNames(graph=graph_metadata)

# Node labels, then the property names of the Person label.
print("Graph Nodes")
for node_info in (
    graph_metadata.get_node_names(),
    graph_metadata.get_node_attributes_from_node("Person"),
):
    print(node_info)

# Relationship lookups: one (label, (origin, dest)) row per printed line.
print("Graph Edges")
edge_lookups = (
    ("All", (None, None)),
    ("Person-as-origin", ("Person", None)),
    ("Person-as-end", (None, "Person")),
    ("Book-as-origin", ("Book", None)),
    ("Book-as-end", (None, "Book")),
    ("Person-Book", ("Person", "Book")),
)
for label, (origin, dest) in edge_lookups:
    print(f"{label}: {graph_metadata.get_edge_names(origin, dest)}")

# Concrete node lookup by property value.
print("Graph Francisco Node Instances")
francisco_nodes = graph_metadata.get_node_instance("Person", "name", "Francisco")
print(f"All instances: {francisco_nodes}")

Graph Nodes
['Person', 'Author', 'Book', 'Publisher']
['name', 'last_name', 'birth_date']
Graph Edges
All: (['sibling', 'WROTE', 'WROTE', 'PUBLISHED_BY'], [{'start': 'Person', 'type': 'sibling', 'end': 'Person'}, {'start': 'Person', 'type': 'WROTE', 'end': 'Book'}, {'start': 'Author', 'type': 'WROTE', 'end': 'Book'}, {'start': 'Book', 'type': 'PUBLISHED_BY', 'end': 'Publisher'}])
Person-as-origin: (['sibling', 'WROTE'], [{'start': 'Person', 'type': 'sibling', 'end': 'Person'}, {'start': 'Person', 'type': 'WROTE', 'end': 'Book'}])
Person-as-end: (['sibling'], [{'start': 'Person', 'type': 'sibling', 'end': 'Person'}])
Book-as-origin: (['PUBLISHED_BY'], [{'start': 'Book', 'type': 'PUBLISHED_BY', 'end': 'Publisher'}])
Book-as-end: (['WROTE', 'WROTE'], [{'start': 'Person', 'type': 'WROTE', 'end': 'Book'}, {'start': 'Author', 'type': 'WROTE', 'end': 'Book'}])
Person-Book: (['WROTE'], [{'start': 'Person', 'type': 'WROTE', 'end': 'Book'}])
Graph Francisco Node Instances
All instances: [{'perso

In [42]:
from neo4j import GraphDatabase

In [47]:
!rm -rf .cache

get_node_names_tool = convert_to_openai_function(get_node_names)
get_relationship_names_tool = convert_to_openai_function(get_relationship_names)

tools = [get_node_names_tool, get_relationship_names_tool]
llm_with_tools_config = llm_config.copy()
llm_with_tools_config.update({"functions": tools})



neo4j_agent = neo4j_persona.role_to_autogen_agent("neo4j", AutogenAgentType.AssistantAgent, llm_config=llm_with_tools_config)
# neo4j_agent.register_for_execution(name="get_node_names")(get_node_names._run)
neo4j_agent.register_function(
    function_map={
        get_node_names.name: get_node_names._run,
        get_relationship_names.name: get_relationship_names._run,
    })


def _is_termination_message(msg) -> bool:
    """Return True when the message text contains "terminate" (case-insensitive).

    Function-call messages can carry ``content=None`` (or no ``content`` key at all);
    those count as "not a termination" instead of raising on ``.lower()``.
    """
    content = msg.get("content") or ""
    return isinstance(content, str) and "terminate" in content.lower()


# This agent only writes code; code execution is disabled for it.
neo4j_python_dev_agent = neo4j_persona.role_to_autogen_agent(
    "neo4j_python_dev",
    AutogenAgentType.AssistantAgent,
    llm_config=llm_with_tools_config,
    code_execution_config=False,
)


# Create a local command line code executor.
executor = LocalCommandLineCodeExecutor(
    timeout=10,  # Timeout for each code execution in seconds.
    work_dir=temp_dir.name,  # Use the temporary directory to store the code files.
)

# Create an agent with code executor configuration and no LLM.
neo4j_code_executor_agent = neo4j_persona.role_to_autogen_agent(
    "neo4j_code_executor",
    AutogenAgentType.AssistantAgent,
    llm_config=False,
    code_execution_config={"executor": executor},
)

# The "learner" agent that opens the chat; "NEVER" is passed positionally exactly as before.
francisco_learner = francisco.role_to_autogen_agent(
    "learner",
    AutogenAgentType.UserProxyAgent,
    "NEVER",
    llm_config=llm_config,
    termination_function=_is_termination_message,
)

# For each agent (key), the agents that may be selected to speak right after it.
agent_speaker_transitions_dict = {
    francisco_learner: [neo4j_agent],
    neo4j_agent: [francisco_learner, neo4j_python_dev_agent],
    neo4j_python_dev_agent: [neo4j_code_executor_agent],
    neo4j_code_executor_agent: [neo4j_python_dev_agent, francisco_learner]
}

# Group chat over the four agents, capped at 10 rounds, with the transition table above applied as an allow-list.
groupchat = GroupChat(
    agents = [neo4j_agent, neo4j_python_dev_agent, neo4j_code_executor_agent, francisco_learner],
    messages=[],
    max_round=10,
    select_speaker_auto_verbose=True,
    speaker_transitions_type="allowed",  # This has to be specified if the transitions below apply
    allowed_or_disallowed_speaker_transitions=agent_speaker_transitions_dict,
)

# Manager that drives the group chat; it uses the base LLM config (no tool functions attached).
manager = GroupChatManager(
    groupchat=groupchat, 
    llm_config=llm_config,
    system_message="You act as a coordinator for different specialiced roles. If you don't have anything to say, just say TERMINATE."
)        


# Alternative single-author payload. It has its own name so that it is not silently
# overwritten by the payload below; assign it to ``book_json`` to run the chat on it instead.
raschka_book_json = {
    "author": "Sebastian Raschka",
    "title": "Build A Large Language Model (From Scratch)",
    "publisher": "Manning",
    "year": "2024",
    "isbn": "978-1633437166",
    "url": "https://www.manning.com/books/build-a-large-language-model-from-scratch"
}

# Payload interpolated into the prompt sent to the agents in this cell.
book_json = {
  "title": "AutoGen: Enabling Next-Gen LLM Applications via Multi-Agent Conversation",
  "authors": [
    "Qingyun Wu",
    "Gagan Bansal",
    "Jieyu Zhang",
    "Yiran Wu",
    "Beibin Li",
    "Erkang Zhu",
    "Li Jiang",
    "Xiaoyun Zhang",
    "Shaokun Zhang",
    "Jiale Liu",
    "Ahmed H Awadallah",
    "Ryen White",
    "Doug Burger",
    "Chi Wang"
  ],
  "publish_date": "September 25, 2023",
  "conference": "Workshop on Large Language Models for Agents",
  "uri": "https://arxiv.org/abs/2308.08155"
}

# Task prompt for the group chat.
# NOTE(review): the f-string embeds the dict's Python repr (single-quoted keys and values),
# although the prompt calls it a json object — confirm that is acceptable.
content = f"""
Get an accurate Cypher query to encode the information on the following json object. 
Then, make a python program to execute in neo4j. 
Execute the program, fixing any errors that may arise. 
The NEO4J_USERNAME, NEO4J_PASSWORD and NEO4J_URI environment variables can be read from a .env file in the current dir.

{book_json}
"""

# Start the conversation: the learner agent sends the prompt to the group chat manager.
# The returned object's chat_history is read further down in this cell.
response = francisco_learner.initiate_chat(
    manager,
    message={"content": content, "role": "user"},
)


def find_last_message(name: str, chat_history) -> Optional[dict]:
    """Return the most recent message in ``chat_history`` sent by ``name``.

    Args:
        name: value to match against each message's ``"name"`` entry.
        chat_history: sequence of message dicts, oldest first.

    Returns:
        The last message whose ``"name"`` equals ``name``, or ``None`` when there is
        no such message. Messages without a ``"name"`` key are skipped.
    """
    for message in reversed(chat_history):
        # .get: a message lacking "name" must not abort the search with KeyError.
        if message.get("name") == name:
            return message
    return None

# find_last_message returns None when "neo4j" never spoke, so guard before indexing.
last_neo4j_message = find_last_message("neo4j", response.chat_history)
if last_neo4j_message is None:
    print("No message from 'neo4j' found in the chat history.")
else:
    print(last_neo4j_message["content"])


# francisco_learner.initiate_chat(neo4j_agent, message="")

















[33mlearner[0m (to chat_manager):


Get an accurate Cypher query to encode the information on the following json object. 
Then, make a python program to execute in neo4j. 
Execute the program, fixing any errors that may arise. 
The NEO4J_USERNAME, NEO4J_PASSWORD and NEO4J_URI environment variables can be read from a .env file in the current dir.

{'title': 'AutoGen: Enabling Next-Gen LLM Applications via Multi-Agent Conversation', 'authors': ['Qingyun Wu', 'Gagan Bansal', 'Jieyu Zhang', 'Yiran Wu', 'Beibin Li', 'Erkang Zhu', 'Li Jiang', 'Xiaoyun Zhang', 'Shaokun Zhang', 'Jiale Liu', 'Ahmed H Awadallah', 'Ryen White', 'Doug Burger', 'Chi Wang'], 'publish_date': 'September 25, 2023', 'conference': 'Workshop on Large Language Models for Agents', 'uri': 'https://arxiv.org/abs/2308.08155'}


--------------------------------------------------------------------------------
[32m
Next speaker: neo4j
[0m


2024-09-28 00:01:16 fperez-gcloud-stupid-sailor-twift httpx[4101110] INFO HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


[33mneo4j[0m (to chat_manager):

[32m***** Suggested function call: get_node_names *****[0m
Arguments: 
{}
[32m***************************************************[0m

--------------------------------------------------------------------------------
[32m
Next speaker: neo4j
[0m
[35m
>>>>>>>> EXECUTING FUNCTION get_node_names...[0m
[33mneo4j[0m (to chat_manager):

[32m***** Response from calling function (get_node_names) *****[0m
['Person', 'Author', 'Book', 'Publisher']
[32m***********************************************************[0m

--------------------------------------------------------------------------------




[33mchecking_agent[0m (to speaker_selection_agent):

Read the above conversation. Then select the next role from ['neo4j_python_dev', 'learner'] to play. Only return the role.

--------------------------------------------------------------------------------


2024-09-28 00:01:16 fperez-gcloud-stupid-sailor-twift httpx[4101110] INFO HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


[33mspeaker_selection_agent[0m (to checking_agent):

neo4j_python_dev

--------------------------------------------------------------------------------
[32m>>>>>>>> Select speaker attempt 1 of 3 successfully selected: neo4j_python_dev[0m
[32m
Next speaker: neo4j_python_dev
[0m


2024-09-28 00:01:17 fperez-gcloud-stupid-sailor-twift httpx[4101110] INFO HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


[33mneo4j_python_dev[0m (to chat_manager):

[32m***** Suggested function call: get_relationship_names *****[0m
Arguments: 
{}
[32m***********************************************************[0m

--------------------------------------------------------------------------------
[32m
Next speaker: neo4j
[0m
[35m
>>>>>>>> EXECUTING FUNCTION get_relationship_names...[0m
[33mneo4j[0m (to chat_manager):

[32m***** Response from calling function (get_relationship_names) *****[0m
(['sibling', 'WROTE', 'WROTE', 'PUBLISHED_BY'], [{'start': 'Person', 'type': 'sibling', 'end': 'Person'}, {'start': 'Person', 'type': 'WROTE', 'end': 'Book'}, {'start': 'Author', 'type': 'WROTE', 'end': 'Book'}, {'start': 'Book', 'type': 'PUBLISHED_BY', 'end': 'Publisher'}])
[32m*******************************************************************[0m

--------------------------------------------------------------------------------




[33mchecking_agent[0m (to speaker_selection_agent):

Read the above conversation. Then select the next role from ['neo4j_python_dev', 'learner'] to play. Only return the role.

--------------------------------------------------------------------------------


2024-09-28 00:01:17 fperez-gcloud-stupid-sailor-twift httpx[4101110] INFO HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


[33mspeaker_selection_agent[0m (to checking_agent):

neo4j_python_dev

--------------------------------------------------------------------------------
[32m>>>>>>>> Select speaker attempt 1 of 3 successfully selected: neo4j_python_dev[0m
[32m
Next speaker: neo4j_python_dev
[0m


2024-09-28 00:01:26 fperez-gcloud-stupid-sailor-twift httpx[4101110] INFO HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


[33mneo4j_python_dev[0m (to chat_manager):

To encode the provided JSON object into a Neo4j graph, we will create nodes for the book and its authors, and establish relationships between them. The Cypher query will create a `Book` node and `Author` nodes, and link them with the `WROTE` relationship.

Here’s the plan:
1. Create a `Book` node with properties from the JSON object.
2. Create `Author` nodes for each author in the list.
3. Create `WROTE` relationships between the `Book` node and each `Author` node.

Now, I will write a Python program to execute this in Neo4j. The program will read the environment variables from a `.env` file, connect to the Neo4j database, and execute the Cypher query.

Here is the complete Python code:

```python
# filename: encode_book.py
import os
from dotenv import load_dotenv
from neo4j import GraphDatabase

# Load environment variables from .env file
load_dotenv()
NEO4J_URI = os.getenv("NEO4J_URI")
NEO4J_USERNAME = os.getenv("NEO4J_USERNAME")
NEO4J_PA



[33mchecking_agent[0m (to speaker_selection_agent):

Read the above conversation. Then select the next role from ['neo4j_python_dev', 'learner'] to play. Only return the role.

--------------------------------------------------------------------------------


2024-09-28 00:01:28 fperez-gcloud-stupid-sailor-twift httpx[4101110] INFO HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


[33mspeaker_selection_agent[0m (to checking_agent):

learner

--------------------------------------------------------------------------------
[32m>>>>>>>> Select speaker attempt 1 of 3 successfully selected: learner[0m
[32m
Next speaker: learner
[0m


2024-09-28 00:01:30 fperez-gcloud-stupid-sailor-twift httpx[4101110] INFO HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


[33mlearner[0m (to chat_manager):


To address this, you can update the code as follows:

```python
# Replace this line
# session.write_transaction(create_book_and_authors, data)

# With this line
session.execute_write(create_book_and_authors, data)
```

Make this change in your code, and it will be aligned with the latest Neo4j Python driver conventions.

If you need any further assistance or modifications, let me know! 

TERMINATE

--------------------------------------------------------------------------------
[32m
Next speaker: neo4j
[0m


2024-09-28 00:01:30 fperez-gcloud-stupid-sailor-twift httpx[4101110] INFO HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


[33mneo4j[0m (to chat_manager):

TERMINATE

--------------------------------------------------------------------------------
TERMINATE


In [45]:
# Connection settings for the raw neo4j driver. The username comes from NEO4J_USERNAME,
# the variable the rest of this notebook reads; NEO4J_USER is not referenced anywhere else.
# Indexing os.environ fails fast with KeyError instead of passing None on to the driver.
uri = os.environ["NEO4J_URI"]
user = os.environ["NEO4J_USERNAME"]
password = os.environ["NEO4J_PASSWORD"]

# Create a Neo4j driver
driver = GraphDatabase.driver(uri, auth=(user, password))

In [None]:

# Rebinds book_json to the single-author book payload for this experiment.
book_json = {
    "author": "Sebastian Raschka",
    "title": "Build A Large Language Model (From Scratch)",
    "publisher": "Manning",
    "year": "2024",
    "isbn": "978-1633437166",
    "url": "https://www.manning.com/books/build-a-large-language-model-from-scratch"
}


# Prompt asking for Cypher that encodes the payload; the dict is embedded via its Python repr.
content = f"""
Given the following json object about a book release, identify the main entities and relationships to build a graph encoded with nodes and relationships
with Cypher language. Name all nodes and relationships (e.g. MERGE (a)-[wrote:WROTE]->(b)). Check also first if the graph contains a high level entity 
for some of the new entities identified and try to integrate better the new subgraph (e.g. if the graph has a Person node integrate an Author as a person
that has a property author in a relationship WROTE). Reply only with the Cypher queries and RETURN statements with the affected 
nodes and relationships, but without any wrappers nor bat-ticks.

{book_json}
"""

print(content)

# Single-turn reply, outside the group chat.
# NOTE(review): the reply is generated by francisco_learner (the "learner" agent), not by
# neo4j_agent — confirm this is the intended agent for a Cypher-generation prompt.
resp = francisco_learner.generate_reply(
    messages=[{"content": content, "role": "user"}]
)

pprint(resp)


In [None]:
# Plain print of the reply stored in `resp`.
print(resp)

In [None]:
# Second prompt: asks for a rewrite of the previous reply (`resp`), which is embedded at the end.
content = f"""
Given the following Cypher query, identify if the current graph contains a high level entity 
for some of the new entities identified in the query and try to use them in the new subgraph (e.g. check if the graph has a IndividualContributor node 
and in the query there has been identified an Boss, rewrite the query to use Person as node and integrate the author as a property
of a possible relationship MANAGES). Name all nodes and relationships (e.g. MERGE (a)-[wrote:WROTE]->(b)) Reply only with the rewriten Cypher queries and RETURN statements with the affected nodes and 
relationships, but without any wrappers nor bat-ticks.

{resp}
"""

print(content)

# Single-turn reply from the same agent as the previous prompt; stored separately from `resp`.
resp_rewrite = francisco_learner.generate_reply(
    messages=[{"content": content, "role": "user"}]
)

pprint(resp_rewrite)


In [None]:
# NOTE(review): this executes the model-generated text in `resp` directly against the database,
# and it uses `resp` rather than the rewritten `resp_rewrite` — confirm both are intended.
result = kg.query(resp)
result