In [7]:
!pip install --quiet langchain langchain-community langchain-openai neo4j

In [8]:
import os
from typing import Dict, List, Optional, Tuple, Type

from langchain.agents import AgentExecutor
from langchain.agents.format_scratchpad import format_to_openai_function_messages
from langchain.agents.output_parsers import OpenAIFunctionsAgentOutputParser
from langchain.callbacks.manager import CallbackManagerForToolRun
from langchain.pydantic_v1 import BaseModel, Field
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_core.messages import AIMessage, HumanMessage
from langchain_core.utils.function_calling import convert_to_openai_function
from langchain.tools import BaseTool
from langchain_openai import ChatOpenAI, OpenAIEmbeddings

from langchain_community.graphs import Neo4jGraph
from langchain_community.vectorstores import Neo4jVector
from langchain_community.vectorstores.neo4j_vector import remove_lucene_chars

In [11]:
import getpass
import os

# Do not hardcode the OpenAI key in the notebook: keep a key that is already
# exported in the environment, otherwise prompt for it without echoing it.
if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass.getpass("OpenAI API key: ")

# Neo4j connection settings used by the rest of the notebook.
# NOTE(review): these look like shared demo-server credentials - confirm they
# are meant to be public before committing.
os.environ["NEO4J_URI"] = "neo4j+ssc://demo.neo4jlabs.com"
os.environ["NEO4J_USERNAME"] = "companies"
os.environ["NEO4J_PASSWORD"] = "companies"
os.environ["NEO4J_DATABASE"] = "companies"

In [12]:
# Embedding model; reused below both by the vector store and for embedding
# ad-hoc topic strings.
embeddings = OpenAIEmbeddings()

# Graph handle used for all Cypher queries in this notebook. No arguments are
# passed, so connection details are presumably read from the NEO4J_* variables.
graph = Neo4jGraph()

# Vector store backed by the already-existing "news" index.
vector_index = Neo4jVector.from_existing_index(embedding=embeddings, index_name="news")

In [13]:
# Code for mapping organizations from user input to database using Full-text index
def generate_full_text_query(input: str) -> str:
    """
    Generate a full-text search query for a given input string.

    The input is stripped of Lucene special characters and split on
    whitespace. Each remaining word gets the fuzzy suffix ``~2`` (Lucene fuzzy
    matching with an edit distance of up to 2), and the words are combined
    with the AND operator. This is used to map organization names from user
    questions to database values while tolerating some misspellings.

    Args:
        input: Raw user-provided text.

    Returns:
        The query string, e.g. ``"Neon~2 AND Software~2"``. An empty string is
        returned when the input is empty or contains no searchable words, so
        callers can skip the index lookup instead of hitting an IndexError.
    """
    if not input:
        return ""
    # str.split() with no arguments already drops empty fragments.
    words = remove_lucene_chars(input).split()
    return " AND ".join(f"{word}~2" for word in words)

# Cypher used by get_candidates: run a full-text lookup, keep only nodes
# labelled Organization and return their distinct names.
candidate_query = """
CALL db.index.fulltext.queryNodes($index, $fulltextQuery, {limit: $limit})
YIELD node
WHERE node:Organization // Filter organization nodes
RETURN distinct node.name AS candidate
"""


def get_candidates(input: str, limit: int = 5) -> List[str]:
    """
    Retrieve candidate organization names from the database for an input string.

    The input is turned into a fuzzy full-text query and executed against the
    'entity' full-text index.

    Args:
        input: Organization name as typed by the user.
        limit: Maximum number of index hits requested from the database.

    Returns:
        A list of candidate names. If one or more candidates equal the input
        (case-insensitively, ignoring surrounding whitespace) only those are
        returned; otherwise all candidates are returned. An empty list is
        returned when the input contains nothing searchable.
    """
    # Nothing searchable: skip the lookup instead of building an invalid query.
    if not input or not remove_lucene_chars(input).split():
        return []

    ft_query = generate_full_text_query(input)
    rows = graph.query(
        candidate_query, {"fulltextQuery": ft_query, "index": "entity", "limit": limit}
    )
    # Ignore rows without a name so the comparison below cannot fail on None.
    names = [row["candidate"] for row in rows if row["candidate"] is not None]

    # If there is direct match return only that, otherwise return all options
    wanted = input.strip().lower()
    direct_match = [name for name in names if name.lower() == wanted]
    return direct_match or names

In [14]:
# Quick check of the fuzzy lookup with a partial organization name.
get_candidates(input="neo4")

['Net4', 'Neo4j', 'Neos', 'Neo', 'Neon Software']

In [15]:
def get_organization_news(
    topic: Optional[str] = None,
    organization: Optional[str] = None,
    country: Optional[str] = None,
    sentiment: Optional[str] = None,
) -> str:
    """
    Fetch news text chunks, optionally filtered by organization, country and sentiment.

    Args:
        topic: Free-text topic. When given, chunks are ranked by cosine
            similarity between the chunk embedding and the topic embedding;
            otherwise the most recent articles are returned.
        organization: Organization name as typed by the user; it is mapped to
            a database name via ``get_candidates``.
        country: Country name, used as-is to filter organizations.
        sentiment: ``"positive"`` selects articles with sentiment > 0.5; any
            other non-empty value selects articles with sentiment < -0.5.

    Returns:
        The matching chunks joined with ``"###Article: "``, or an instruction
        string for the agent when the organization is ambiguous or unknown.
    """
    # If there is no prefiltering, we can use vector index
    if topic and not organization and not country and not sentiment:
        # NOTE(review): this branch returns the raw similarity_search result
        # rather than a str, despite the annotation; kept for compatibility.
        return vector_index.similarity_search(topic)

    where_queries = []
    params = {"k": 5}  # Define the number of text chunks to retrieve
    if organization:
        # Map to database
        candidates = get_candidates(organization)
        if not candidates:
            # Previously this fell through to candidates[0] and raised IndexError.
            return (
                f"No organization matching '{organization}' was found in the database. "
                "Ask the user to rephrase or to pick a different organization."
            )
        if len(candidates) > 1:  # Ask for follow up if too many options
            return (
                "Ask a follow up question which of the available organizations "
                f"did the user mean. Available options: {candidates}"
            )
        where_queries.append(
            "EXISTS {(a)-[:MENTIONS]->(:Organization {name: $organization})}"
        )
        params["organization"] = candidates[0]
    if country:
        # No need to disambiguate
        where_queries.append(
            "EXISTS {(a)-[:MENTIONS]->(:Organization)-[:IN_CITY]->()-[:IN_COUNTRY]->(:Country {name: $country})}"
        )
        params["country"] = country

    if sentiment:
        if sentiment == "positive":
            where_queries.append("a.sentiment > $sentiment")
            params["sentiment"] = 0.5
        else:
            where_queries.append("a.sentiment < $sentiment")
            params["sentiment"] = -0.5
    if topic:  # Do vector comparison
        vector_snippet = (
            " WITH c, a, vector.similarity.cosine(c.embedding,$embedding) AS score "
            "ORDER BY score DESC LIMIT toInteger($k) "
        )
        params["embedding"] = embeddings.embed_query(topic)
        params["topic"] = topic
    else:  # Just return the latest data
        vector_snippet = " WITH c, a ORDER BY a.date DESC LIMIT toInteger($k) "

    return_snippet = "RETURN '#title ' + a.title + '\n#date ' + toString(a.date) + '\n#text ' + c.text AS output"

    # Uses parallel runtime where available
    match_clause = (
        "CYPHER runtime = parallel parallelRuntimeSupport=all "
        "MATCH (c:Chunk)<-[:HAS_CHUNK]-(a:Article) "
    )
    # Emit WHERE only when there is at least one filter; a dangling WHERE
    # (no filters and no topic) is not valid Cypher.
    where_clause = "WHERE " + " AND ".join(where_queries) if where_queries else ""
    complete_query = match_clause + where_clause + vector_snippet + return_snippet

    data = graph.query(complete_query, params)
    print(f"Cypher: {complete_query}\n")
    # Safely remove embedding before printing
    params.pop('embedding', None)
    print(f"Parameters: {params}")
    return "###Article: ".join([el["output"] for el in data])

In [16]:
# Exercise the combined path: organization filter, sentiment filter and topic ranking.
get_organization_news(topic="remote work", organization="neo4j", sentiment="positive")

Cypher: CYPHER runtime = parallel parallelRuntimeSupport=all MATCH (c:Chunk)<-[:HAS_CHUNK]-(a:Article) WHERE EXISTS {(a)-[:MENTIONS]->(:Organization {name: $organization})} AND a.sentiment > $sentiment WITH c, a, vector.similarity.cosine(c.embedding,$embedding) AS score ORDER BY score DESC LIMIT toInteger($k) RETURN '#title ' + a.title + '
#date ' + toString(a.date) + '
#text ' + c.text AS output

Parameters: {'k': 5, 'organization': 'Neo4j', 'sentiment': 0.5, 'topic': 'remote work'}


'#title Accounts in Transit: Ruder Finn Adds Neo4j\n#date 2023-04-27T00:00:00Z\n#text Ruder Finn signs on as North American agency of record for Neo4j, a native graph database and analytics company. The agency will be responsible for implementing an integrated communications program, as well as working to amplify awareness of the company and category. The scope of work will include strategic media relations and executive communications to support corporate and product PR. Antonia Caamaño, SVP of RF Tech, will lead the Ruder Finn team handling the account out of New York. "We chose Ruder Finn to achieve our next stage of awareness because of the agency\'s experience in enterprise IT and deep tech, which allows them to deliver smart strategies and creative executions, as well as their long-running relationships with top-tier media,” said Neo4j CMO Chandra Rangan.\nOak Public Relations is named communications agency of record for Custom Cones USA, which produces supplies for cannabis pre-

In [17]:
# Example question -> topic mappings appended to the `topic` field description.
fewshot_examples = """{Input:What are the health benefits for Google employees in the news? Topic: Health benefits}
{Input: What is the latest positive news about Google? Topic: None}
{Input: Are there any news about VertexAI regarding Google? Topic: VertexAI}
{Input: Are there any news about new products regarding Google? Topic: new products}
"""


class NewsInput(BaseModel):
    """Argument schema for the news tool; every field is optional."""

    # Free-text subject of interest; the description carries the few-shot examples.
    topic: Optional[str] = Field(
        None,
        description="Any particular topic that the user wants to finds information for. Here are some examples: "
        + fewshot_examples,
    )
    # Organization name as mentioned by the user.
    organization: Optional[str] = Field(
        None,
        description="Organization that the user wants to find information about",
    )
    # Country used to filter organizations.
    country: Optional[str] = Field(
        None,
        description="Country of organizations that the user is interested in. Use full names like United States of America and France.",
    )
    # Restricted to the two values listed in `enum`.
    sentiment: Optional[str] = Field(
        None,
        description="Sentiment of articles",
        enum=["positive", "negative"],
    )



In [18]:
class NewsTool(BaseTool):
    """Tool that exposes `get_organization_news` to the agent."""

    # Annotate the overridden fields explicitly: pydantic-v2-based BaseTool
    # versions reject un-annotated overrides, and the annotations are harmless
    # on pydantic v1.
    name: str = "NewsInformation"
    description: str = (
        "useful for when you need to find relevant information in the news"
    )
    args_schema: Type[BaseModel] = NewsInput

    def _run(
        self,
        topic: Optional[str] = None,
        organization: Optional[str] = None,
        country: Optional[str] = None,
        sentiment: Optional[str] = None,
        run_manager: Optional[CallbackManagerForToolRun] = None,
    ) -> str:
        """Use the tool.

        All arguments are forwarded positionally to `get_organization_news`;
        `run_manager` is accepted but not used.
        """
        return get_organization_news(topic, organization, country, sentiment)

    async def _arun(
        self,
        topic: Optional[str] = None,
        organization: Optional[str] = None,
        country: Optional[str] = None,
        sentiment: Optional[str] = None,
        run_manager: Optional[CallbackManagerForToolRun] = None,
    ) -> str:
        """Use the tool asynchronously.

        NOTE: `get_organization_news` is a synchronous function and is called
        inline here, so this coroutine blocks the event loop while it runs.
        """
        return get_organization_news(topic, organization, country, sentiment)

In [19]:
# Chat model with deterministic sampling (temperature 0) and streaming enabled.
llm = ChatOpenAI(temperature=0, model="gpt-4-turbo", streaming=True)
tools = [NewsTool()]

# Expose every tool to the model as an OpenAI function definition.
llm_with_tools = llm.bind(functions=[convert_to_openai_function(t) for t in tools])

# The system message describes the news use case of this notebook. The
# previous wording was left over from a movie-recommendation agent and was
# missing a sentence break before "Do only ...".
prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            "You are a helpful assistant that finds information about "
            "organizations in the news. If tools require follow up questions, "
            "make sure to ask the user for clarification. Make sure to include any "
            "available options that need to be clarified in the follow up questions. "
            "Do only the things the user specifically requested. ",
        ),
        MessagesPlaceholder(variable_name="chat_history"),
        ("user", "{input}"),
        MessagesPlaceholder(variable_name="agent_scratchpad"),
    ]
)

def _format_chat_history(chat_history: List[Tuple[str, str]]):
    """Flatten (human, ai) text pairs into an alternating list of messages.

    Each pair contributes a HumanMessage followed by an AIMessage, in the
    order the pairs appear in `chat_history`.
    """
    messages = []
    for user_text, assistant_text in chat_history:
        messages.extend(
            (HumanMessage(content=user_text), AIMessage(content=assistant_text))
        )
    return messages


# Agent pipeline: map the incoming dict onto the prompt variables, render the
# prompt, call the function-enabled model and parse its reply.
agent = (
    {
        # The user question is passed through unchanged.
        "input": lambda x: x["input"],
        # A missing or empty history becomes an empty message list.
        "chat_history": lambda x: _format_chat_history(x.get("chat_history") or []),
        # Earlier tool calls and their results, formatted as function messages.
        "agent_scratchpad": lambda x: format_to_openai_function_messages(
            x["intermediate_steps"]
        ),
    }
    | prompt
    | llm_with_tools
    | OpenAIFunctionsAgentOutputParser()
)

# Executor that runs the agent with the given tools.
agent_executor = AgentExecutor(tools=tools, agent=agent)

In [20]:
# Ask the agent a question that names an organization and a sentiment.
agent_executor.invoke(
    {"input": "What are some positive news regarding neo4j?"}
)

Cypher: CYPHER runtime = parallel parallelRuntimeSupport=all MATCH (c:Chunk)<-[:HAS_CHUNK]-(a:Article) WHERE EXISTS {(a)-[:MENTIONS]->(:Organization {name: $organization})} AND a.sentiment > $sentiment WITH c, a ORDER BY a.date DESC LIMIT toInteger($k) RETURN '#title ' + a.title + '
#date ' + toString(a.date) + '
#text ' + c.text AS output

Parameters: {'k': 5, 'organization': 'Neo4j', 'sentiment': 0.5}


{'input': 'What are some positive news regarding neo4j?',
 'output': "Here are some positive news regarding Neo4j:\n\n1. **New Product Integrations with Generative AI Features in Google Cloud Vertex AI**:\n   - Neo4j announced a new product integration with Google Cloud's latest generative AI features in Vertex AI. This integration allows enterprise customers to use knowledge graphs built on Neo4j's cloud offerings in Google Cloud Platform for more accurate, transparent, and explainable generative AI insights and recommendations. This partnership, which began in 2019, has enabled various AI use cases across large enterprises and SMBs, ranging from anti-money laundering to personalized recommendations and more.\n\n2. **GraphSummit Australia and Graphie Awards**:\n   - During the 2023 GraphSummit in Australia, Neo4j announced the winners of the 2023 Graphie Awards, recognizing organizations and individuals for outstanding innovation in implementing Neo4j’s graph technology. DXC Technolog

In [21]:
# Ask the agent a question that names a topic, a sentiment and a country.
agent_executor.invoke(
    {
        "input": "What are some of the latest negative news about employee happiness for companies from France?"
    }
)

Cypher: CYPHER runtime = parallel parallelRuntimeSupport=all MATCH (c:Chunk)<-[:HAS_CHUNK]-(a:Article) WHERE EXISTS {(a)-[:MENTIONS]->(:Organization)-[:IN_CITY]->()-[:IN_COUNTRY]->(:Country {name: $country})} AND a.sentiment < $sentiment WITH c, a, vector.similarity.cosine(c.embedding,$embedding) AS score ORDER BY score DESC LIMIT toInteger($k) RETURN '#title ' + a.title + '
#date ' + toString(a.date) + '
#text ' + c.text AS output

Parameters: {'k': 5, 'country': 'France', 'sentiment': -0.5, 'topic': 'employee happiness'}


{'input': 'What are some of the latest negative news about employee happiness for companies from France?',
 'output': 'Here are some of the latest negative news related to employee happiness for companies from France:\n\n1. **IBM Whistleblower Case**:\n   - **Date**: October 13, 2020\n   - **Summary**: IBM was ordered to pay £22,000 in compensation and two years\' salary to a British employee who blew the whistle on unlawful working practices within the company. The employee faced retaliation from managers after raising concerns about conditions that could amount to sex discrimination. The tribunal criticized IBM\'s managers for their lack of understanding of discrimination and the hostile work environment created for the whistleblower.\n\n2. **Manufacturing Business Leaders Resist Digital Progress**:\n   - **Date**: February 1, 2021\n   - **Summary**: A report titled "The Connected Enterprise" highlighted skepticism among manufacturing industry leaders in France regarding the benefits