# Init

In [None]:
from app import app
context = app.app_context()
context.push()

## Imports

In [2]:
import yaml

## Utils

In [3]:
def show(*objs):
    for obj in objs:
        if not isinstance(obj, str):            
            try:
                obj = yaml.dump(obj)
            except Exception as e:
                import logging
                logging.warning(e)
                pass
        print(obj)

def show_response(obj):
    for choice in response.choices:
        show(choice.message.content)

## Inputs

In [4]:
query="What is the relationship between Zn2+ and glycolate?"

## Initial check

Current response:

In [5]:
import openai
response = openai.ChatCompletion.create(
    messages=[
        { "role": "user", "content": query }
    ],
    model="gpt-3.5-turbo",
    temperature=0
)
show_response(response)

The relationship between Zn2+ and glycolate is that Zn2+ can form a complex with glycolate. In this complex, the Zn2+ ion binds to the oxygen atoms in the glycolate molecule, forming coordination bonds. This interaction between Zn2+ and glycolate allows for the stabilization and transport of glycolate in biological systems.


# Extract core terms

## Init model

In [6]:
from langchain.chat_models import ChatOpenAI
llm = ChatOpenAI(temperature=0)

## Pepare prompts

In [7]:
from langchain.prompts import PromptTemplate
from langchain.prompts.chat import (
    ChatPromptTemplate,
    SystemMessagePromptTemplate,
    HumanMessagePromptTemplate,
)
QUERY_KEY = 'query'

core_terms_prompt_template = ChatPromptTemplate.from_messages([
    SystemMessagePromptTemplate(
        prompt=PromptTemplate(
            template="Always response with comma separated list of chemical or biological terms identified in prompt.",
            input_variables=[]
        )
    ),
    HumanMessagePromptTemplate(
        prompt=PromptTemplate(
            template=f"{{{QUERY_KEY}}}",
            input_variables=[QUERY_KEY],
        )
    )
])
show(core_terms_prompt_template)

!!python/object:langchain.prompts.chat.ChatPromptTemplate
__dict__:
  input_variables:
  - query
  messages:
  - !!python/object:langchain.prompts.chat.SystemMessagePromptTemplate
    __dict__: &id007
      additional_kwargs: {}
      prompt: !!python/object:langchain.prompts.prompt.PromptTemplate
        __dict__: &id001
          input_variables: []
          output_parser: null
          partial_variables: {}
          template: Always response with comma separated list of chemical or biological
            terms identified in prompt.
          template_format: f-string
          validate_template: true
        __fields_set__: &id002 !!set
          input_variables: null
          template: null
        __private_attribute_values__:
          _lc_kwargs: &id003
            input_variables: []
            template: Always response with comma separated list of chemical or biological
              terms identified in prompt.
    __fields_set__: &id008 !!set
      prompt: null
    __pri

## Create output parser

In [8]:
from langchain.output_parsers import CommaSeparatedListOutputParser
output_parser = CommaSeparatedListOutputParser()

## Create chain

In [9]:
from langchain import LLMChain
CORE_TERMS_KEY = 'core_terms'
core_terms_chain = LLMChain(
    llm=llm,
    prompt=core_terms_prompt_template,
    output_parser=output_parser,
    output_key=CORE_TERMS_KEY,
    verbose=True
)
show(core_terms_chain)

!!python/object:langchain.chains.llm.LLMChain
__dict__:
  callback_manager: null
  callbacks: null
  llm: !!python/object:langchain.chat_models.openai.ChatOpenAI
    __dict__: &id013
      cache: null
      callback_manager: null
      callbacks: null
      client: !!python/name:openai.api_resources.chat_completion.ChatCompletion ''
      max_retries: 6
      max_tokens: null
      metadata: null
      model_kwargs: {}
      model_name: gpt-3.5-turbo
      n: 1
      openai_api_base: ''
      openai_api_key: ***OPENAI_API_KEY***
      openai_organization: ''
      openai_proxy: ''
      request_timeout: null
      streaming: false
      tags: null
      temperature: 0.0
      tiktoken_model_name: null
      verbose: false
    __fields_set__: &id014 !!set
      model_kwargs: null
      temperature: null
    __private_attribute_values__:
      _lc_kwargs: &id015
        temperature: 0
  llm_kwargs: {}
  memory: null
  metadata: null
  output_key: core_terms
  output_parser: !!python/obje

## Run

In [10]:
core_terms = core_terms_chain.run(query)
show(core_terms)



[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mSystem: Always response with comma separated list of chemical or biological terms identified in prompt.
Human: What is the relationship between Zn2+ and glycolate?[0m

[1m> Finished chain.[0m
- Zn2+
- glycolate



# Find core terms eids

## Create chain construct to query db

### Prepare graph results output parser
Unwraps query output

In [11]:
from langchain.output_parsers.list import ListOutputParser
from typing import List, Dict, Any

class GraphResultsOutputParser(ListOutputParser):
    """Parse the output of a graph query to a list."""

    key: str = 'output'
    separator: str = '\n'

    def get_format_instructions(self) -> str:
        return (
            "Your response should be a list of results, "
            "eg: `[{key:<result1>}, {key:<result2>}]`"
        )
    
    @property
    def _type(self) -> str:
        return "graph-result-list"

    def parse_result(self, result: List[Dict[str, Any]]) -> List[str]:
        """Parse the output of a graph query."""
        return self.parse(
            self.separator.join(
                map(
                    lambda row: row[self.key],
                    result
                )
            )
        )

    def parse(self, text: str) -> str:
        return text

### Prepare chain construct with output parser

In [12]:
from typing import Any, Dict, List, Optional, TypeVar, Generic

from langchain.schema.language_model import BaseLanguageModel
from langchain.callbacks.manager import CallbackManagerForChainRun
from langchain.pydantic_v1 import Field
from langchain.chains.base import Chain
from langchain.prompts.base import BasePromptTemplate
from langchain.schema import BaseOutputParser

Graph = TypeVar("Graph")

class GraphQueryChain(Chain, Generic[Graph]):
    """Chain for converting terms to eid mappings"""

    graph: Graph = Field(exclude=True)
    graph_exception: Exception = Field(exclude=True, default=Exception)
    aliased_input_keys: List[str] = Field(alias='input_keys', default_factory=lambda: [])
    output_key: str = "results"  #: :meta private:
    output_parser: BaseOutputParser = Field(default_factory=GraphResultsOutputParser)

    # Query to get term eids, input_keys are avaliable as query params
    query: str
        
    @property
    def input_keys(self) -> List[str]:
        return self.aliased_input_keys
        
    @property
    def output_keys(self) -> List[str]:
        return [self.output_key]

    def create_outputs(self, result: List[dict]) -> List[Dict[str, Any]]:
        """Create outputs from response."""
        return {self.output_key: self.output_parser.parse_result(result)}
        
    def _call(
        self,
        inputs: Dict[str, Any],
        run_manager: Optional[CallbackManagerForChainRun] = None,
    ) -> Dict[str, Any]:
        """
        Users can modify the following FindEidsChain Class Variables:

        :var top_k: The maximum number of AQL Query Results to return
        :type top_k: int
        """
        _run_manager = run_manager or CallbackManagerForChainRun.get_noop_manager()

        graph_result = []

        #####################
        # Execute Query #

        try:
            graph_result = self.graph.query(self.query, inputs)
        except self.graph_exception as e:
            _run_manager.on_text(
                "Graph Query Execution Error: ", end="\n", verbose=self.verbose
            )
            _run_manager.on_text(
                e, color="yellow", end="\n\n", verbose=self.verbose
            )
        else:   
            _run_manager.on_text("Graph Result:", end="\n", verbose=self.verbose)
            _run_manager.on_text(
                str(graph_result), color="green", end="\n", verbose=self.verbose
            )
        #####################

        # Return results #
        return self.create_outputs(graph_result)

    @property
    def _chain_type(self) -> str:
        return "graph_query_chain"

## Get graph reference

In [13]:
from llmlib.database import Neo4j

In [14]:
graph = Neo4j().graph()

## Create chain

In [15]:
query_output_key = 'output'
EID_MAPPING_KEY = 'eid_mapping'
find_eids_chain = GraphQueryChain(
    query=f"""
        UNWIND ${CORE_TERMS_KEY} AS term
        MATCH (s:Synonym {{lowercase_name: toLower(term)}})<-[:HAS_SYNONYM]-(n)
        WHERE n.eid IS NOT NULL
        WITH DISTINCT s.name as term, collect(n.eid) as eids
        RETURN apoc.text.format(
            "{{term: %s, eid: [%s]}}",
            [term, apoc.text.join(eids, ',')]
        ) as {query_output_key}
    """,
    input_keys=[CORE_TERMS_KEY],
    output_key=EID_MAPPING_KEY,
    graph=graph,
    output_parser=GraphResultsOutputParser(key=query_output_key),
    verbose=True
)
show(find_eids_chain)



memory=None callbacks=None callback_manager=None verbose=True tags=None metadata=None graph=<langchain.graphs.neo4j_graph.Neo4jGraph object at 0x7f4e46640250> graph_exception=<class 'Exception'> aliased_input_keys=['core_terms'] output_key='eid_mapping' output_parser=GraphResultsOutputParser(key='output', separator='\n') query='\n        UNWIND $core_terms AS term\n        MATCH (s:Synonym {lowercase_name: toLower(term)})<-[:HAS_SYNONYM]-(n)\n        WHERE n.eid IS NOT NULL\n        WITH DISTINCT s.name as term, collect(n.eid) as eids\n        RETURN apoc.text.format(\n            "{term: %s, eid: [%s]}",\n            [term, apoc.text.join(eids, \',\')]\n        ) as output\n    '


### Run

In [16]:
eid_mapping = find_eids_chain.run(core_terms)
show(eid_mapping)



[1m> Entering new GraphQueryChain chain...[0m
Graph Result:
[32;1m[1;3m[{'output': '{term: Zn2+, eid: [ZN+2,27363,29105]}'}, {'output': '{term: glycolate, eid: [GLYCOLLATE,C031149,29805]}'}][0m

[1m> Finished chain.[0m
{term: Zn2+, eid: [ZN+2,27363,29105]}
{term: glycolate, eid: [GLYCOLLATE,C031149,29805]}


## Combine chains test

### Combine chains

In [17]:
from langchain.chains import SimpleSequentialChain
chain = SimpleSequentialChain(
    chains=[
        core_terms_chain,
        find_eids_chain
    ],
    verbose=True
)

### Test

In [18]:
output = chain.run(query)



[1m> Entering new SimpleSequentialChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mSystem: Always response with comma separated list of chemical or biological terms identified in prompt.
Human: What is the relationship between Zn2+ and glycolate?[0m

[1m> Finished chain.[0m
[36;1m[1;3m['Zn2+', 'glycolate'][0m


[1m> Entering new GraphQueryChain chain...[0m
Graph Result:
[32;1m[1;3m[{'output': '{term: Zn2+, eid: [ZN+2,27363,29105]}'}, {'output': '{term: glycolate, eid: [GLYCOLLATE,C031149,29805]}'}][0m

[1m> Finished chain.[0m
[33;1m[1;3m{term: Zn2+, eid: [ZN+2,27363,29105]}
{term: glycolate, eid: [GLYCOLLATE,C031149,29805]}[0m

[1m> Finished chain.[0m


# Respond to inquiry given database context

## Get graph reference

In [19]:
from llmlib.database import Neo4j

In [20]:
graph = Neo4j().graph()

### Show schema preview

In [21]:
show(graph.schema)


        Node properties are the following:
        [{'properties': [{'property': 'eid', 'type': 'STRING'}, {'property': 'entry2_type', 'type': 'STRING'}, {'property': 'data_source', 'type': 'STRING'}, {'property': 'entry1_type', 'type': 'STRING'}, {'property': 'type', 'type': 'STRING'}, {'property': 'description', 'type': 'STRING'}, {'property': 'name', 'type': 'STRING'}, {'property': 'pmid', 'type': 'STRING'}, {'property': 'pub_year', 'type': 'INTEGER'}, {'property': 'title', 'type': 'STRING'}, {'property': 'journal', 'type': 'STRING'}, {'property': 'sentence', 'type': 'STRING'}], 'labels': 'db_Literature'}, {'properties': [{'property': 'eid', 'type': 'STRING'}, {'property': 'entry2_type', 'type': 'STRING'}, {'property': 'data_source', 'type': 'STRING'}, {'property': 'entry1_type', 'type': 'STRING'}, {'property': 'type', 'type': 'STRING'}, {'property': 'description', 'type': 'STRING'}], 'labels': 'Association'}, {'properties': [{'property': 'eid', 'type': 'STRING'}, {'property': 'dat

## Limit schema

Whole schema is too big to fit into request context - limit it to required parts.

### Set utils

In [22]:
result_parser_factory = lambda key: lambda result: list(map(lambda row: row[key], result))
output_result_parser = result_parser_factory('output')

### Get preselected node properties

In [23]:
preselected_node_properties = [ "displayName", "eid" ]
preselected_node_labels = ["Compound", "Reaction", "EnzReaction", "Regulation"]
node_properties_query = """
CALL apoc.meta.data()
YIELD label, other, elementType, type, property
WHERE NOT type = "RELATIONSHIP" AND elementType = "node" AND label in $preselected_node_labels AND property in $preselected_node_properties
WITH label AS nodeLabels, collect({property:property, type:type}) AS properties
RETURN {labels: nodeLabels, properties: properties} AS output
"""
node_properties = output_result_parser(
    graph.query(
        node_properties_query,
        dict(
            preselected_node_properties=preselected_node_properties,
            preselected_node_labels=preselected_node_labels
        )
    )
)
show(node_properties)

- labels: Compound
  properties:
  - property: displayName
    type: STRING
  - property: eid
    type: STRING
- labels: EnzReaction
  properties:
  - property: displayName
    type: STRING
  - property: eid
    type: STRING
- labels: Reaction
  properties:
  - property: eid
    type: STRING
  - property: displayName
    type: STRING
- labels: Regulation
  properties:
  - property: eid
    type: STRING
  - property: displayName
    type: STRING



### Get preselected relationship properties

In [24]:
preselected_relationship_types = ["CONSUMED_BY", "PRODUCES", "CATALYZES", "REGULATES"]
rel_properties_query = """
CALL apoc.meta.data()
YIELD label, other, elementType, type, property
WHERE NOT type = "RELATIONSHIP" AND elementType = "relationship" AND type in $preselected_relationship_types
WITH label AS nodeLabels, collect({property:property, type:type}) AS properties
RETURN {type: nodeLabels, properties: properties} AS output
"""
relationships_properties = output_result_parser(
    graph.query(
        rel_properties_query,
        dict(
            preselected_relationship_types=preselected_relationship_types,
        )
    )
)
show(relationships_properties)

[]



### Get preselected relationships

In [25]:
rel_query = """
CALL apoc.meta.data()
YIELD label, other, elementType, type, property
WHERE type = "RELATIONSHIP" AND elementType = "node" AND property in $preselected_relationship_types AND label in $preselected_node_labels
UNWIND other AS other_node
WITH label, property, toString(other_node) as other_node
WHERE other_node in $preselected_node_labels
RETURN {start: label, type: property, end: toString(other_node)} AS output
"""
relationships = output_result_parser(
    graph.query(
        rel_query,
        dict(
            preselected_relationship_types=preselected_relationship_types,
            preselected_node_labels=preselected_node_labels
        )
    )
)
show(relationships)

- end: Reaction
  start: Compound
  type: CONSUMED_BY
- end: Regulation
  start: Compound
  type: REGULATES
- end: Reaction
  start: EnzReaction
  type: CATALYZES
- end: Compound
  start: Reaction
  type: PRODUCES
- end: EnzReaction
  start: Regulation
  type: REGULATES
- end: Reaction
  start: Regulation
  type: REGULATES



### Compose schema

In [26]:
limited_schema = f"""
Node properties are the following:
{node_properties}
Relationship properties are the following:
{relationships_properties}
The relationships are the following:
{relationships}
"""
show(limited_schema)


Node properties are the following:
[{'properties': [{'property': 'displayName', 'type': 'STRING'}, {'property': 'eid', 'type': 'STRING'}], 'labels': 'Compound'}, {'properties': [{'property': 'displayName', 'type': 'STRING'}, {'property': 'eid', 'type': 'STRING'}], 'labels': 'EnzReaction'}, {'properties': [{'property': 'eid', 'type': 'STRING'}, {'property': 'displayName', 'type': 'STRING'}], 'labels': 'Reaction'}, {'properties': [{'property': 'eid', 'type': 'STRING'}, {'property': 'displayName', 'type': 'STRING'}], 'labels': 'Regulation'}]
Relationship properties are the following:
[]
The relationships are the following:
[{'start': 'Compound', 'end': 'Reaction', 'type': 'CONSUMED_BY'}, {'start': 'Compound', 'end': 'Regulation', 'type': 'REGULATES'}, {'start': 'EnzReaction', 'end': 'Reaction', 'type': 'CATALYZES'}, {'start': 'Reaction', 'end': 'Compound', 'type': 'PRODUCES'}, {'start': 'Regulation', 'end': 'EnzReaction', 'type': 'REGULATES'}, {'start': 'Regulation', 'end': 'Reaction', '

### Set schema on the graph

In [27]:
graph.schema = limited_schema
graph.structured_schema = {
    "node_props": {el["labels"]: el["properties"] for el in node_properties},
    "rel_props": {el["type"]: el["properties"] for el in relationships_properties},
    "relationships": relationships,
}
show(graph.structured_schema)

node_props:
  Compound:
  - property: displayName
    type: STRING
  - property: eid
    type: STRING
  EnzReaction:
  - property: displayName
    type: STRING
  - property: eid
    type: STRING
  Reaction:
  - property: eid
    type: STRING
  - property: displayName
    type: STRING
  Regulation:
  - property: eid
    type: STRING
  - property: displayName
    type: STRING
rel_props: {}
relationships:
- end: Reaction
  start: Compound
  type: CONSUMED_BY
- end: Regulation
  start: Compound
  type: REGULATES
- end: Reaction
  start: EnzReaction
  type: CATALYZES
- end: Compound
  start: Reaction
  type: PRODUCES
- end: EnzReaction
  start: Regulation
  type: REGULATES
- end: Reaction
  start: Regulation
  type: REGULATES



## Prepare prompts

In [44]:
from langchain import OpenAI, PromptTemplate

SCHEMA_KEY = 'schema'
EXAMPLES_KEY = 'examples'

cypher_generation_template = f"""
Task:Generate Cypher statements to query a graph database.
Instructions:
Use only the provided relationship types and properties in the schema.
Do not use any other relationship types or properties that are not provided.
If quering for relationship filter out following eids: ["PROTON"]
Schema:
{{{SCHEMA_KEY}}}
Examples:
{{{EXAMPLES_KEY}}}
Note: Do not include any explanations or apologies in your responses.
Do not respond to any questions that might ask anything else than for you to construct a Cypher statements.
Do not include any text except the generated Cypher statements.
Do not wrap statements in brackets.
Use example queries.

The question is:
{{{QUERY_KEY}}}
"""

cypher_prompt = PromptTemplate(
    input_variables=[SCHEMA_KEY, EXAMPLES_KEY, QUERY_KEY], template=cypher_generation_template
)
show(cypher_prompt)

!!python/object:langchain.prompts.prompt.PromptTemplate
__dict__:
  input_variables:
  - schema
  - examples
  - query
  output_parser: null
  partial_variables: {}
  template: '

    Task:Generate Cypher statements to query a graph database.

    Instructions:

    Use only the provided relationship types and properties in the schema.

    Do not use any other relationship types or properties that are not provided.

    If quering for relationship filter out following eids: ["PROTON"]

    Schema:

    {schema}

    Examples:

    {examples}

    Note: Do not include any explanations or apologies in your responses.

    Do not respond to any questions that might ask anything else than for you to construct
    a Cypher statements.

    Do not include any text except the generated Cypher statements.

    Do not wrap statements in brackets.

    Use example queries.


    The question is:

    {query}

    '
  template_format: f-string
  validate_template: true
__fields_set__: !!set
  in

In [45]:
from langchain.prompts import PromptTemplate
from langchain.chains import GraphCypherQAChain
from langchain.prompts.chat import (
    ChatPromptTemplate,
    SystemMessagePromptTemplate,
    HumanMessagePromptTemplate,
    AIMessagePromptTemplate,
)

graph_qa_prompt_template = ChatPromptTemplate.from_messages([
    SystemMessagePromptTemplate(
        prompt=PromptTemplate(
            template="You are an expert in field of biology and chemistry with access to graph database.",
            input_variables=[]
        )
    ),
    HumanMessagePromptTemplate(
        prompt=PromptTemplate(
            template=f"""
                Step 1: For given inquiry identify core terms.
                Step 2: Return mapping from core term to list of database 'eid' for related entities.
                Inqury: {{{QUERY_KEY}}}
            """,
            input_variables=[QUERY_KEY],
        )
    ),
    AIMessagePromptTemplate(
        prompt=PromptTemplate(
            template=f"{{{EID_MAPPING_KEY}}}",
            input_variables=[EID_MAPPING_KEY]
        )
    ),
    HumanMessagePromptTemplate(prompt=cypher_prompt)
])
show(graph_qa_prompt_template)

!!python/object:langchain.prompts.chat.ChatPromptTemplate
__dict__:
  input_variables:
  - eid_mapping
  - examples
  - query
  - schema
  messages:
  - !!python/object:langchain.prompts.chat.SystemMessagePromptTemplate
    __dict__: &id013
      additional_kwargs: {}
      prompt: !!python/object:langchain.prompts.prompt.PromptTemplate
        __dict__: &id001
          input_variables: []
          output_parser: null
          partial_variables: {}
          template: You are an expert in field of biology and chemistry with access
            to graph database.
          template_format: f-string
          validate_template: true
        __fields_set__: &id002 !!set
          input_variables: null
          template: null
        __private_attribute_values__:
          _lc_kwargs: &id003
            input_variables: []
            template: You are an expert in field of biology and chemistry with access
              to graph database.
    __fields_set__: &id014 !!set
      prompt: 

In [46]:
qa_prompt_template = f"""You are an assistant that helps to form nice and human understandable answers.
The information part contains the provided information that you can use to construct an answer.
Make the answer sound as a response to the question. Do not mention that you based the result on the given information.
Information:
{{context}}

Question:
{{{QUERY_KEY}}}
Helpful Answer:"""
qa_prompt = PromptTemplate(
    input_variables=["context", QUERY_KEY], template=qa_prompt_template
)

## Create chain

#### Adapt GraphCypherQAChain to accept custom input keys

In [47]:
"""Question answering over a graph."""
from __future__ import annotations

import re
from typing import Any, Dict, List, Optional

from langchain.callbacks.manager import CallbackManagerForChainRun
from langchain.chains.base import Chain
from langchain.chains.graph_qa.prompts import CYPHER_GENERATION_PROMPT, CYPHER_QA_PROMPT
from langchain.chains.llm import LLMChain
from langchain.graphs.neo4j_graph import Neo4jGraph
from langchain.pydantic_v1 import Field
from langchain.schema import BasePromptTemplate
from langchain.schema.language_model import BaseLanguageModel

INTERMEDIATE_STEPS_KEY = "intermediate_steps"


def extract_cypher(text: str) -> str:
    """Extract Cypher code from a text.

    Args:
        text: Text to extract Cypher code from.

    Returns:
        Cypher code extracted from the text.
    """
    # The pattern to find Cypher code enclosed in triple backticks
    pattern = r"```(.*?)```"

    # Find all matches in the input text
    matches = re.findall(pattern, text, re.DOTALL)

    return matches[0] if matches else text


def construct_schema(
    structured_schema: Dict[str, Any],
    include_types: List[str],
    exclude_types: List[str],
) -> str:
    """Filter the schema based on included or excluded types"""

    def filter_func(x: str) -> bool:
        return x in include_types if include_types else x not in exclude_types

    filtered_schema = {
        "node_props": {
            k: v
            for k, v in structured_schema.get("node_props", {}).items()
            if filter_func(k)
        },
        "rel_props": {
            k: v
            for k, v in structured_schema.get("rel_props", {}).items()
            if filter_func(k)
        },
        "relationships": [
            r
            for r in structured_schema.get("relationships", [])
            if all(filter_func(r[t]) for t in ["start", "end", "type"])
        ],
    }

    return (
        f"Node properties are the following: \n {filtered_schema['node_props']}\n"
        f"Relationships properties are the following: \n {filtered_schema['rel_props']}"
        "\nRelationships are: \n"
        + str(
            [
                f"(:{el['start']})-[:{el['type']}]->(:{el['end']})"
                for el in filtered_schema["relationships"]
            ]
        )
    )


class GraphCypherQAChain(Chain):
    """Chain for question-answering against a graph by generating Cypher statements."""

    graph: Neo4jGraph = Field(exclude=True)
    cypher_generation_chain: LLMChain
    qa_chain: LLMChain
    graph_schema: str
    input_key: str = "query"  #: :meta private:
    output_key: str = "result"  #: :meta private:
    top_k: int = 10
    """Number of results to return from the query"""
    return_intermediate_steps: bool = False
    """Whether or not to return the intermediate steps along with the final answer."""
    return_direct: bool = False
    """Whether or not to return the result of querying the graph directly."""

    @property
    def input_keys(self) -> List[str]:
        """Return the input keys.

        :meta private:
        """
        return [self.input_key]

    @property
    def output_keys(self) -> List[str]:
        """Return the output keys.

        :meta private:
        """
        _output_keys = [self.output_key]
        return _output_keys

    @property
    def _chain_type(self) -> str:
        return "graph_cypher_chain"

    @classmethod
    def from_llm(
        cls,
        llm: Optional[BaseLanguageModel] = None,
        *,
        qa_prompt: BasePromptTemplate = CYPHER_QA_PROMPT,
        cypher_prompt: BasePromptTemplate = CYPHER_GENERATION_PROMPT,
        cypher_llm: Optional[BaseLanguageModel] = None,
        qa_llm: Optional[BaseLanguageModel] = None,
        exclude_types: List[str] = [],
        include_types: List[str] = [],
        **kwargs: Any,
    ) -> GraphCypherQAChain:
        """Initialize from LLM."""

        if not cypher_llm and not llm:
            raise ValueError("Either `llm` or `cypher_llm` parameters must be provided")
        if not qa_llm and not llm:
            raise ValueError("Either `llm` or `qa_llm` parameters must be provided")
        if cypher_llm and qa_llm and llm:
            raise ValueError(
                "You can specify up to two of 'cypher_llm', 'qa_llm'"
                ", and 'llm', but not all three simultaneously."
            )

        qa_chain = LLMChain(llm=qa_llm or llm, prompt=qa_prompt)
        cypher_generation_chain = LLMChain(llm=cypher_llm or llm, prompt=cypher_prompt)

        if exclude_types and include_types:
            raise ValueError(
                "Either `exclude_types` or `include_types` "
                "can be provided, but not both"
            )

        graph_schema = construct_schema(
            kwargs["graph"].structured_schema, include_types, exclude_types
        )

        return cls(
            graph_schema=graph_schema,
            qa_chain=qa_chain,
            cypher_generation_chain=cypher_generation_chain,
            **kwargs,
        )

    def _call(
        self,
        inputs: Dict[str, Any],
        run_manager: Optional[CallbackManagerForChainRun] = None,
    ) -> Dict[str, Any]:
        """Generate Cypher statement, use it to look up in db and answer question."""
        _run_manager = run_manager or CallbackManagerForChainRun.get_noop_manager()
        callbacks = _run_manager.get_child()

        intermediate_steps: List = []

        generated_cypher = self.cypher_generation_chain.run(
            {**inputs, "schema": self.graph_schema}, callbacks=callbacks
        )

        # Extract Cypher code if it is wrapped in backticks
        generated_cypher = extract_cypher(generated_cypher)

        _run_manager.on_text("Generated Cypher:", end="\n", verbose=self.verbose)
        _run_manager.on_text(
            generated_cypher, color="green", end="\n", verbose=self.verbose
        )

        intermediate_steps.append({"query": generated_cypher})

        context = []
        for statement in generated_cypher.split(';'):
            if statement:
                # Retrieve and limit the number of results
                context += self.graph.query(statement)[: self.top_k]

        if self.return_direct:
            final_result = context
        else:
            _run_manager.on_text("Full Context:", end="\n", verbose=self.verbose)
            _run_manager.on_text(
                str(context), color="green", end="\n", verbose=self.verbose
            )

            intermediate_steps.append({"context": context})

            result = self.qa_chain(
                {**inputs, "context": context},
                callbacks=callbacks,
            )
            final_result = result[self.qa_chain.output_key]

        chain_result: Dict[str, Any] = {self.output_key: final_result}
        if self.return_intermediate_steps:
            chain_result[INTERMEDIATE_STEPS_KEY] = intermediate_steps

        return chain_result

In [59]:
RESPONSE_KEY = 'response'
graph_qa_chain = GraphCypherQAChain.from_llm(
    ChatOpenAI(temperature=0),
    graph=graph,
    verbose=True,
    cypher_prompt=graph_qa_prompt_template,
    qa_prompt=qa_prompt,
    output_key=RESPONSE_KEY,
    return_intermediate_steps=True
)
show(graph_qa_chain)



memory=None callbacks=None callback_manager=None verbose=True tags=None metadata=None graph=<langchain.graphs.neo4j_graph.Neo4jGraph object at 0x7f4e46640250> cypher_generation_chain=LLMChain(memory=None, callbacks=None, callback_manager=None, verbose=False, tags=None, metadata=None, prompt=ChatPromptTemplate(input_variables=['eid_mapping', 'examples', 'query', 'schema'], output_parser=None, partial_variables={}, messages=[SystemMessagePromptTemplate(prompt=PromptTemplate(input_variables=[], output_parser=None, partial_variables={}, template='You are an expert in field of biology and chemistry with access to graph database.', template_format='f-string', validate_template=True), additional_kwargs={}), HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['query'], output_parser=None, partial_variables={}, template="\n                Step 1: For given inquiry identify core terms.\n                Step 2: Return mapping from core term to list of database 'eid' for related enti

### Declare query examples

In [68]:
all_relation_type_str = "|".join(preselected_relationship_types)
max_search_depth = 5
examples=[
    f"""
    // Get shortest path between nodes
    MATCH (a), (b)
    WHERE a.eid in <list_of_term_a_eids> AND b.eid in <list_of_term_b_eids>
    MATCH path=allShortestPaths((a)-[:{all_relation_type_str}*1..{max_search_depth}]-(b))
    RETURN [e in apoc.path.elements(path) | coalesce(e.eid, type(e))] as path LIMIT 1;
    """,
    """
    // Get nodes information
    UNWIND <list_of_eids> as eid
    MATCH (node {{eid: eid}}
    RETURN node;
    """
]
show(*examples)


    // Get shortest path between nodes
    MATCH (a), (b)
    WHERE a.eid in <list_of_term_a_eids> AND b.eid in <list_of_term_b_eids>
    MATCH path=allShortestPaths((a)-[:CONSUMED_BY|PRODUCES|CATALYZES|REGULATES*1..5]-(b))
    RETURN [e in apoc.path.elements(path) | coalesce(e.eid, type(e))] as path LIMIT 1;
    

    // Get nodes information
    UNWIND <list_of_eids> as eid
    MATCH (node {{eid: eid}}
    RETURN node;
    


### Declare run parameters

In [61]:
graph_qa_chain_inputs = {
    QUERY_KEY: query,
    EID_MAPPING_KEY: eid_mapping,
    EXAMPLES_KEY: examples
}
show(graph_qa_chain_inputs)

eid_mapping: '{term: Zn2+, eid: [ZN+2,27363,29105]}

  {term: glycolate, eid: [GLYCOLLATE,C031149,29805]}'
examples:
- "\n    // Get shortest path between nodes\n    MATCH (a), (b)\n    WHERE a.eid in\
  \ <term_a_eids> AND b.eid in <term_b_eids>\n    MATCH path=allShortestPaths((a)-[:CONSUMED_BY|PRODUCES|CATALYZES|REGULATES*1..5]-(b))\n\
  \    RETURN [e in apoc.path.elements(path) | coalesce(e.eid, type(e))] as path LIMIT\
  \ 1\n    "
- "\n    // Get nodes information\n    UNWIND <list_of_eids> as eid\n    MATCH (node\
  \ {{eid: eid}}\n    RETURN node;\n    "
query: What is the relationship between Zn2+ and glycolate?



### Run

In [51]:
graph_qa_chain.run(graph_qa_chain_inputs)



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3m// Get relationship between Zn2+ and glycolate
MATCH (a:Compound {eid: 'ZN+2'}), (b:Compound {eid: 'GLYCOLLATE'})
MATCH path=(a)-[:CONSUMED_BY|REGULATES|PRODUCES|CATALYZES*1..5]-(b)
RETURN [e in apoc.path.elements(path) | coalesce(e.eid, type(e))] as relationship
LIMIT 1[0m
Full Context:
[32;1m[1;3m[{'relationship': ['ZN+2', 'PRODUCES', 'TRANS-RXN-346', 'CONSUMED_BY', 'PROTON', 'PRODUCES', 'RXN0-7173', 'PRODUCES', 'GLYCOLLATE']}][0m

[1m> Finished chain.[0m


'The relationship between Zn2+ and glycolate is that Zn2+ produces trans-RXN-346, which is consumed by proton. Proton then produces RXN0-7173, which in turn produces glycollate.'

### Combine chains

In [63]:
from langchain.chains import SequentialChain
chain = SequentialChain(
    chains=[
        core_terms_chain,
        find_eids_chain,
        graph_qa_chain
    ],
    input_variables=[QUERY_KEY, EXAMPLES_KEY],
    output_variables=[RESPONSE_KEY],
    verbose=True
)

### Test

In [66]:
output = chain.run({
  QUERY_KEY: query,
  EXAMPLES_KEY: examples
})
show(output)



[1m> Entering new SequentialChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mSystem: Always response with comma separated list of chemical or biological terms identified in prompt.
Human: What is the relationship between Zn2+ and glycolate?[0m

[1m> Finished chain.[0m


[1m> Entering new GraphQueryChain chain...[0m
Graph Result:
[32;1m[1;3m[{'output': '{term: Zn2+, eid: [ZN+2,27363,29105]}'}, {'output': '{term: glycolate, eid: [GLYCOLLATE,C031149,29805]}'}][0m

[1m> Finished chain.[0m


[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3m// Get relationship between Zn2+ and glycolate
MATCH (a:Compound {eid: 'ZN+2'}), (b:Compound {eid: 'GLYCOLLATE'})
MATCH path=(a)-[:CONSUMED_BY|REGULATES|PRODUCES|CATALYZES*1..5]-(b)
RETURN [e in apoc.path.elements(path) | coalesce(e.eid, type(e))] as relationship
LIMIT 1[0m
Full Context:
[32;1m[1;3m[{'relationship': ['ZN+2', 'PRODUCES', 'TRANS-RXN-346', 'CO

In [69]:
output = chain.run({
  QUERY_KEY: "What is the relationship between Zn2+ and glcD?",
  EXAMPLES_KEY: examples
})
show(output)



[1m> Entering new SequentialChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mSystem: Always response with comma separated list of chemical or biological terms identified in prompt.
Human: What is the relationship between Zn2+ and glcD?[0m

[1m> Finished chain.[0m


[1m> Entering new GraphQueryChain chain...[0m
Graph Result:
[32;1m[1;3m[{'output': '{term: Zn2+, eid: [ZN+2,27363,29105]}'}, {'output': '{term: glcD, eid: [61770694,BSU28680,60653535,64200025,59158410,45671443,60669845,61892028,60977289,61724801,36133371,64304595,60766668,64465218,61255578,61770240,61649829,61870746,64355206,61192184,62016165,61081605,61853209,60549819,5866097,61579555,1185220,61843635,60991766,64182690,61179740,61811463,56563334,64322265,61575427,49869220,49611795,64091855,61671506,64300941,61789144,64191852,61684291,61740845,64147591,61717603,61755497,29368954,64444342,64316758,64251095,64057122,58156916,56651156,56687568,58531620,58452692,511328

KeyboardInterrupt: 

In [70]:
output = chain.run({
  QUERY_KEY: "What is the relationship between INHBA and MTMR4?",
  EXAMPLES_KEY: examples
})
show(output)



[1m> Entering new SequentialChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mSystem: Always response with comma separated list of chemical or biological terms identified in prompt.
Human: What is the relationship between INHBA and MTMR4?[0m

[1m> Finished chain.[0m


[1m> Entering new GraphQueryChain chain...[0m
Graph Result:
[32;1m[1;3m[{'output': '{term: Inhba, eid: [119816394,118583887,103736752,104853030,114623192,102914541,114095202,101991500,117714043,109676338,101598630,101976959,116081930,102015383,101704279,100720616,101563494,29200,110333849,116883802,110308293,110556830,16323,114685392,101833064,100762446,107146199,105998333,113197979,120529526,105225678,108266640]}'}, {'output': '{term: INHBA, eid: [121028933,119698030,121333580,120398268,121353070,118989268,118929076,121087860,120591502,121145766,119147703,120235583,120305243,118684293,120878233,118714453,118665963,119241670,120754777,119850385,119534608,11862006

ValueError: Generated Cypher Statement is not valid
{code: Neo.ClientError.Statement.SyntaxError} {message: Variable `path` not defined (line 3, column 28 (offset: 164))
"RETURN [r in relationships(path) | type(r)] as relationship"
                            ^}