In [None]:
from langchain.graphs import Neo4jGraph

import os
from getpass import getpass

# Connection settings are taken from the environment so that no secret is
# stored in the notebook. URI and username fall back to the local-development
# values when the corresponding variables are not set.
NEO4J_URI = os.environ.get("NEO4J_URI", "bolt://localhost:7687")
NEO4J_USERNAME = os.environ.get("NEO4J_USERNAME", "neo4j")
# The password has no default: when NEO4J_PASSWORD is missing (or empty) it is
# requested interactively and never written to the notebook.
# NOTE(review): a password used to be committed on this line; treat it as
# compromised and change it on the database.
NEO4J_PASSWORD = os.environ.get("NEO4J_PASSWORD") or getpass("Neo4j password: ")

# Shared handle used by every later cell to run Cypher against the database.
graph = Neo4jGraph(
    url=NEO4J_URI,
    username=NEO4J_USERNAME,
    password=NEO4J_PASSWORD,
)

In [None]:
# Cypher that registers a vector index named 'wikipediaVector' over the
# `embedding` property of :ChunkVector nodes (1536 dimensions, cosine metric).
create_index_cypher = """
CALL db.index.vector.createNodeIndex(
  'wikipediaVector', // index name
  'ChunkVector',     // node label
  'embedding', // node property
   1536,       // vector size
   'cosine'    // similarity metric
)
"""
graph.query(create_index_cypher)

In [22]:
# Create four :ChunkVector nodes, one per example value, each tagged with its
# position in the list. The values are: an integer, a 3-element list, a list
# of strings, and a 1536-element float list (Cypher's range() is inclusive),
# which is the only one matching the dimension declared for the index.
example_nodes_cypher = """
WITH [1, [1,2,3], ["2","5"], [x in range(0, 1535) | toFloat(x)]] AS exampleValues
UNWIND range(0, size(exampleValues) - 1) as index
CREATE (:ChunkVector {embedding: exampleValues[index], index: index})
"""
graph.query(example_nodes_cypher)

[]

In [23]:
# Ask the 'wikipediaVector' index for the 3 nearest neighbours of a
# 1536-element probe vector (the 0..1535 ramp, each element halved) and
# report each hit's `index` property together with its similarity score.
nearest_neighbours_cypher = """
CALL db.index.vector.queryNodes(
  'wikipediaVector', // index name
  3, // topK neighbors to return
  [x in range(0,1535) | toFloat(x) / 2] //input vector
)
YIELD node, score
RETURN node.index AS index, score
"""
graph.query(nearest_neighbours_cypher)

[{'index': 3, 'score': 1.0}]

In [None]:
import wikipedia

# Fetch the Wikipedia article identified by page id 60979422 and print its
# complete plain-text body; `bg3` is reused by later cells.
bg3 = wikipedia.page(pageid=60979422)
article_text = bg3.content
print(article_text)

In [None]:
import os, sys
from langchain.embeddings import OpenAIEmbeddings

# Do not store the API key in the notebook: reuse the value already exported
# in the environment, otherwise ask for it interactively and keep it only in
# this process's environment.
# NOTE(review): a key used to be committed on this line; treat it as
# compromised and revoke it with the provider.
if not os.environ.get("OPENAI_API_KEY"):
    from getpass import getpass

    os.environ["OPENAI_API_KEY"] = getpass("OpenAI API key: ")

In [None]:
embeddings = OpenAIEmbeddings()

# Split the article on blank lines and keep only paragraphs longer than 50
# characters (drops headings and other very short fragments).
chunk_texts = [el for el in bg3.content.split("\n\n") if len(el) > 50]

# Embed all paragraphs through the batch API instead of issuing one
# embed_query request per paragraph.
chunk_vectors = embeddings.embed_documents(chunk_texts)

# One record per paragraph, in the shape the import query expects:
# {'text': <paragraph>, 'embedding': <list of floats>}.
chunks = [{'text': text, 'embedding': vector}
          for text, vector in zip(chunk_texts, chunk_vectors)]

In [None]:
# For every record in $data: create a :Chunk node holding the paragraph text,
# then attach the embedding via db.create.setVectorProperty. The query
# returns a single 'done' row.
store_chunks_cypher = """
UNWIND $data AS row
CREATE (c:Chunk {text: row.text})
WITH c, row
CALL db.create.setVectorProperty(c, 'embedding', row.embedding)
YIELD node
RETURN distinct 'done'
"""
store_chunks_params = {'data': chunks}
graph.query(store_chunks_cypher, store_chunks_params)

In [None]:
from langchain.chains.base import Chain
from langchain.chains.llm import LLMChain
from langchain.chat_models import ChatOpenAI
from langchain.chains.question_answering.stuff_prompt import CHAT_PROMPT
from langchain.callbacks.manager import CallbackManagerForChainRun

from typing import Any, Dict, List
from pydantic import Field


In [21]:
# Cypher used to retrieve the text of the chunks closest to a query vector.
# Parameters:
#   $embedding - the query vector
#   $k         - number of neighbours to fetch
# The final LIMIT is bound to $k as well; a literal limit here would silently
# cap the result no matter which k the caller passes.
# NOTE(review): this targets an index named 'wikipedia', while the only index
# created in the visible cells is 'wikipediaVector' on :ChunkVector - confirm
# that a 'wikipedia' index over :Chunk(embedding) exists before running.
vector_search = """
WITH $embedding AS e
CALL db.index.vector.queryNodes('wikipedia', $k, e) yield node, score
RETURN node.text AS result
ORDER BY score DESC
LIMIT $k
"""

In [None]:
class Neo4jVectorChain(Chain):
    graph: Neo4jGraph = Field(exclude=True)
    input_key:  str = 'query'
    output_key: str = 'result'
    embeddings: OpenAIEmbeddings = OpenAIEmbeddings()
    qa_chain: LLMChain = LLMChain(llm=ChatOpenAI(temperature=0), prompt=CHAT_PROMPT)

    @property
    def input_keys(self) -> List[str]:
        """Return the single input name this chain reads: the value of `input_key`."""
        expected_inputs = [self.input_key]
        return expected_inputs

    @property
    def output_keys(self) -> List[str]:
        """Return the single output name this chain declares: the value of `output_key`."""
        return [self.output_key]

    def _call(self, inputs: Dict[str, str], run_manager, key=3) -> Dict[str, Any]:
        question = inputs[self.input_key]
        embedding = self.embeddings.embed_query(question)

        run_manager.on_text("Vector search embeddings:", end='\n', verbose=self.verbose)
        run_manager.on_text(embedding[:5], color='green', end='\n', verbose=self.verbose)

        context = self.graph.query(vector_search, {'embedding': embedding, 'k': 3})
        context = [el['result'] for el in context]

        run_manager.on_text('Retrieved context:', end='\n', verbose=self.verbose)
        run_manager.on_text(context, color='green', end='\n', verbose=self.verbose)

        result = self.qa_chain({'question': question, 'context': context})
        final_result = result[self.qa_chain.output_key]