In [1]:
import os
from os.path import join, dirname
from dotenv import load_dotenv, find_dotenv

# Load variables from the .env file located by find_dotenv() into the
# process environment before reading any secrets from it.
load_dotenv(dotenv_path=find_dotenv())

# Secrets are read from the environment rather than hard-coded; each is
# None when the corresponding variable is not set.
OPENAI_KEY = os.getenv("OPENAI_KEY")
NEO4J_PASSWORD = os.getenv("NEO4J_PASSWORD")

In [2]:
from langchain.docstore.document import Document
from langchain_community.document_loaders import TextLoader
from langchain_community.vectorstores import Neo4jVector
from langchain_openai import OpenAIEmbeddings
from langchain_text_splitters import CharacterTextSplitter

In [3]:
# Read the raw speech transcript, decoding it explicitly as UTF-8.
with open("state_of_the_union.txt", "r", encoding="utf-8") as f:
    text = f.read()

# Write the text back to a new file, ensuring it's in UTF-8 encoding
with open("state_of_the_union_utf8.txt", "w", encoding="utf-8") as f:
    f.write(text)

# TextLoader falls back to the platform's default codec when no encoding is
# given, which would defeat the UTF-8 copy written above on systems whose
# locale is not UTF-8. Pass the encoding the file was written with.
loader = TextLoader("state_of_the_union_utf8.txt", encoding="utf-8")
documents = loader.load()

# Target chunks of about 1000 characters with no overlap. The splitter can
# still emit a longer chunk (it logs "Created a chunk of size ..." when it
# does), so 1000 is a target rather than a hard limit.
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
docs = text_splitter.split_documents(documents)

# Embedding client used later to vectorise the chunks and the query.
embeddings = OpenAIEmbeddings(openai_api_key=OPENAI_KEY)

Created a chunk of size 1163, which is longer than the specified 1000
Created a chunk of size 1015, which is longer than the specified 1000


In [4]:
# Connection settings for the Neo4j instance.
# The URI and user name may be overridden through the NEO4J_URI and
# NEO4J_USERNAME environment variables (for example via the .env file) so the
# notebook can target a different database without editing code. When those
# variables are unset, the original values below are used unchanged.
url = os.environ.get("NEO4J_URI", "neo4j+s://edc40677.databases.neo4j.io")
username = os.environ.get("NEO4J_USERNAME", "neo4j")
# The password is never hard-coded; it is the value read from the environment
# in the first cell.
password = NEO4J_PASSWORD


In [5]:
# Build the Neo4j-backed vector store from the split documents, using the
# embedding client and the connection settings defined in earlier cells.
db = Neo4jVector.from_documents(
    documents=docs,
    embedding=embeddings,
    url=url,
    username=username,
    password=password,
)

In [6]:
# Ask the vector store for the two chunks most similar to the question,
# together with their similarity scores.
query = "What did the president say about Ketanji Brown Jackson"
docs_with_score = db.similarity_search_with_score(query, k=2)

# Print each hit framed above and below by an 80-character rule.
for hit, similarity in docs_with_score:
    for line in ("-" * 80, None, hit.page_content, "-" * 80):
        if line is None:
            print("Score: ", similarity)
        else:
            print(line)

--------------------------------------------------------------------------------
Score:  0.8882960081100464
So I know the anxieties that are out there right now. They're not new. These struggles are the reason I ran for president. These struggles are what I've witnessed for years in places like Elkhart, Ind., and Galesburg, Ill. I hear about them in the letters that I read each night. The toughest to read are those written by children asking why they have to move from their home, or when their mom or dad will be able to go back to work.

For these Americans and so many others, change has not come fast enough. Some are frustrated; some are angry. They don't understand why it seems like bad behavior on Wall Street is rewarded but hard work on Main Street isn't, or why Washington has been unable or unwilling to solve any of our problems. They are tired of the partisanship and the shouting and the pettiness. They know we can't afford it. Not now.
-------------------------------------------