In [5]:
from langchain_community.vectorstores import Pinecone
from langchain_community.document_loaders import DirectoryLoader, TextLoader
from langchain_pinecone import PineconeVectorStore
from langchain_openai import OpenAIEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter



In [2]:

loader = DirectoryLoader("./", glob="**/*.txt", loader_cls=TextLoader)
docs = loader.load()

print(f"Found {len(docs)} letters")


Found 21 letters


In [6]:
# Split documents into chunks
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
    length_function=len,
)
splits = text_splitter.split_documents(docs)

In [7]:

embedding = OpenAIEmbeddings(model='text-embedding-3-small')
docsearch = PineconeVectorStore.from_documents(splits, embedding, index_name='berkshire-hathaway')


In [8]:
type(docsearch)

langchain_pinecone.vectorstores.PineconeVectorStore

In [9]:
docsearch.similarity_search(query="Coke",k=1)

[Document(id='e8a7da9c-b1a8-4b0a-995d-7bffddb89e23', metadata={'source': 'letters/1989.txt'}, page_content="I continued to note these qualities for the next 52 years as \nCoke blanketed the world. During this period, however, I \ncarefully avoided buying even a single share, instead allocating \nmajor portions of my net worth to street railway companies, \nwindmill manufacturers, anthracite producers, textile businesses, \ntrading-stamp issuers, and the like. (If you think I'm making \nthis up, I can supply the names.) Only in the summer of 1988 did \nmy brain finally establish contact with my eyes.\n\n     What I then perceived was both clear and fascinating. After \ndrifting somewhat in the 1970's, Coca-Cola had in 1981 become a \nnew company with the move of Roberto Goizueta to CEO. Roberto, \nalong with Don Keough, once my across-the-street neighbor in \nOmaha, first rethought and focused the company's policies and \nthen energetically carried them out. What was already the world's