In [17]:
import os
from dotenv import load_dotenv
from pinecone import Pinecone
from langchain_openai import OpenAIEmbeddings
from langchain_pinecone import PineconeVectorStore
from pinecone import ServerlessSpec

load_dotenv()
os.environ['PINECONE_API_KEY'] = os.getenv('PINECONE_API_KEY')
os.environ['OPENAI_API_KEY'] = os.getenv('OPENAI_API_KEY')

pinecone = Pinecone()
embedding = OpenAIEmbeddings(model='text-embedding-3-small',
                             dimensions=1536
                             )

index_name = "idx-pinecone"
if not pinecone.has_index(index_name):
    pinecone.create_index(
        name=index_name,
        embedding=embedding,
        dimension=1536,
        spec=ServerlessSpec(cloud='aws', region='us-east-1'),
        metric='cosine'
    )
    print(f"index created in Pinecone: '{index_name}'")
else:
    print(f"index already exists in Pinecone: '{index_name}'")

index already exists in Pinecone: 'idx-pinecone'


In [15]:
pc_vector_store = PineconeVectorStore(index=pinecone.Index(index_name), embedding=embedding)

In [21]:
from langchain_core.documents import Document

from langchain_core.documents import Document

documents = [
    Document(
        page_content="I had chocolate chip pancakes and scrambled eggs for breakfast this morning.",
        metadata={"source": "tweet"},
    ),
    Document(
        page_content="The weather forecast for tomorrow is cloudy and overcast, with a high of 62 degrees.",
        metadata={"source": "news"},
    ),
    Document(
        page_content="Building an exciting new project with LangChain - come check it out!",
        metadata={"source": "tweet"},
    ),
    Document(
        page_content="Robbers broke into the city bank and stole $1 million in cash.",
        metadata={"source": "news"},
    ),
    Document(
        page_content="Wow! That was an amazing movie. I can't wait to see it again.",
        metadata={"source": "tweet"},
    ),
    Document(
        page_content="Is the new iPhone worth the price? Read this review to find out.",
        metadata={"source": "website"},
    ),
    Document(
        page_content="The top 10 soccer players in the world right now.",
        metadata={"source": "website"},
    ),
    Document(
        page_content="LangGraph is the best framework for building stateful, agentic applications!",
        metadata={"source": "tweet"},
    ),
    Document(
        page_content="I have a bad feeling I am going to get deleted :(",
        metadata={"source": "tweet"},
    ),
]
documents

[Document(metadata={'source': 'tweet'}, page_content='I had chocolate chip pancakes and scrambled eggs for breakfast this morning.'),
 Document(metadata={'source': 'news'}, page_content='The weather forecast for tomorrow is cloudy and overcast, with a high of 62 degrees.'),
 Document(metadata={'source': 'tweet'}, page_content='Building an exciting new project with LangChain - come check it out!'),
 Document(metadata={'source': 'news'}, page_content='Robbers broke into the city bank and stole $1 million in cash.'),
 Document(metadata={'source': 'tweet'}, page_content="Wow! That was an amazing movie. I can't wait to see it again."),
 Document(metadata={'source': 'website'}, page_content='Is the new iPhone worth the price? Read this review to find out.'),
 Document(metadata={'source': 'website'}, page_content='The top 10 soccer players in the world right now.'),
 Document(metadata={'source': 'tweet'}, page_content='LangGraph is the best framework for building stateful, agentic application

In [22]:
pc_vector_store.add_documents(documents)

['4b1d5100-01b5-4709-981c-d5b77ff175b5',
 '1cb018e2-6a72-47b9-90f7-1659cee9147e',
 '58a5a267-0f28-4509-8a6f-a09f5987f08b',
 'eb622f4e-6ea5-4a92-8104-adc7b0ca88b1',
 'a033b274-2577-4797-b52a-3cf6aa1eac01',
 '7eb84b91-4986-4a6d-ba52-fbb25077168a',
 '03497176-384d-4880-bed1-7dd5270a83ed',
 '1b0cc45c-10e8-4c03-b6cb-0857d64e9295',
 'cfd84c9e-fb47-4e59-9d04-c8355b77861d']

In [23]:
pc_vector_store.similarity_search("what's the weather?")

[Document(id='1cb018e2-6a72-47b9-90f7-1659cee9147e', metadata={'source': 'news'}, page_content='The weather forecast for tomorrow is cloudy and overcast, with a high of 62 degrees.'),
 Document(id='cfd84c9e-fb47-4e59-9d04-c8355b77861d', metadata={'source': 'tweet'}, page_content='I have a bad feeling I am going to get deleted :('),
 Document(id='4b1d5100-01b5-4709-981c-d5b77ff175b5', metadata={'source': 'tweet'}, page_content='I had chocolate chip pancakes and scrambled eggs for breakfast this morning.'),
 Document(id='eb622f4e-6ea5-4a92-8104-adc7b0ca88b1', metadata={'source': 'news'}, page_content='Robbers broke into the city bank and stole $1 million in cash.')]

In [26]:
filter = {"source": "website"}
query = "what's the weather?"

pc_vector_store.similarity_search(query, filter=filter)

[Document(id='03497176-384d-4880-bed1-7dd5270a83ed', metadata={'source': 'website'}, page_content='The top 10 soccer players in the world right now.'),
 Document(id='7eb84b91-4986-4a6d-ba52-fbb25077168a', metadata={'source': 'website'}, page_content='Is the new iPhone worth the price? Read this review to find out.')]

In [29]:
filter = {"source": {"$in": ["website", "tweet"]}}
query = "what's the weather?"

pc_vector_store.similarity_search(query, filter=filter, k=1)

[Document(id='cfd84c9e-fb47-4e59-9d04-c8355b77861d', metadata={'source': 'tweet'}, page_content='I have a bad feeling I am going to get deleted :(')]

In [30]:
pc_vector_store.similarity_search_with_score(query="will it be hot weather tomorrow?", k=3)

[(Document(id='1cb018e2-6a72-47b9-90f7-1659cee9147e', metadata={'source': 'news'}, page_content='The weather forecast for tomorrow is cloudy and overcast, with a high of 62 degrees.'),
  0.602578163),
 (Document(id='cfd84c9e-fb47-4e59-9d04-c8355b77861d', metadata={'source': 'tweet'}, page_content='I have a bad feeling I am going to get deleted :('),
  0.209217072),
 (Document(id='4b1d5100-01b5-4709-981c-d5b77ff175b5', metadata={'source': 'tweet'}, page_content='I had chocolate chip pancakes and scrambled eggs for breakfast this morning.'),
  0.131456375)]

In [31]:
retriever = pc_vector_store.as_retriever(search_kwargs={"k": 3})
retriever

VectorStoreRetriever(tags=['PineconeVectorStore', 'OpenAIEmbeddings'], vectorstore=<langchain_pinecone.vectorstores.PineconeVectorStore object at 0x0000016574B62590>, search_kwargs={'k': 3})

In [32]:
retriever.invoke(query)

[Document(id='1cb018e2-6a72-47b9-90f7-1659cee9147e', metadata={'source': 'news'}, page_content='The weather forecast for tomorrow is cloudy and overcast, with a high of 62 degrees.'),
 Document(id='cfd84c9e-fb47-4e59-9d04-c8355b77861d', metadata={'source': 'tweet'}, page_content='I have a bad feeling I am going to get deleted :('),
 Document(id='4b1d5100-01b5-4709-981c-d5b77ff175b5', metadata={'source': 'tweet'}, page_content='I had chocolate chip pancakes and scrambled eggs for breakfast this morning.')]

In [36]:
filter = {"source": {"$in": ["website", "tweet"]}}
query = "what's the weather?"

results = pc_vector_store.similarity_search(query, k=3, filter={"source": {"$in": ["website", "tweet"]}})
results

[Document(id='cfd84c9e-fb47-4e59-9d04-c8355b77861d', metadata={'source': 'tweet'}, page_content='I have a bad feeling I am going to get deleted :('),
 Document(id='4b1d5100-01b5-4709-981c-d5b77ff175b5', metadata={'source': 'tweet'}, page_content='I had chocolate chip pancakes and scrambled eggs for breakfast this morning.'),
 Document(id='03497176-384d-4880-bed1-7dd5270a83ed', metadata={'source': 'website'}, page_content='The top 10 soccer players in the world right now.')]