###### Pinecone Vector DB

In [47]:
from pinecone import Pinecone
from dotenv import load_dotenv
import os
load_dotenv()

True

In [48]:
pc = Pinecone(api_key=os.getenv('PINECONE_DB_APPLICATION_TOKEN'))

In [49]:
os.environ['OPENAI_API_KEY'] = os.getenv('OPENAI_API_KEY')

In [50]:
from langchain_openai import OpenAIEmbeddings
embedding_model = OpenAIEmbeddings(
    model='text-embedding-3-small'
    )

In [51]:
embedding_model

OpenAIEmbeddings(client=<openai.resources.embeddings.Embeddings object at 0x7f0b881b5a70>, async_client=<openai.resources.embeddings.AsyncEmbeddings object at 0x7f0b881b62c0>, model='text-embedding-3-small', dimensions=None, deployment='text-embedding-ada-002', openai_api_version=None, openai_api_base=None, openai_api_type=None, openai_proxy=None, embedding_ctx_length=8191, openai_api_key=SecretStr('**********'), openai_organization=None, allowed_special=None, disallowed_special=None, chunk_size=1000, max_retries=2, request_timeout=None, headers=None, tiktoken_enabled=True, tiktoken_model_name=None, show_progress_bar=False, model_kwargs={}, skip_empty=False, default_headers=None, default_query=None, retry_min_seconds=4, retry_max_seconds=20, http_client=None, http_async_client=None, check_embedding_ctx_length=True)

In [52]:
from pinecone import ServerlessSpec
index_name='rag'
if not pc.has_index(index_name):
    pc.create_index(
        name = index_name,
        dimension=1536,
        metric='cosine',
        spec=ServerlessSpec(cloud='aws',region='us-east-1')
    )
index = pc.Index(index_name)

In [53]:
index

<pinecone.db_data.index.Index at 0x7f0bae4ebe30>

In [54]:
from langchain_pinecone import PineconeVectorStore

vector_store = PineconeVectorStore(index,embedding_model)

In [55]:
from langchain_core.documents import Document

document_1 = Document(
    page_content="I had chocolate chip pancakes and scrambled eggs for breakfast this morning.",
    metadata={"source": "tweet"},
)

document_2 = Document(
    page_content="The weather forecast for tomorrow is cloudy and overcast, with a high of 62 degrees.",
    metadata={"source": "news"},
)

document_3 = Document(
    page_content="Building an exciting new project with LangChain - come check it out!",
    metadata={"source": "tweet"},
)

document_4 = Document(
    page_content="Robbers broke into the city bank and stole $1 million in cash.",
    metadata={"source": "news"},
)

document_5 = Document(
    page_content="Wow! That was an amazing movie. I can't wait to see it again.",
    metadata={"source": "tweet"},
)

document_6 = Document(
    page_content="Is the new iPhone worth the price? Read this review to find out.",
    metadata={"source": "website"},
)

document_7 = Document(
    page_content="The top 10 soccer players in the world right now.",
    metadata={"source": "website"},
)

document_8 = Document(
    page_content="LangGraph is the best framework for building stateful, agentic applications!",
    metadata={"source": "tweet"},
)

document_9 = Document(
    page_content="The stock market is down 500 points today due to fears of a recession.",
    metadata={"source": "news"},
)

document_10 = Document(
    page_content="I have a bad feeling I am going to get deleted :(",
    metadata={"source": "tweet"},
)

documents = [
    document_1,
    document_2,
    document_3,
    document_4,
    document_5,
    document_6,
    document_7,
    document_8,
    document_9,
    document_10,
]

In [56]:
vector_store.add_documents(documents)

['aecfad1a-1cec-4c89-9c8c-25d25db55eb1',
 '081178c6-64f4-4ec7-a1d4-5aa3c999a91e',
 '5eb7bbb9-51e8-469e-bba1-64a80a81e00f',
 'd91842c4-65fd-4f55-84e2-e4b484dc3908',
 '739b8c64-d2e2-4c0f-9132-75d9a16dd07b',
 '71fd3b65-aa40-490d-9e09-7367dff6184d',
 '33355c2b-6c2f-4d7c-80f1-4ec2af348309',
 '69943f67-4153-4e3d-8020-261c4f898e4a',
 'fc15289c-fc55-440e-8b03-6f092fa27732',
 '4b956bd1-98fc-4ca2-987a-781f57d0a782']

In [58]:
results = vector_store.similarity_search(
    "LangChain provides abstractions to make working with LLMs easy",
    k=3,filter={'source':'tweet'}
)
for res in results:
  print(f"* '{res.page_content}',metadata={res.metadata}")

* 'Building an exciting new project with LangChain - come check it out!',metadata={'source': 'tweet'}
* 'LangGraph is the best framework for building stateful, agentic applications!',metadata={'source': 'tweet'}
* 'Wow! That was an amazing movie. I can't wait to see it again.',metadata={'source': 'tweet'}


In [60]:
results = vector_store.similarity_search_with_score(
    "LangChain provides abstractions to make working with LLMs easy",
    k=3,filter={'source':'tweet'}
)
for res,score in results:
  print(f"* [sim={score:.2f}] '{res.page_content}',metadata={res.metadata}")

* [sim=0.43] 'Building an exciting new project with LangChain - come check it out!',metadata={'source': 'tweet'}
* [sim=0.39] 'LangGraph is the best framework for building stateful, agentic applications!',metadata={'source': 'tweet'}
* [sim=0.05] 'Wow! That was an amazing movie. I can't wait to see it again.',metadata={'source': 'tweet'}


In [62]:
retriever = vector_store.as_retriever(
    search_type="similarity_score_threshold",
    search_kwargs={"k": 1, "score_threshold": 0.7},
)

retriever.invoke("LangChain provides abstractions to make working with LLMs easy",filter={"source":"tweet"})

[Document(id='5eb7bbb9-51e8-469e-bba1-64a80a81e00f', metadata={'source': 'tweet'}, page_content='Building an exciting new project with LangChain - come check it out!')]