In [10]:
import os
from dotenv import load_dotenv
load_dotenv()

True

In [3]:
os.environ['HF_TOKEN'] = os.getenv('HF_TOKEN')

In [1]:
from langchain_huggingface import HuggingFaceEmbeddings
embeddings = HuggingFaceEmbeddings(model_name = 'all-MiniLM-L6-v2')

  from .autonotebook import tqdm as notebook_tqdm


In [5]:
from pinecone import Pinecone

In [11]:
import os
pinecone_api_key = os.getenv("PINECONE_API_KEY")

In [12]:
pc = Pinecone(api_key=pinecone_api_key)

In [13]:
# Serverless: Server will be managed by the cloud provider
from pinecone import ServerlessSpec

In [14]:
index_name = 'agenticai'

In [None]:
# Creating the index
if not pc.has_index(index_name):
        pc.create_index(
        name = index_name,
        dimension = 384,
        metric = 'cosine',
        spec = ServerlessSpec(cloud='aws', region='us-east-1')
    )

In [16]:
# Loading the index
index = pc.Index(index_name)

In [17]:
from langchain_pinecone import PineconeVectorStore

In [18]:
vector_store = PineconeVectorStore(index=index, embedding = embeddings)

In [23]:
from uuid import uuid4
from langchain_core.documents import Document

document_1 = Document(
    page_content="I had chocolate chip pancakes and scrambled eggs for breakfast this morning.",
    metadata={"source": "tweet"},
)

document_2 = Document(
    page_content="The weather forecast for tomorrow is cloudy and overcast, with a high of 62 degrees.",
    metadata={"source": "news"},
)

document_3 = Document(
    page_content="Building an exciting new project with LangChain - come check it out!",
    metadata={"source": "tweet"},
)

document_4 = Document(
    page_content="Robbers broke into the city bank and stole $1 million in cash.",
    metadata={"source": "news"},
)

document_5 = Document(
    page_content="Wow! That was an amazing movie. I can't wait to see it again.",
    metadata={"source": "tweet"},
)

document_6 = Document(
    page_content="Is the new iPhone worth the price? Read this review to find out.",
    metadata={"source": "website"},
)

document_7 = Document(
    page_content="The top 10 soccer players in the world right now.",
    metadata={"source": "website"},
)

document_8 = Document(
    page_content="LangGraph is the best framework for building stateful, agentic applications!",
    metadata={"source": "tweet"},
)

document_9 = Document(
    page_content="The stock market is down 500 points today due to fears of a recession.",
    metadata={"source": "news"},
)

document_10 = Document(
    page_content="I have a bad feeling I am going to get deleted :(",
    metadata={"source": "tweet"},
)

In [24]:
documents = [
    document_1,
    document_2,
    document_3,
    document_4,
    document_5,
    document_6,
    document_7,
    document_8,
    document_9,
    document_10,
]

In [25]:
documents

[Document(metadata={'source': 'tweet'}, page_content='I had chocolate chip pancakes and scrambled eggs for breakfast this morning.'),
 Document(metadata={'source': 'news'}, page_content='The weather forecast for tomorrow is cloudy and overcast, with a high of 62 degrees.'),
 Document(metadata={'source': 'tweet'}, page_content='Building an exciting new project with LangChain - come check it out!'),
 Document(metadata={'source': 'news'}, page_content='Robbers broke into the city bank and stole $1 million in cash.'),
 Document(metadata={'source': 'tweet'}, page_content="Wow! That was an amazing movie. I can't wait to see it again."),
 Document(metadata={'source': 'website'}, page_content='Is the new iPhone worth the price? Read this review to find out.'),
 Document(metadata={'source': 'website'}, page_content='The top 10 soccer players in the world right now.'),
 Document(metadata={'source': 'tweet'}, page_content='LangGraph is the best framework for building stateful, agentic application

In [26]:
uuids = [str(uuid4()) for _ in range(len(documents))]

In [27]:
uuids

['80bfdd64-4be4-4551-9bcc-3d672b3de0a5',
 '32c4702c-a865-44e6-bf11-01583e6efc49',
 'b0af9bd7-4168-4f4c-b968-4a819086b98c',
 '088248bb-1a7f-4ed3-9f78-4fdda1e5067b',
 'd5dc66df-3949-43d5-a466-b8f37c9ee84c',
 'c38d76de-1a02-4e89-bd69-50e73f071973',
 'faa75b81-eb62-4d14-946c-74ea7a3ab159',
 'd9cf5505-7230-4c73-9c87-2f7ae2400dd9',
 '0eaf790c-f7f6-4ced-8fdd-67b3efdf3808',
 'b61078fe-43d3-438b-a7a9-521e66cd6143']

In [28]:
vector_store.add_documents(documents = documents, ids = uuids)

['80bfdd64-4be4-4551-9bcc-3d672b3de0a5',
 '32c4702c-a865-44e6-bf11-01583e6efc49',
 'b0af9bd7-4168-4f4c-b968-4a819086b98c',
 '088248bb-1a7f-4ed3-9f78-4fdda1e5067b',
 'd5dc66df-3949-43d5-a466-b8f37c9ee84c',
 'c38d76de-1a02-4e89-bd69-50e73f071973',
 'faa75b81-eb62-4d14-946c-74ea7a3ab159',
 'd9cf5505-7230-4c73-9c87-2f7ae2400dd9',
 '0eaf790c-f7f6-4ced-8fdd-67b3efdf3808',
 'b61078fe-43d3-438b-a7a9-521e66cd6143']

In [29]:
results = vector_store.similarity_search('What does langchain provide to us?', k=3)

In [30]:
results

[Document(id='b0af9bd7-4168-4f4c-b968-4a819086b98c', metadata={'source': 'tweet'}, page_content='Building an exciting new project with LangChain - come check it out!'),
 Document(id='d9cf5505-7230-4c73-9c87-2f7ae2400dd9', metadata={'source': 'tweet'}, page_content='LangGraph is the best framework for building stateful, agentic applications!'),
 Document(id='c38d76de-1a02-4e89-bd69-50e73f071973', metadata={'source': 'website'}, page_content='Is the new iPhone worth the price? Read this review to find out.')]

In [None]:
retriever=vector_store.as_retriever(
        search_type = 'similarity_score_threshold',
        search_kwargs={"k": 3, "score_threshold": 0.5}
    )

In [32]:
retriever.invoke("LangChain provides abstractions to make working with LLMs easy")

[Document(id='b0af9bd7-4168-4f4c-b968-4a819086b98c', metadata={'source': 'tweet'}, page_content='Building an exciting new project with LangChain - come check it out!'),
 Document(id='d9cf5505-7230-4c73-9c87-2f7ae2400dd9', metadata={'source': 'tweet'}, page_content='LangGraph is the best framework for building stateful, agentic applications!'),
 Document(id='b61078fe-43d3-438b-a7a9-521e66cd6143', metadata={'source': 'tweet'}, page_content='I have a bad feeling I am going to get deleted :(')]