In [None]:
import os
from dotenv import load_dotenv
load_dotenv()

In [None]:
os.environ["HUGGINGFACEHUB_API_TOKEN"] = os.getenv("HUGGINGFACEHUB_API_TOKEN")

In [None]:
from langchain_huggingface import HuggingFaceEmbeddings
embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2")

In [None]:
len (embeddings.embed_query("This is a test query to generate embeddings."))

In [None]:
from pinecone import Pinecone
pinecone_api_key = os.getenv("PINECONE_API_KEY")
pc = Pinecone( api_key=pinecone_api_key)

In [None]:
from pinecone import ServerlessSpec # Create a serverless spec
index_name = "vivek-test-index"

if not pc.has_index(index_name):    # Create the index if it does not exist 
    pc.create_index(
        name=index_name,
        dimension=384,  # Dimension of the embeddings
        metric="cosine",  # Similarity metric
        spec=ServerlessSpec(cloud="aws", region="us-east-1")
)


In [None]:
index = pc.Index(index_name)  # Connect to the index
index

In [None]:
from langchain_pinecone import PineconeVectorStore

In [None]:
vector_store = PineconeVectorStore(
    index=index,
    embedding=embeddings)

In [None]:
result = vector_store.similarity_search("What is langchain")
result

In [None]:
from uuid import uuid4
from langchain_core.documents import Document

document_1 = Document(
    page_content="I had chocolate chip pancakes and scrambled eggs for breakfast this morning.",
    metadata={"source": "tweet"},#additional info
)

document_2 = Document(
    page_content="The weather forecast for tomorrow is cloudy and overcast, with a high of 62 degrees.",
    metadata={"source": "news"},
)

document_3 = Document(
    page_content="Building an exciting new project with LangChain - come check it out!",
    metadata={"source": "tweet"},
)

document_4 = Document(
    page_content="Robbers broke into the city bank and stole $1 million in cash.",
    metadata={"source": "news"},
)

document_5 = Document(
    page_content="Wow! That was an amazing movie. I can't wait to see it again.",
    metadata={"source": "tweet"},
)

document_6 = Document(
    page_content="Is the new iPhone worth the price? Read this review to find out.",
    metadata={"source": "website"},
)

document_7 = Document(
    page_content="The top 10 soccer players in the world right now.",
    metadata={"source": "website"},
)

document_8 = Document(
    page_content="LangGraph is the best framework for building stateful, agentic applications!",
    metadata={"source": "tweet"},
)

document_9 = Document(
    page_content="The stock market is down 500 points today due to fears of a recession.",
    metadata={"source": "news"},
)

document_10 = Document(
    page_content="I have a bad feeling I am going to get deleted :(",
    metadata={"source": "tweet"},
)


In [None]:
documents = [
    document_1,
    document_2,
    document_3,
    document_4,
    document_5,
    document_6,
    document_7,
    document_8,
    document_9,
    document_10,
]

In [None]:
documents

In [None]:
uuids = [str(uuid4()) for _ in range(len(documents))]
uuids

In [None]:
vector_store.add_documents(documents=documents, ids=uuids)

In [None]:
result = vector_store.similarity_search("What is langchain", k = 1)
result

In [52]:
retriever = vector_store.as_retriever(search_type="similarity_score_threshold", search_kwargs={"score_threshold": 0.8})


In [53]:
retriever.invoke("What is langchain")

[Document(id='42d98e06-f495-4ab4-acac-47030512ce48', metadata={'source': 'tweet'}, page_content='Building an exciting new project with LangChain - come check it out!')]

In [54]:
from langchain_groq import ChatGroq
from langchain_core.prompts import ChatPromptTemplate

prompt = ChatPromptTemplate(
    [
        ("system", "{context}"),
        ("user", "{question}"),
    ]
)
prompt

ChatPromptTemplate(input_variables=['context', 'question'], input_types={}, partial_variables={}, messages=[SystemMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context'], input_types={}, partial_variables={}, template='{context}'), additional_kwargs={}), HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['question'], input_types={}, partial_variables={}, template='{question}'), additional_kwargs={})])

In [55]:
model = ChatGroq(model="gemma2-9b-it",temperature=0.0, max_tokens=5000)


In [56]:
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
parser = StrOutputParser()

In [57]:
def format_docs(docs):
    return "\n\n".join([doc.page_content for doc in docs])  

In [58]:
rag_chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough() } |
    prompt |
    model |
    parser
)

In [60]:
rag_chain.invoke("What is langchain?")

"That's awesome! I'm always excited to hear about new projects using my abilities. \n\nLangChain is a fantastic framework for building applications with large language models (LLMs) like me.  Think of it as a toolbox specifically designed to make working with LLMs easier and more powerful. \n\nHere's a breakdown of what makes LangChain so special:\n\n**Key Features:**\n\n* **Chains:** LangChain lets you string together multiple LLMs and other tools (like search engines, databases, APIs) into sophisticated workflows. Imagine asking a question that requires multiple steps to answer – LangChain can handle that seamlessly.\n* **Agents:**  You can create autonomous agents that can interact with the world. These agents can use LLMs to understand instructions, plan actions, and execute them using other tools.\n* **Memory:** LangChain provides ways to give your applications a memory, so they can remember past interactions and context. This is crucial for building conversational AI that feels m