In [1]:
# Indexing 
# -- Load the data
# -- Split the data
# -- store the data in the vector embeddings


In [25]:
import os
from dotenv import load_dotenv
from langchain_groq import ChatGroq
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_pinecone import PineconeVectorStore
from pinecone import Pinecone, ServerlessSpec

load_dotenv()
index_name = "rag-model"
dimension_size = 768 

def check_pc_index(index_name):
    try: 
        index = pc.describe_index(index_name)
        return True
    except:
        return False

os.environ['GROQ_API_KEY'] = os.getenv("GROQ_API_KEY")
pc = Pinecone(api_key = os.getenv("PINECONE_API_KEY"))
if not check_pc_index(index_name):
    print(pc.list_indexes())
    spec = ServerlessSpec(cloud="aws",
                          region="us-east-1")
    pc.create_index(index_name, 
                    dimension=dimension_size,
                    spec=spec)
index = pc.Index(index_name)

llm = ChatGroq(model="llama-3.1-8b-instant")
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")
vector_store = PineconeVectorStore(index=index,
                                   embedding=embeddings)


In [37]:
# Load the website based docs
# -- the soupstrainer can handle the specific elements of the web doc 
# -- -- web based loader holds the web page and loads the page 
# -- -- -- load the page into the docs object
# -- -- Add the splits to the index 


import bs4

from langchain_community.document_loaders import WebBaseLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
loader = WebBaseLoader(
    web_paths=(
        "https://lilianweng.github.io/posts/2023-06-23-agent/",
    ),
    bs_kwargs=dict(
        parse_only=bs4.SoupStrainer(
            class_=(
                "post-content",
                "post-title",
                "post-handler"
            )
        )
    )
)
docs = loader.load()
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000,
                                               chunk_overlap=200)
splits = text_splitter.split_documents(docs)

In [49]:
# Store the vectors into DB 
doc_ids = vector_store.add_documents(documents=splits)

['1215cbdd-af2a-4457-985e-f2d9c5da8826', '9cf2c357-c8c7-448e-b162-98af2f03ea72', '28c8820e-72c3-4fe2-82eb-69fd7d537b43', '2bab7791-77fb-4da7-99a0-cab03d3e166c', 'f1a63d13-d1dc-48c7-ba69-dda6eacc5977', 'c7d0624e-ec49-4a63-b5ba-c375f954238f', 'b4f3527d-2200-41f7-bc73-93d0c7eaa530', 'a96d593d-8dac-4a38-9cb1-bf59bfe4d2fe', '96c5993a-70a2-473d-badc-b07c63c48093', '67a04522-d6b7-4b27-897f-0236826e53ef', '7c720e39-f010-4e60-b103-697d429b3aa5', 'd21d0c6a-21ee-4a10-bbd1-8d0272b65700', '74da3fa6-6c5a-431e-8a9d-2c24498eb382', '606d003a-8a36-4577-a4a7-7370de85af27', 'eb324045-a48b-4332-ad4f-b03c6649e9bd', '849b7f35-a325-471b-9e02-d110effa498a', '60fe61c1-4df8-4e40-8eba-b10b671fb837', '4a0283e9-c581-4b9f-9dd1-a202203a508d', '83670fc4-e7b2-4184-840f-e5dbac05f4e1', '78122177-f4f5-4455-a84e-7b54ccf3194f', 'c724b768-b024-4bc8-9b51-909f0144e928', 'a985dae6-0757-40dd-9a2c-f8696ec27eae', '39a83474-31d5-4452-be40-e161d69669c4', 'b69b3e18-470e-4a7c-8bdb-da102d702234', '006659fe-eb2a-461b-bed5-8af8e3a40489',

In [58]:
# Retrival and generation

from langchain import hub
prompt = hub.pull("rlm/rag-prompt")
message = prompt.invoke({
    "context" : "Here is the context for the question",
    "question" : "Here is the question for you to answer"
 }).to_messages()
print(message[0].content)



You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.
Question: Here is the question for you to answer 
Context: Here is the context for the question 
Answer:


In [59]:
# Use langgraph to tie the retrival and generation together
# -- Define a state that can be carried through different agents

from langchain_core.documents import Document 
from typing_extensions import List, TypedDict

class State(TypedDict):
    question: str
    context: List[Document]
    answer: str

def retriver(state: State):
    retrived_docs = vector_store.similarity_search(state['question'])
    return {
        "question": state["question"],
        "context": retrived_docs
    }

def generation(state: State):
    docs_content = '\n\n'.join([doc.page_content for doc in state['context']])
    messages = prompt.invoke({
        "question": state['question'],
        "context": docs_content
    })
    response = llm.invoke(messages)
    return {
        "question": state['question'],
        "context": docs_content,
        "answer": response
    }


In [60]:
# Build a graph out of the above retrival and augumentation
# -- using a sequence for the simplicity
from langgraph.graph import START, StateGraph, END

graph_builder = StateGraph(State).add_sequence([retriver, generation])
graph_builder.add_edge(START, "retriver")
graph = graph_builder.compile()

In [64]:
# from IPython.display import Image, display

# display(Image(graph.get_graph().draw_mermaid_png())) - display the simple graph 

result = graph.invoke({
    "question": "What is task decomposition"
})
print(f"Context: {result['context']}")
print(f"Answer: {result['answer']}")

Context: Tree of Thoughts (Yao et al. 2023) extends CoT by exploring multiple reasoning possibilities at each step. It first decomposes the problem into multiple thought steps and generates multiple thoughts per step, creating a tree structure. The search process can be BFS (breadth-first search) or DFS (depth-first search) with each state evaluated by a classifier (via a prompt) or majority vote.
Task decomposition can be done (1) by LLM with simple prompting like "Steps for XYZ.\n1.", "What are the subgoals for achieving XYZ?", (2) by using task-specific instructions; e.g. "Write a story outline." for writing a novel, or (3) with human inputs.

(3) Task execution: Expert models execute on the specific tasks and log results.
Instruction:

With the input and the inference results, the AI assistant needs to describe the process and results. The previous stages can be formed as - User Input: {{ User Input }}, Task Planning: {{ Tasks }}, Model Selection: {{ Model Assignment }}, Task Execu

In [65]:
# Streaming a graph 
for step in graph.stream(
    {"question": "What is task decomposition ?"}, 
    stream_mode="updates"
):
    print(f"Step : {step} \n\n\n")

Step : {'retriver': {'question': 'What is task decomposition ?', 'context': [Document(id='2bab7791-77fb-4da7-99a0-cab03d3e166c', metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/'}, page_content='Tree of Thoughts (Yao et al. 2023) extends CoT by exploring multiple reasoning possibilities at each step. It first decomposes the problem into multiple thought steps and generates multiple thoughts per step, creating a tree structure. The search process can be BFS (breadth-first search) or DFS (depth-first search) with each state evaluated by a classifier (via a prompt) or majority vote.\nTask decomposition can be done (1) by LLM with simple prompting like "Steps for XYZ.\\n1.", "What are the subgoals for achieving XYZ?", (2) by using task-specific instructions; e.g. "Write a story outline." for writing a novel, or (3) with human inputs.'), Document(id='6d628be6-d9d8-4cdc-ba97-d036f77ca753', metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/'}, p

In [69]:
# Custom prompt model 
from langchain_core.prompts import PromptTemplate

template = """Use the following pieces of context to answer the question at the end.
If you don't know the answer, just say that you don't know, don't try to make up an answer.
Use three sentences maximum and keep the answer as concise as possible.
Always say "thanks for asking!" at the end of the answer.

{context}

Question: {question}

Helpful Answer:"""

rag_prompt = PromptTemplate.from_template(template)


In [71]:
# Query Analysis
# -- one step ahead from brute force 
# -- -- create multiple queries from the actual input query
# -- create metadata for teh documents 

total_documents = len(splits)
third = total_documents // 3 

for index, document in enumerate(splits):
    if index < third:
        document.metadata["section"] = "beginning"
    elif index < 2 * third:
        document.metadata["section"] = "middle"
    else:
        document.metadata["section"] = "enc"
    
splits[0].metadata

{'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/',
 'text': 'LLM Powered Autonomous Agents\n    Building agents with LLM (large language model) as its core controller is a cool concept. Several proof-of-concepts demos, such as AutoGPT, GPT-Engineer and BabyAGI, serve as inspiring examples. The potentiality of LLM extends beyond generating well-written copies, stories, essays and programs; it can be framed as a powerful general problem solver.\nAgent System Overview#\nIn a LLM-powered autonomous agent system, LLM functions as the agent’s brain, complemented by several key components:\n\nPlanning\n\nSubgoal and decomposition: The agent breaks down large tasks into smaller, manageable subgoals, enabling efficient handling of complex tasks.\nReflection and refinement: The agent can do self-criticism and self-reflection over past actions, learn from mistakes and refine them for future steps, thereby improving the quality of final results.\n\n\nMemory',
 'section': 'beginning

In [150]:
splits[0].metadata

{'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/',
 'text': 'LLM Powered Autonomous Agents\n    Building agents with LLM (large language model) as its core controller is a cool concept. Several proof-of-concepts demos, such as AutoGPT, GPT-Engineer and BabyAGI, serve as inspiring examples. The potentiality of LLM extends beyond generating well-written copies, stories, essays and programs; it can be framed as a powerful general problem solver.\nAgent System Overview#\nIn a LLM-powered autonomous agent system, LLM functions as the agent’s brain, complemented by several key components:\n\nPlanning\n\nSubgoal and decomposition: The agent breaks down large tasks into smaller, manageable subgoals, enabling efficient handling of complex tasks.\nReflection and refinement: The agent can do self-criticism and self-reflection over past actions, learn from mistakes and refine them for future steps, thereby improving the quality of final results.\n\n\nMemory',
 'section': 'beginning

In [75]:
from pinecone import Pinecone

pc = Pinecone(api_key = os.getenv("PINECONE_API_KEY"))
index_name = "rag-model-2"
if not check_pc_index(index_name):
    spec = ServerlessSpec(cloud="aws",
                          region="us-east-1")
    pc.create_index(index_name, 
                    dimension=dimension_size,
                    spec=spec)
    
index = pc.Index(index_name)

In [76]:
vector_store = PineconeVectorStore(embedding=embeddings,
                                   index=index)
ids = vector_store.add_documents(splits)

In [175]:
from typing import Literal
from typing_extensions import Annotated

class Search(TypedDict):
    # """
    #    query with out the section and summrised query, and section in the query
    # """
    """Search query."""
    query: Annotated[str, ..., "Search Query to run "]
    section: Annotated[
        Literal["beginning", "middle", "end"],
        ...,
        "section to query",
    ]

class State(TypedDict):
    question: str
    query: Search
    context: List[Document]
    answer: str

def analyze_query(state: State):
    structured_llm = llm.with_structured_output(Search)
    query = structured_llm.invoke(state["question"])
    return {
        "question": state["question"],
        "query": query
    }

def filter_by_section(doc, section) -> bool:
    print(doc, section)
    doc.metadata.get("section") == section

def retriver_new(state: State):
    query = state["query"]
    # print(vector_store.similarity_search(
    #     query["query"],
    #     filter= lambda doc: filter_by_section(doc, query['section'])
    # ))
    retrived_docs = vector_store.similarity_search(
        query["query"],
        filter={
            "section": query['section']
        },
        k=3
    )
    # print(context)
    return {
        "query": query,
        "context": retrived_docs
    }

graph_builder = StateGraph(State).add_sequence([analyze_query,retriver_new, generation])
graph_builder.add_edge(START, "analyze_query")
graph = graph_builder.compile()

In [176]:
for step in graph.stream(
    {"question": "What does the end of the post say about Task Decomposition?"},
    stream_mode="updates",
):
    print(f"{step}.....\n\n")


{'analyze_query': {'question': 'What does the end of the post say about Task Decomposition?', 'query': {'query': 'Task Decomposition', 'section': 'end'}}}.....


{'retriver_new': {'query': {'query': 'Task Decomposition', 'section': 'end'}, 'context': []}}.....


{'generation': {'question': 'What does the end of the post say about Task Decomposition?', 'context': '', 'answer': AIMessage(content='However, there is no context provided. Please provide the context, and I will be happy to assist you in answering your question.', additional_kwargs={}, response_metadata={'token_usage': {'completion_tokens': 27, 'prompt_tokens': 106, 'total_tokens': 133, 'completion_time': 0.041344957, 'prompt_time': 0.011093972, 'queue_time': 0.018199255, 'total_time': 0.052438929}, 'model_name': 'llama-3.1-8b-instant', 'system_fingerprint': 'fp_f66ccb39ec', 'finish_reason': 'stop', 'logprobs': None}, id='run-3d7e32d1-e955-4941-955e-57d88b709cc4-0', usage_metadata={'input_tokens': 106, 'output_tokens': 27, 'to