In [None]:
!pip install langchain-text-splitters langchain-community langgraph openai==1.55.3 langchain-openai faiss-cpu

Collecting faiss-cpu
  Downloading faiss_cpu-1.9.0.post1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (4.4 kB)
Downloading faiss_cpu-1.9.0.post1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (27.5 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m27.5/27.5 MB[0m [31m31.6 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: faiss-cpu
Successfully installed faiss-cpu-1.9.0.post1


In [None]:
from google.colab import userdata
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
# Read the OpenAI key from the Colab secret store once and share it between
# the chat model and the embedding model instead of looking it up twice.
openai_api_key = userdata.get('openai-key')

# Chat model used to generate the final answer.
llm = ChatOpenAI(model='gpt-4o', api_key=openai_api_key)

# Embedding model used to index and query the document chunks.
embeddings = OpenAIEmbeddings(model="text-embedding-3-small", api_key=openai_api_key)

In [None]:
from langchain_community.vectorstores import FAISS

# NOTE: `FAISS(embedding_function=embeddings)` is not a valid way to create a
# store -- the constructor also requires `index`, `docstore` and
# `index_to_docstore_id`, so that call raised a TypeError and stopped a
# top-to-bottom run of the notebook. No store is created here; the populated
# `vector_store` is built with `FAISS.from_documents(...)` once the document
# chunks exist.

TypeError: FAISS.__init__() missing 3 required positional arguments: 'index', 'docstore', and 'index_to_docstore_id'

In [None]:
import bs4
from langchain import hub
from langchain_community.document_loaders import WebBaseLoader
from langchain_core.documents import Document
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langgraph.graph import START, StateGraph
from typing_extensions import List, TypedDict

# Load the contents of the blog post (chunking happens in a later cell).
# https://lilianweng.github.io/posts/2023-06-23-agent/
#
# The SoupStrainer restricts HTML parsing to elements carrying one of the
# listed CSS classes.
post_strainer = bs4.SoupStrainer(
    class_=("post-content","post-title","post-header")
)
loader = WebBaseLoader(
    web_paths=("https://lilianweng.github.io/posts/2023-06-23-agent/",),
    bs_kwargs={"parse_only": post_strainer},
)
del post_strainer  # helper name only; keep the notebook namespace unchanged

docs = loader.load()

In [None]:
# Split the loaded documents into chunks of up to 1000 characters, with a
# 200-character overlap between neighbouring chunks.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
)
all_splits = text_splitter.split_documents(docs)


In [None]:
# Embed every chunk and index it in a FAISS vector store.
vector_store = FAISS.from_documents(
    documents=all_splits,
    embedding=embeddings,
)

# Fetch the RAG prompt template from the LangChain hub.
prompt = hub.pull("rlm/rag-prompt")



In [None]:
class State(TypedDict):
    """State dictionary passed between the nodes of the RAG graph."""

    # The user's question; read by both `retrieve` and `generate`.
    question: str
    # Documents found for the question; written by `retrieve`.
    context: List[Document]
    # Final model response text; written by `generate`.
    answer: str


In [None]:
def retrieve(state: State):
    """Search the module-level ``vector_store`` for the question.

    Args:
        state: Graph state; only ``state["question"]`` is read.

    Returns:
        A dict with a single ``"context"`` key holding whatever
        ``vector_store.similarity_search`` returned for the question.
    """
    question = state["question"]
    matches = vector_store.similarity_search(question)
    return {"context": matches}

In [None]:
def generate(state: State):
    """Answer the question using the retrieved documents as context.

    The ``page_content`` of every document in ``state["context"]`` is joined
    with blank lines, combined with ``state["question"]`` through the
    module-level ``prompt``, and the result is sent to the module-level ``llm``.

    Args:
        state: Graph state; ``"question"`` and ``"context"`` are read.

    Returns:
        A dict with a single ``"answer"`` key holding the ``content`` of the
        model response.
    """
    passages = [doc.page_content for doc in state["context"]]
    prompt_input = {
        "question": state["question"],
        "context": "\n\n".join(passages),
    }
    completion = llm.invoke(prompt.invoke(prompt_input))
    return {"answer": completion.content}

In [None]:
from langgraph.graph import START,StateGraph

# Wire the pipeline: START -> retrieve -> generate.
graph_builder = StateGraph(State)
graph_builder.add_sequence([retrieve, generate])  # adds both nodes, chained in order
graph_builder.add_edge(START, "retrieve")  # enter the graph at the retrieval step
graph = graph_builder.compile()

In [None]:
# Run the full graph for one question, then show the retrieved documents
# followed by the generated answer.
result = graph.invoke({"question":"what is task decomposition?"})
for key in ("context", "answer"):
    print(result[key])

[Document(id='e42cfa0d-6c2a-43db-ac76-29f38e833383', metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/'}, page_content='Tree of Thoughts (Yao et al. 2023) extends CoT by exploring multiple reasoning possibilities at each step. It first decomposes the problem into multiple thought steps and generates multiple thoughts per step, creating a tree structure. The search process can be BFS (breadth-first search) or DFS (depth-first search) with each state evaluated by a classifier (via a prompt) or majority vote.\nTask decomposition can be done (1) by LLM with simple prompting like "Steps for XYZ.\\n1.", "What are the subgoals for achieving XYZ?", (2) by using task-specific instructions; e.g. "Write a story outline." for writing a novel, or (3) with human inputs.'), Document(id='d97b5a35-60d3-4945-93bf-d5897c063b23', metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/'}, page_content='The AI assistant can parse user input to several tasks: [{"task