# install dependencies:


In [None]:
# Install the Chroma vector store and the LangChain packages imported further down.
! pip install chromadb langchain langchain_community langchainhub langchain-core


In [None]:
# Quietly (-q) install sentence_transformers.
# NOTE(review): presumably the backend HuggingFaceEmbeddings relies on — confirm.
! pip -q install sentence_transformers

In [None]:
!curl https://ollama.ai/install.sh | sh

In [None]:
# Install colab-xterm and load its IPython extension so the %xterm magic
# used later in this notebook is available.
!pip install colab-xterm
%load_ext colabxterm

 # Import packages:

In [None]:
import bs4
from langchain import hub
from langchain_community.llms import Ollama
from langchain_community.chat_models import ChatOllama
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import WebBaseLoader
from langchain_community.vectorstores import Chroma
from langchain.embeddings import HuggingFaceEmbeddings
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough


# Load documents and index them:

In [None]:
# Load the blog post, parsing only the elements whose CSS class marks the
# post title, header or body.
content_filter = bs4.SoupStrainer(
    class_=("post-content", "post-title", "post-header")
)
loader = WebBaseLoader(
    web_paths=("https://lilianweng.github.io/posts/2023-06-23-agent/",),
    bs_kwargs={"parse_only": content_filter},
)
docs = loader.load()


In [None]:
# Split the loaded documents into overlapping chunks
text_splitter = RecursiveCharacterTextSplitter(
    chunk_overlap=200,
    chunk_size=1000,
)
splits = text_splitter.split_documents(docs)


In [None]:
# Embed the chunks with the gte-base model and index them in Chroma
embeddings = HuggingFaceEmbeddings(model_name="thenlper/gte-base")
vectorstore = Chroma.from_documents(embedding=embeddings, documents=splits)

# Retriever over the vector store; the RAG chain uses it to fetch context
retriever = vectorstore.as_retriever()


# Run the Terminal inside the NoteBook:

* First, open a terminal with the `%xterm` cell below. Then install **ollama** inside the running terminal:
```
curl https://ollama.ai/install.sh | sh
```
* Once Ollama is installed, we can serve it and pull an LLM of choice:
```
ollama serve & ollama pull llama3
```
* Here I pick `llama3` but you can go with any model you like.
* The final step is to run the model:
```
ollama run llama3
```
* This step will take a bit of time.

**PS: all 3 steps above should be executed inside the terminal window.**

In [None]:
# Open a terminal inside the notebook (needs the colabxterm extension loaded earlier).
%xterm


 # Prompt and Generation:

In [None]:
# Prompt: pull the shared RAG prompt template from the LangChain Hub
prompt = hub.pull("rlm/rag-prompt")

# LLM: the llama3 model served by the local Ollama instance.
# format="json" is deliberately not set: JSON mode constrains the model to
# emit JSON, while this prompt asks for a plain-text answer and the chain
# parses the reply as a plain string.
llm = ChatOllama(model='llama3', temperature=0)


In [None]:
# Post-processing
def format_docs(docs):
    """Merge documents into a single context string.

    Takes an iterable of objects exposing ``page_content`` and returns their
    contents, in order, separated by a blank line.
    """
    contents = [doc.page_content for doc in docs]
    return "\n\n".join(contents)


In [None]:

# Chain: retrieve and format context for the question, fill the prompt,
# query the LLM, then parse the reply with StrOutputParser.
retrieve_context = retriever | format_docs
rag_chain = (
    {"context": retrieve_context, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

In [None]:
# Ask the RAG chain a sample question
question = "What is Task Decomposition?"
rag_chain.invoke(question)
