In [7]:
import os
from dotenv import load_dotenv

# Load environment variables via python-dotenv (presumably from a local .env
# file - confirm its location). The recorded cell output shows the call
# returned True.
load_dotenv()

True

In [9]:
# Shell escape: list the models available to the local Ollama install, to
# confirm that the model names used in the configuration cell are present.
!ollama list

NAME                       ID              SIZE      MODIFIED          
nomic-embed-text:latest    0a109f422b47    274 MB    About an hour ago    
llama3.2-vision:latest     085a1fdae525    7.9 GB    2 days ago           
phi3:latest                4f2222927938    2.2 GB    3 days ago           
gemma2:2b                  8ccf136fdd52    1.6 GB    3 days ago           
llama3.2:latest            a80c4f17acd5    2.0 GB    3 days ago           
phi3:medium                cf611a26b048    7.9 GB    3 days ago           
llama3.1:latest            46e0c10c039e    4.9 GB    3 days ago           
mistral:latest             f974a74358d6    4.1 GB    3 days ago           


In [23]:
# Imported here because this cell is the first one (in document order) that
# uses ChatOllama; without it a fresh "Restart & Run All" stops with NameError.
from langchain_ollama import ChatOllama

# Names of the Ollama models used for embeddings and for chat generation.
# They must match entries reported by `ollama list`.
EMBEDDING_MODEL = "nomic-embed-text"
LLM_MODEL = "llama3.2"

# Chat model shared by the chains built later in the notebook.
llm = ChatOllama(model=LLM_MODEL)

In [11]:
import bs4
from langchain_community.document_loaders import WebBaseLoader

#### INDEXING ####

# Load Documents
# Only the parts of the page tagged with these CSS classes are parsed.
post_strainer = bs4.SoupStrainer(
    class_=("post-content", "post-title", "post-header")
)

loader = WebBaseLoader(
    web_paths=("https://lilianweng.github.io/posts/2023-06-23-agent/",),
    bs_kwargs={"parse_only": post_strainer},
)

# Fetch the page and turn it into document objects.
docs = loader.load()

USER_AGENT environment variable not set, consider setting it to identify your requests.


In [12]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Split
# Cut the loaded documents into chunks of at most 1000 units with a
# 200-unit overlap between neighbouring chunks (units are characters with the
# splitter's default length function - confirm if a custom one is configured).
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
)
splits = text_splitter.split_documents(docs)

In [13]:
import chromadb

# Create a Chroma client and show which collections it currently knows about.
# The recorded output is an empty list.
chroma_client = chromadb.Client()
collections = chroma_client.list_collections()

# Bare expression so the notebook displays the list as the cell output.
collections

# Optional manual reset, kept disabled.
# NOTE(review): the earlier form `chroma_client = delete_collection("langchain")`
# called a bare name that no visible cell defines and would have rebound the
# client; presumably the client method below was intended - confirm.
# chroma_client.delete_collection("langchain")
# chroma_client.list_collections()

[]

In [14]:
from langchain_community.vectorstores import Chroma
from langchain_ollama import OllamaEmbeddings

# Embed
# Embedding function backed by the Ollama model named in EMBEDDING_MODEL.
ollama_embeddings = OllamaEmbeddings(model=EMBEDDING_MODEL)

# Build a Chroma vector store from the chunks using that embedding function.
vectorstore = Chroma.from_documents(
    documents=splits,
    embedding=ollama_embeddings,
)

# Retriever view over the store; k=1 requests a single result per query.
retriever = vectorstore.as_retriever(search_kwargs={"k": 1})

In [17]:
from langchain_core.prompts import ChatPromptTemplate
from langchain_ollama import ChatOllama
from langchain import hub
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser


#### RETRIEVAL and GENERATION ####

# Prompt
# Pull the prompt published under "rlm/rag-prompt" via the LangChain hub
# module (presumably a network call - confirm before running offline).
prompt = hub.pull("rlm/rag-prompt")

# Model
# Chat model backed by the Ollama model named in LLM_MODEL.
# NOTE(review): the configuration cell builds `llm` with the same arguments;
# one of the two constructions looks redundant.
llm = ChatOllama(model=LLM_MODEL)

# Post-processing
def format_docs(docs):
    """Join the ``page_content`` of every document into one string.

    Args:
        docs: Iterable of objects exposing a ``page_content`` string attribute.

    Returns:
        The contents in input order, separated by a blank line; an empty
        string when ``docs`` is empty.
    """
    contents = [doc.page_content for doc in docs]
    return "\n\n".join(contents)

# Chain
# The leading dict supplies the two inputs handed to the prompt:
#   - "context": the retriever output passed through format_docs
#   - "question": the chain input, via RunnablePassthrough
# The filled prompt is sent to the chat model, and StrOutputParser is the last
# step (the recorded cell output is a plain string).
rag_chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

# Question
# Run the chain once; the returned value is displayed as the cell output.
rag_chain.invoke("What is Task Decomposition?")

'Task decomposition involves breaking down a complex task into smaller, more manageable subtasks. This technique uses "Chain of thought" prompting to guide large language models (LLMs) to think step-by-step, ultimately decomposing the original complex task for better model performance. By doing so, task decomposition sheds light on how LLMs approach problem solving and provide a clearer interpretation of their reasoning process. \n'

In [24]:
# Prompt
# Hand-written replacement for the hub prompt: the model is told to answer
# using only the retrieved context.
template = """Answer the question based only on the following context:
{context}

Question: {question}
"""

prompt = ChatPromptTemplate.from_template(template)

question = "What is Task Decomposition?"

# Chain
# The retriever is piped through format_docs so {context} is filled with the
# retrieved page text. Passing the retriever alone would put the string form
# of a list of Document objects into the prompt.
rag_chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

# Run
rag_chain.invoke(question)

'Task Decomposition is a technique where a model is instructed to "think step by step" to break down complicated tasks into smaller and simpler steps, as suggested by the Chain of Thought (CoT) prompting technique. This allows the model to utilize more test-time computation and gain insights into its own thinking process.'