# Ollama Web RAG
## Config Web Loader

In [24]:
import os

from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import PGVector
from langchain_community.vectorstores import Chroma
from langchain.prompts import ChatPromptTemplate, PromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_ollama.chat_models import ChatOllama
from langchain_core.runnables import RunnablePassthrough
from langchain.retrievers.multi_query import MultiQueryRetriever
from langchain_community.document_loaders import WebBaseLoader
from langchain_openai import OpenAIEmbeddings

import warnings
warnings.filterwarnings('ignore')

from IPython.display import display, Markdown

# Runtime configuration that third-party libraries read from the environment.
# NOTE(review): these assignments run after the imports above; any library that
# reads its variable at import time will not see the value — confirm ordering.

# Correctly spelled variable name (the previous spelling was missing an "N",
# so the setting was never picked up).
os.environ["PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION"] = "python"
os.environ["USER_AGENT"] = "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/130.0.0.0 Safari/537.36"

# The OpenAI key must come from the environment (shell export, .env loader,
# secrets manager) and never from the notebook source. The literal key that was
# previously hard-coded on this line has been exposed and must be revoked.
if not os.environ.get("OPENAI_API_KEY"):
    print("OPENAI_API_KEY is not set; export it before running the embedding cells.")

## Load Webpages

In [25]:
# Source articles that make up the knowledge base
urls = [
    "https://lilianweng.github.io/posts/2023-06-23-agent/",
    "https://lilianweng.github.io/posts/2023-03-15-prompt-engineering/",
    "https://lilianweng.github.io/posts/2023-10-25-adv-attack-llm/",
]

# `docs` keeps one result list per URL (same order as `urls`)
docs = list(map(lambda page_url: WebBaseLoader(page_url).load(), urls))

# `docs_list` is the flattened sequence of every loaded document
docs_list = []
for page_docs in docs:
    docs_list.extend(page_docs)


## Split text into chunks

In [26]:
# TODO: find out what tiktoken is and evaluate the token-based splitter:
#   RecursiveCharacterTextSplitter.from_tiktoken_encoder(chunk_size=1000, chunk_overlap=200)

# Character-based splitting: chunks of up to 500 with an overlap of 10
splitter_options = {"chunk_size": 500, "chunk_overlap": 10}
text_splitter = RecursiveCharacterTextSplitter(**splitter_options)

chunks = text_splitter.split_documents(docs_list)
print(f"Text split into {len(chunks)} chunks")

Text split into 378 chunks


## Vector Database

In [27]:
# One embedding client shared by both stores (previously two identical clients
# were constructed). The key is read from the OPENAI_API_KEY environment variable.
embeddings = OpenAIEmbeddings(openai_api_key=os.getenv("OPENAI_API_KEY"))

# Both stores index the same chunks under the same collection name.
COLLECTION_NAME = "local-rag-tacoma"

# Chroma store: this is the one the retriever below is built on.
# NOTE(review): re-running this cell in the same kernel may add the chunks to
# the existing collection again — confirm and reset the collection if so.
chroma_db = Chroma.from_documents(
    documents=chunks,
    embedding=embeddings,
    collection_name=COLLECTION_NAME,
)

# PGVector store built from the same chunks.
# NOTE(review): no connection string is passed; presumably it is taken from the
# PGVECTOR_CONNECTION_STRING environment variable — confirm it is set.
# NOTE(review): `pgvector` is not referenced by the later cells shown here, and
# building it embeds every chunk a second time — drop it if it is not needed.
pgvector = PGVector.from_documents(
    documents=chunks,
    embedding=embeddings,
    collection_name=COLLECTION_NAME,
)

print("Vector database created successfully")

Vector database created successfully


In [29]:
# Chat model served locally through Ollama.
# A previously tried alternative model name: "granite3-dense:8b".
local_model = "llama3.2:latest"
llm = ChatOllama(model=local_model)

# Prompt asking the LLM to rephrase the user's question into 2 alternative
# queries, one per line. Typos in the instruction text were corrected
# ("give" -> "given", "on user question" -> "on the user question").
query_prompt = PromptTemplate(
    input_variables=["question"],
    template="""You are an AI language model assistant.  Your task is to generate 2
    different versions of the given user question to retrieve relevant documents from
    a vector database. By generating multiple perspectives on the user question, your
    goal is to help users overcome some of the limitations of distance-based
    similarity search. Provide these alternative questions separated by newlines.
    Original question: {question}"""
)

# Multi-query retriever: wraps the Chroma store's retriever and uses `llm`
# with `query_prompt` to produce the alternative queries.
retriever = MultiQueryRetriever.from_llm(
    retriever=chroma_db.as_retriever(),
    llm=llm,
    prompt=query_prompt,
)

## Create Chain

In [31]:
# Answer prompt: `{context}` receives the retrieved material and `{question}`
# the user's question. The instruction previously read "on on the following
# context"; it now states the intended "based on".
template = """Answer the question based on the following context:
{context}
Question: {question}
"""
prompt = ChatPromptTemplate.from_template(template)

In [32]:
def _format_docs(documents):
    """Join the text of the retrieved documents into one context string.

    Args:
        documents: iterable of objects exposing a ``page_content`` string
            (the documents returned by the retriever).

    Returns:
        The page contents separated by blank lines.
    """
    return "\n\n".join(doc.page_content for doc in documents)


# RAG pipeline: question -> retrieve -> fill prompt -> LLM -> plain string.
# The retriever output is passed through `_format_docs` so the prompt receives
# only the page text; without it the list of Document objects is rendered via
# its Python repr (metadata included) inside `{context}`.
chain = (
    {"context": retriever | _format_docs, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

In [33]:
def chat_with_web(question):
    """
    Ask the chain a question about the loaded web pages and render the answer.

    Args:
        question: the question text passed to ``chain.invoke``.

    Returns:
        Whatever ``display`` returns for the rendered Markdown answer.
    """
    answer_text = chain.invoke(question)
    rendered_answer = Markdown(answer_text)
    return display(rendered_answer)

In [34]:
# Example query against the indexed articles
demo_question = "What is prompt engineering?"
chat_with_web(demo_question)

According to the provided context, Prompt Engineering, also known as In-Context Prompting, refers to methods for communicating with Large Language Models (LLMs) to steer their behavior for desired outcomes without updating the model weights. It is an empirical science that requires heavy experimentation and heuristics, and its effectiveness can vary greatly among different models.