# Ollama Web RAG
## Config Web Loader

In [108]:
import os

from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_ollama import OllamaEmbeddings
from langchain_community.vectorstores import PGVector
from langchain_community.vectorstores import Chroma
from langchain.prompts import ChatPromptTemplate, PromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_ollama.chat_models import ChatOllama
from langchain_core.runnables import RunnablePassthrough
from langchain.retrievers.multi_query import MultiQueryRetriever
from langchain_community.document_loaders import WebBaseLoader
from langchain_openai import OpenAIEmbeddings
from dotenv import load_dotenv

load_dotenv(dotenv_path='.env')

import warnings
warnings.filterwarnings('ignore')

from IPython.display import display, Markdown



## Load Webpages

In [109]:
# Blog posts that form the knowledge base for retrieval
urls = [
    "https://lilianweng.github.io/posts/2023-06-23-agent/",
    "https://lilianweng.github.io/posts/2023-03-15-prompt-engineering/",
    "https://lilianweng.github.io/posts/2023-10-25-adv-attack-llm/",
]

# One loader call per URL; each call returns a list of documents
docs = [WebBaseLoader(url).load() for url in urls]

# Flatten the per-URL lists into a single list of documents
docs_list = []
for page_docs in docs:
    docs_list.extend(page_docs)


## Split text into chunks

In [110]:
# TODO: find out what tiktoken is and whether the token-based splitter is a better fit:
#   RecursiveCharacterTextSplitter.from_tiktoken_encoder(chunk_size=1000, chunk_overlap=200)
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=500,
    chunk_overlap=10,
)
chunks = text_splitter.split_documents(docs_list)
print(f"Text split into {len(chunks)} chunks")

Text split into 378 chunks


## Vector Database

In [111]:
collection_name = "local-rag"

# Build each embedding client once and share it between the Chroma and PGVector
# stores instead of constructing an identical client per store.
openai_embeddings = OpenAIEmbeddings(openai_api_key=os.getenv("OPENAI_API_KEY"))
nomic_embeddings = OllamaEmbeddings(model="nomic-embed-text")

# Chroma stores: no persist directory is given.
# NOTE(review): presumably these collections are in-memory and vanish with the
# kernel, but re-running this cell in a live kernel may still append duplicates — confirm.
chroma_db_openai = Chroma.from_documents(
    documents=chunks,
    embedding=openai_embeddings,
    collection_name=f"{collection_name}-openai",
)

chroma_db_nomic = Chroma.from_documents(
    documents=chunks,
    embedding=nomic_embeddings,
    collection_name=f"{collection_name}-nomic",
)

# PGVector stores: no connection string is passed here.
# NOTE(review): presumably it is read from the environment populated by
# load_dotenv() — confirm against the .env file.
# pre_delete_collection=True drops any existing collection of the same name
# first, so re-running the notebook does not pile duplicate chunks into Postgres.
pgvector_db_openai = PGVector.from_documents(
    collection_name=f"{collection_name}-openai",
    documents=chunks,
    embedding=openai_embeddings,
    use_jsonb=True,
    pre_delete_collection=True,
)
pgvector_db_nomic = PGVector.from_documents(
    collection_name=f"{collection_name}-nomic",
    documents=chunks,
    embedding=nomic_embeddings,
    use_jsonb=True,
    pre_delete_collection=True,
)

print("Vector database created successfully")

Vector database created successfully


## Create Chain

In [112]:
local_model = "llama3.2:latest"
# Alternative local model: "granite3-dense:8b"
llm = ChatOllama(model=local_model)

# Prompt used by the multi-query retrievers to rephrase the user's question
# before searching the vector store.
query_prompt = PromptTemplate(
    input_variables=["question"],
    template="""You are an AI language model assistant.  Your task is to generate 2
    different versions of the given user question to retrieve relevant documents from
    a vector database. By generating multiple perspectives on user question, your
    goal is to help users overcome some of the limitations of distance-based
    similarity search. Provide these alternative questions separated by newlines.
    Original question: {question}"""
)


def _multi_query_retriever(vector_store):
    """Wrap a vector store in a MultiQueryRetriever driven by ``llm`` and ``query_prompt``.

    Args:
        vector_store: Object exposing ``as_retriever()`` (the Chroma / PGVector
            stores built in this notebook).

    Returns:
        The retriever produced by ``MultiQueryRetriever.from_llm``.
    """
    return MultiQueryRetriever.from_llm(
        vector_store.as_retriever(),
        llm,
        prompt=query_prompt
    )


# One retriever per (vector store, embedding model) combination.
chroma_retriever_openai = _multi_query_retriever(chroma_db_openai)
pg_retriever_openai = _multi_query_retriever(pgvector_db_openai)
chroma_retriever_nomic = _multi_query_retriever(chroma_db_nomic)
pg_retriever_nomic = _multi_query_retriever(pgvector_db_nomic)

In [101]:
# Answer prompt: {context} is filled from the retriever output and {question}
# from the user's input when the chains are invoked.
template = """Answer the question based on the following context:
{context}
Question: {question}
"""
prompt = ChatPromptTemplate.from_template(template)

In [113]:
def _format_docs(docs):
    """Join the text of the retrieved documents into one context string.

    Without this step the list of document objects is stringified as-is when it
    is substituted into the prompt, so the model sees object reprs and metadata
    rather than plain page text.

    Args:
        docs: Iterable of documents exposing ``page_content``.

    Returns:
        The page contents separated by blank lines.
    """
    return "\n\n".join(doc.page_content for doc in docs)


def _build_rag_chain(retriever):
    """Assemble retriever -> prompt -> llm -> string parser for one retriever.

    Args:
        retriever: Retriever whose results fill the prompt's ``{context}`` slot.

    Returns:
        A runnable chain; the value passed to ``invoke`` is forwarded unchanged
        as ``{question}``.
    """
    return (
        {"context": retriever | _format_docs, "question": RunnablePassthrough()}
        | prompt
        | llm
        | StrOutputParser()
    )


# One chain per (vector store, embedding model) combination.
chroma_chain_openai = _build_rag_chain(chroma_retriever_openai)
pg_chain_openai = _build_rag_chain(pg_retriever_openai)
chroma_chain_nomic = _build_rag_chain(chroma_retriever_nomic)
pg_chain_nomic = _build_rag_chain(pg_retriever_nomic)

In [114]:
def chroma_chat_with_web_openai(question):
    """Ask the web-page chain backed by Chroma + OpenAI embeddings and render the reply.

    Args:
        question: Input passed to ``chroma_chain_openai.invoke``.

    Returns:
        The result of ``display`` for the answer wrapped in ``Markdown``.
    """
    answer = chroma_chain_openai.invoke(question)
    rendered = Markdown(answer)
    return display(rendered)
def pg_chat_with_web_openai(question):
    """Ask the web-page chain backed by PGVector + OpenAI embeddings and render the reply.

    Args:
        question: Input passed to ``pg_chain_openai.invoke``.

    Returns:
        The result of ``display`` for the answer wrapped in ``Markdown``.
    """
    answer = pg_chain_openai.invoke(question)
    rendered = Markdown(answer)
    return display(rendered)
def chroma_chat_with_web_nomic(question):
    """Ask the web-page chain backed by Chroma + nomic-embed-text and render the reply.

    Args:
        question: Input passed to ``chroma_chain_nomic.invoke``.

    Returns:
        The result of ``display`` for the answer wrapped in ``Markdown``.
    """
    answer = chroma_chain_nomic.invoke(question)
    rendered = Markdown(answer)
    return display(rendered)
def pg_chat_with_web_nomic(question):
    """Ask the web-page chain backed by PGVector + nomic-embed-text and render the reply.

    Args:
        question: Input passed to ``pg_chain_nomic.invoke``.

    Returns:
        The result of ``display`` for the answer wrapped in ``Markdown``.
    """
    answer = pg_chain_nomic.invoke(question)
    rendered = Markdown(answer)
    return display(rendered)


In [115]:
# Chroma store + OpenAI embeddings; the answer is rendered as Markdown below.
chroma_chat_with_web_openai("What is prompt engineering?")

According to the provided documents, prompt engineering, also known as In-Context Prompting, refers to methods for communicating with LLMs (Large Language Models) to steer their behavior for desired outcomes without updating the model weights. It is an empirical science that requires heavy experimentation and heuristics, as the effect of prompt engineering methods can vary a lot among models.

In [116]:
# Chroma store + nomic-embed-text embeddings; same question for comparison.
chroma_chat_with_web_nomic("What is prompt engineering?")

According to the provided context, Prompt Engineering (also known as In-Context Prompting) refers to methods for communicating with LLMs to steer their behavior towards desired outcomes without updating the model weights. It is an empirical science that requires heavy experimentation and heuristics, and its effect can vary greatly among models.

In [117]:
# PGVector store + OpenAI embeddings; same question for comparison.
pg_chat_with_web_openai("What is prompt engineering?")

Prompt engineering refers to methods for how to communicate with a Large Language Model (LLM) to steer its behavior for desired outcomes without updating the model weights. It is an empirical science, and the effect of prompt engineering methods can vary a lot among models, requiring heavy experimentation and heuristics. Prompt engineering focuses on alignment and model steerability.

In [118]:
# PGVector store + nomic-embed-text embeddings; same question for comparison.
pg_chat_with_web_nomic("What is prompt engineering?")

Based on the provided context, prompt engineering, also known as In-Context Prompting, refers to methods for communicating with LLMs to steer their behavior for desired outcomes without updating the model weights. It involves using a sequence of prefix tokens (prompts) to increase the probability of getting a desired output given input, and can be optimized directly on the embedding space via gradient descent. The goal of prompt engineering is to fine-tune the behavior of LLMs without modifying their underlying weights, allowing for more flexibility and control over their performance.