# Install Ollama on Colab


In [None]:
!curl -fsSL https://ollama.com/install.sh | sh

# Install dependencies

In [None]:
! pip install --quiet langchain_community tiktoken langchainhub chromadb langchain langgraph tavily-python langchain-mistralai gpt4all

# Load libraries

In [1]:
import os
import bs4
from langchain_community.document_loaders import WebBaseLoader
from langchain_community.embeddings import GPT4AllEmbeddings
from langchain_community.vectorstores import Chroma
from langchain.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser, JsonOutputParser
from langchain_community.chat_models import ChatOllama
from langchain.load import dumps, loads
from operator import itemgetter
from langchain import hub
from langchain_core.runnables import RunnablePassthrough
from langchain.text_splitter import RecursiveCharacterTextSplitter

In [2]:
# Activate Langsmith tracing
os.environ['LANGCHAIN_TRACING_V2'] = 'true'
os.environ['LANGCHAIN_ENDPOINT'] = 'https://api.smith.langchain.com'
os.environ['LANGCHAIN_API_KEY'] = <langsmith_api_key>

# Loading document

In [3]:
# Load blog

loader = WebBaseLoader(
    web_paths=("https://lilianweng.github.io/posts/2023-03-15-prompt-engineering/",),
    bs_kwargs=dict(
        parse_only=bs4.SoupStrainer(
            class_=("post-content", "post-title", "post-header")
        )
    ),
)
blog_docs = loader.load()

In [4]:
# Split
text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
    chunk_size=500,
    chunk_overlap=50)

# Make splits
splits = text_splitter.split_documents(blog_docs)


# Load vector storage Chromadb

In [5]:
# Index
embedding = GPT4AllEmbeddings()
vectorstore = Chroma.from_documents(documents=splits,
                                    embedding=embedding)
retriever = vectorstore.as_retriever()

# Multi Query
In multiquerying, we make the LLM come up with multiple versions of the question that would be more appropriate for similarity searches on the vector embedding space

In [6]:
# Multi Query: Different Perspectives

template = """You are an AI language model assistant. You are to generate five
different versions of the given user question. By generating multiple perspectives
on the user question, your goal is to help the user overcome some of the limitations
of the distance-based similarity search. Provide these alternative questions
separated by commas. Original question: {question}"""
prompt_perspectives = ChatPromptTemplate.from_template(template)

# Local LLM
local_llm = "llama2:13b"

llm = ChatOllama(model=local_llm, format="json", temperature=0)

generate_queries = (
    prompt_perspectives
    | llm
    | JsonOutputParser()
    | (lambda x: list(x.keys()) + list(x.values()))
)

In [7]:
def get_unique_union(documents: list[list]):
    """ Unique union of retrieved docs """
    # Flatten list of lists, and convert each Document to string
    flattened_docs = [dumps(doc) for sublist in documents for doc in sublist]
    # Get unique documents
    unique_docs = list(set(flattened_docs))
    # Return
    return [loads(doc) for doc in unique_docs]

# Retrieve
question = "What is prompt engineering?"
retrieval_chain = generate_queries | retriever.map() | get_unique_union
docs = retrieval_chain.invoke({"question":question})


In [8]:
# RAG
prompt = hub.pull("rlm/rag-prompt")

final_rag_chain = (
    {"context": retrieval_chain,
    "question": itemgetter("question")}
    | prompt
    | llm
    | StrOutputParser()
)

final_rag_chain.invoke({"question":question})

'{ "Prompt engineering refers to the process of designing and crafting input statements, or prompts, for large language models to generate specific and desired outputs. It involves understanding the model\'s capabilities and limitations, as well as the nuances of human language and communication."\n\n \n\n \n\n \n\n \n\n \n\n \n\n \n\n \n\n \n\n \n'