# Install Ollama on Colab



In [None]:
!curl -fsSL https://ollama.com/install.sh | sh

# Install dependencies

In [None]:
! pip install --quiet langchain_community tiktoken langchainhub chromadb langchain langgraph tavily-python langchain-mistralai gpt4all

# Load libraries

In [25]:
import os
from langchain_community.document_loaders import WebBaseLoader
from langchain_community.vectorstores import Chroma
from langchain_community.embeddings import GPT4AllEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.chat_models import ChatOllama
from langchain_core.output_parsers import JsonOutputParser, StrOutputParser
from langchain import hub
import bs4

In [26]:
# Activate Langsmith tracing
os.environ['LANGCHAIN_TRACING_V2'] = 'true'
os.environ['LANGCHAIN_ENDPOINT'] = 'https://api.smith.langchain.com'
os.environ['LANGCHAIN_API_KEY'] = <langsmith_api_key>

In [27]:
# Choose model in Ollama
local_llm = "mistral:latest"

In [28]:
# Load
url = "https://lilianweng.github.io/posts/2023-03-15-prompt-engineering/"
loader = WebBaseLoader(url)
docs = loader.load()

In [29]:
# Split
text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
    chunk_size=500, chunk_overlap=100
)
all_splits = text_splitter.split_documents(docs)

In [30]:
# Embed and index
embedding = GPT4AllEmbeddings()

In [31]:
# Index
vectorstore = Chroma.from_documents(
    documents=all_splits,
    collection_name="rag-chroma",
    embedding=embedding,
)
retriever = vectorstore.as_retriever()

In [32]:
# LLM
llm = ChatOllama(model=local_llm, format="json", temperature=0)

# RAG prompt
prompt = hub.pull("rlm/rag-prompt")
question = "What is prompt engineering?"

In [35]:
# Chain
rag_chain = prompt | llm | JsonOutputParser()
result = rag_chain.invoke({"context": docs, "question": question})

In [36]:
result

{'source': 'https://lilianweng.github.io/posts/2023-03-15-prompt-engineering/',
 'title': "Prompt Engineering | Lil'Log",
 'description': 'Prompt Engineering, also known as In-Context Prompting, refers to methods for how to communicate with LLM to steer its behavior for desired outcomes without updating the model weights. It is an empirical science and the effect of prompt engineering methods can vary a lot among models, thus requiring heavy experimentation and heuristics.\nThis post only focuses on prompt engineering for autoregressive language models, so nothing with Cloze tests, image generation or multimodality models.',
 'language': 'en'}