## Setup Instructions

Download, install, and start [Ollama](https://ollama.com/).

Run `ollama run gemma2:2b`

Start chatting with a Gemma model that runs directly on your laptop!

Then you can explore many more models in the [Ollama model library](https://ollama.com/library)!

# Generate content

In [None]:
import ollama
from IPython.display import Markdown, display

def get_response_from_model(prompt, model):
    """Ask an Ollama model to complete ``prompt`` and return the generated text.

    Args:
        prompt: Text passed to ``ollama.generate`` as the prompt.
        model: Name of the Ollama model to use (for example ``"gemma2:2b"``).

    Returns:
        The value stored under the ``'response'`` key of the result
        returned by ``ollama.generate``.
    """
    generation = ollama.generate(prompt=prompt, model=model)
    return generation['response']

# Model to query and the question to ask it
model = "gemma2:2b"
prompt = "Is Ollama the best tool I have ever encountered?"

# Ask the model for its answer
response = get_response_from_model(
    prompt,
    model,
)

# Render the answer as Markdown in the notebook output
display(
    Markdown(response)
)

# Retrieval-augmented generation (RAG)

In [None]:
# Prerequisite: run `ollama pull mxbai-embed-large` in a terminal to download the embedding model
import ollama
import chromadb

# Small in-line corpus of facts used to demonstrate retrieval
documents = [
  "Llamas are members of the camelid family meaning they're pretty closely related to vicuñas and camels",
  "Llamas were first domesticated and used as pack animals 4,000 to 5,000 years ago in the Peruvian highlands",
  "Llamas can grow as much as 6 feet tall though the average llama between 5 feet 6 inches and 5 feet 9 inches tall",
  "Llamas weigh between 280 and 450 pounds and can carry 25 to 30 percent of their body weight",
  "Llamas are vegetarians and have very efficient digestive systems",
  "Llamas live to be about 20 years old, though some only live for 15 years and others live to be 30 years old",
]

client = chromadb.Client()
# get_or_create_collection (rather than create_collection) lets this cell be
# re-run in the same kernel: create_collection raises if "docs" already exists.
collection = client.get_or_create_collection(name="docs")

# Embed every document in a single request; ollama.embed accepts a list of
# strings and returns one embedding per input, in the same order.
response = ollama.embed(model="mxbai-embed-large", input=documents)
embeddings = response["embeddings"]

# Store the documents with their embeddings. upsert overwrites rows that
# already have the same id, so re-running the cell stays idempotent.
collection.upsert(
  ids=[str(i) for i, _ in enumerate(documents)],
  embeddings=embeddings,
  documents=documents,
)

In [None]:
# The question to answer.
# NOTE: the name `input` shadows Python's built-in input(); it is kept because
# the next cell reads this variable.
input = "What animals are llamas related to?"

# Embed the question with the same model that was used for the documents
response = ollama.embed(input=input, model="mxbai-embed-large")
question_embedding = response["embeddings"][0]

# Look up the single closest document in the collection
results = collection.query(
  n_results=1,
  query_embeddings=[question_embedding],
)

# results['documents'] holds one list of hits per query; take the top hit of the only query
top_hits = results['documents'][0]
data = top_hits[0]
print(data)

In [None]:
# Build a prompt that pairs the retrieved document with the original question
rag_prompt = f"Using this data: {data}. Respond to this prompt: {input}"

# Ask the chat model to answer using the retrieved context
output = ollama.generate(prompt=rag_prompt, model="gemma2:2b")

print(output['response'])

# More realistic scenario

In [None]:
import ollama
import chromadb
from datasets import load_dataset

# Load the first 1000 articles of the ag_news training split
dataset = load_dataset("ag_news", split="train[:1000]")

# Use the raw article text as the documents. list() gives a plain list that
# can be sliced into batches below.
documents = list(dataset['text'])

client = chromadb.Client()
# get_or_create_collection (rather than create_collection) lets this cell be
# re-run in the same kernel: create_collection raises if "news" already exists.
collection = client.get_or_create_collection(name="news")

# Embed and store the documents in batches: ollama.embed accepts a list of
# strings, so one request per batch replaces one request per article.
batch_size = 50
for start in range(0, len(documents), batch_size):
  batch = documents[start:start + batch_size]
  response = ollama.embed(model="mxbai-embed-large", input=batch)
  # ids are the article's position in `documents`, as strings ("0", "1", ...).
  # upsert overwrites rows with the same id, so re-running stays idempotent.
  collection.upsert(
    ids=[str(i) for i in range(start, start + len(batch))],
    embeddings=response["embeddings"],
    documents=batch,
  )

In [None]:
# The question to answer.
# NOTE: the name `input` shadows Python's built-in input(); it is kept because
# the next cell reads this variable.
input = "What is the latest news from the UK?"

# Embed the question with the same model that was used for the articles
response = ollama.embed(input=input, model="mxbai-embed-large")
question_embedding = response["embeddings"][0]

# Look up the single closest article in the collection
results = collection.query(
  n_results=1,
  query_embeddings=[question_embedding],
)

# results['documents'] holds one list of hits per query; take the top hit of the only query
top_hits = results['documents'][0]
data = top_hits[0]
print(data)

In [None]:
# Build a prompt that pairs the retrieved article with the original question
rag_prompt = f"Using this data: {data}. Respond to this prompt: {input}"

# Ask the chat model to answer using the retrieved context
output = ollama.generate(prompt=rag_prompt, model="gemma2:2b")

print(output['response'])