In [1]:
import ollama

In [2]:
import pandas as pd

In [3]:
import chromadb

In [4]:
from chromadb.config import Settings
from chromadb.utils import embedding_functions

In [8]:
# Ollama model name passed to ollama.generate() to write K9's final answer.
gen_model = "granite3.1-dense:2b"
# Alternative generation model: "llama3-chatqa:8b"

# Ollama model name passed to ollama.embed() for both stored documents and
# incoming queries.
# Alternative embedding models: "nomic-embed-text", "granite-embedding:278m"
embed_model = "granite-embedding:30m"

# Name of the Chroma collection that holds the embedded documents.
# NOTE(review): the name appears to encode the embedding model ("gran_30m");
# presumably it should be changed together with embed_model so vectors from
# different models are not mixed in one collection -- confirm.
collection_name = "k9_ltm_gran_30m"


In [9]:
# Chroma client backed by files under ./chroma_db (relative to the working
# directory), so stored collections are kept on disk rather than in memory.
chroma_client = chromadb.PersistentClient(path="./chroma_db")

In [10]:
# Open the collection named by collection_name, creating it if it is missing.
collection = chroma_client.get_or_create_collection(name=collection_name)

In [11]:
def add_documents(documents, batch_size=64):
    """
    Add documents to ChromaDB

    Every document is embedded with the Ollama model named by ``embed_model``
    and written to ``collection`` under its position in ``documents`` as a
    string id ("0", "1", ...).

    Records are written with ``upsert`` rather than ``add``: the ids are
    positional and the collection is persistent, so calling this a second
    time would otherwise collide with the ids stored by the first call.
    With ``upsert`` a repeated call simply replaces the stored entries.

    Parameters
    ----------
    documents : iterable of str
        Texts to embed and store (a list or a pandas Series both work).
    batch_size : int, optional
        Number of documents sent to ``ollama.embed`` per request. Must be
        at least 1.

    Raises
    ------
    ValueError
        If ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be at least 1")

    # Materialise once so len() and slicing work for any iterable.
    docs = list(documents)

    for start in range(0, len(docs), batch_size):
        batch = docs[start:start + batch_size]
        # ollama.embed accepts a list and returns one embedding per input,
        # which avoids one request per document.
        response = ollama.embed(model=embed_model, input=batch)
        collection.upsert(
            ids=[str(start + offset) for offset in range(len(batch))],
            embeddings=response["embeddings"],
            documents=batch,
        )
    print(f"Added {len(docs)} documents to K9's long term memory.")

In [12]:
def retrieve_document(query):
    """
    Retrieve the most relevant document

    The query text is embedded with the Ollama model named by
    ``embed_model`` and the single nearest entry in ``collection`` is
    looked up.

    Parameters
    ----------
    query : str
        The question or search text.

    Returns
    -------
    str or None
        The text of the closest stored document, or ``None`` when the
        query produced no match (for example, an empty collection).
    """
    response = ollama.embed(model=embed_model, input=query)
    results = collection.query(
        query_embeddings=response["embeddings"],
        n_results=1,
    )
    # "documents" holds one list of matches per query embedding, e.g.
    # [["text"]]. With no match it is [[]], which is truthy on its own, so
    # the inner list must be checked too before indexing into it.
    matches = results["documents"]
    if matches and matches[0]:
        return matches[0][0]  # Return top match
    return None

In [13]:
# The CSV's first column is the row index it was saved with; without
# index_col=0 pandas loads it as a spurious "Unnamed: 0" data column.
df = pd.read_csv('k9_stories_500.csv', index_col=0)
print(f"{len(df)} rows in the data.")
# Show a few random rows as a sanity check of the loaded columns.
df.sample(5)

72 rows in the data.


Unnamed: 0,title,mistress,synopsis
1,The Invisible Enemy,Leela,I was put to the test when the Doctor received...
30,The Androids of Tara,Romana,I stayed in the TARDIS while Doctor Master and...
39,The Armageddon Factor,Romana,The Doctor Master leads a servant of the Shado...
35,The Androids of Tara,Romana,Grendel then plotted to have Romana pose as St...
27,The Stones of Blood,Romana,The Mistress caught Vivien awakening more ston...


In [14]:
# Embed every story synopsis and store it in the Chroma collection.
add_documents(df['synopsis'])

Added 72 documents to K9's long term memory.


In [19]:
# Ask a question and fetch the single closest synopsis from the collection.
query = "How many segments does the Key to Time have?"
data = retrieve_document(query)
print(data)

I noticed that the Key to Time locator was giving odd signals that seemed to indicate that the segment was everywhere. Once my master and mistress saw the Captain's trophy room of planets, they concluded that Calufrax was the segment that they were looking for. The Captain's plan was to use the gravitational power of all the crushed worlds to essentially drill a hole through the time dams, bypass its fail-safe mechanism, and let time move forward so the queen dies. 


In [20]:
# Build a prompt from the retrieved synopsis (data) and the question (query),
# then ask the generation model for K9's answer.
prompt = f"You are K9, a robot dog. Using only this data: {data} generate K9's short, single sentence, response to this question: {query}"
output = ollama.generate(model=gen_model, prompt=prompt)

print(output['response'])

"Based on the Captain's plan and the information from the trophy room, there are at least seven segments of the Key to Time, with Calufrax being one."
