# TOS - AI Support Agent

In [60]:
import ollama
from ollama import ListResponse
from typing import Sequence

In [61]:
T_MODEL = ListResponse.Model
DEFAULT_LLM = "llama3.2:latest"
DEFAULT_EMBEDDING_MODEL = "nomic-embed-text:latest"
BASE_OLLAMA_URL = "http://localhost:11434/v1"

## Check for llama3.2 model

In [62]:
def list_available_llms() -> Sequence[T_MODEL]:
    return ollama.list().models

In [63]:
def does_llama3_exists(models: Sequence[T_MODEL]) -> bool:
    found = False
    for llm in models:
        if llm.model == DEFAULT_LLM:
            found = True
            break
    return found

def does_nomic_exists(models: Sequence[T_MODEL]) -> bool:
    found = False
    for llm in models:
        if llm.model == DEFAULT_EMBEDDING_MODEL:
            found = True
            break
    return found

In [64]:
llmList = list_available_llms()

if does_llama3_exists(llmList) and does_nomic_exists(llmList):
    print("👋🏻 Found llama3.2 & nomic, good to proceed 🚀")
else:
    print("💣 llama3.2 or nomic, not found, install it before proceeding.... 👾")

👋🏻 Found llama3.2 & nomic, good to proceed 🚀


In [65]:
from ollama import chat
from ollama import ChatResponse

def get_llm_response(query: str) -> ChatResponse:
  return chat(model='llama3.2', messages=[
  {
    'role': 'user',
    'content': query,
  },
])

response: ChatResponse = get_llm_response("Howdy, How are you?")

print(response.message.content)

I'm just a language model, so I don't have feelings or emotions like humans do, but thank you for asking! It's great to chat with you and help with any questions or topics you'd like to discuss. How about you? How's your day going?


In [66]:
ollama.embed(model='nomic-embed-text', input='The sky is blue because of rayleigh scattering')

EmbedResponse(model='nomic-embed-text', created_at=None, done=None, done_reason=None, total_duration=16256834, load_duration=2250167, prompt_eval_count=9, prompt_eval_duration=None, eval_count=None, eval_duration=None, embeddings=[[0.027598167, 0.018781658, -0.15477675, -0.024644945, 0.035094947, 0.07115604, -0.005861922, 0.018551059, 0.0031759313, -0.05195587, 0.032453492, 0.059863314, 0.05370034, 0.051027715, 0.011734799, 0.013722353, 0.0071139983, -0.02972811, -0.009842749, -0.009175068, -0.0841475, -0.029480096, 0.0018210462, -0.031335138, 0.059098028, 0.059840374, -0.00749137, -0.00011438317, -0.013929074, -0.022525767, 0.056464385, -0.029912323, -0.025303975, -0.048517343, 0.029587407, -0.05664888, 0.03202192, -0.0027435878, -0.009240757, 0.0059798677, -0.00067473727, -0.025977822, 0.016479101, 0.0021058987, 0.02799393, -0.044759456, 0.023799997, 0.073737055, -0.03347662, -0.017991923, -0.031321146, 0.053447314, -0.004839368, -0.0912219, 0.021824727, 0.067210935, 0.02369567, -0.0

## Vector DB setup

In [67]:
import chromadb
from chromadb.api import ClientAPI

In [68]:
def setup_db() -> ClientAPI | None:
    chromadb.PersistentClient("../../db/")

In [69]:
setup_db()

In [70]:
import zipfile
import os
from bs4 import BeautifulSoup
from chromadb import PersistentClient
from tqdm import tqdm
import ollama

# ---- Config ----
ZIP_PATH = 'docs_dump.zip'
EXTRACT_DIR = 'extracted_docs'
CHROMA_DB_PATH = '../../db/'
COLLECTION_NAME = 'help_docs'
CHUNK_SIZE = 500  # words per chunk
CHUNK_OVERLAP = 100
SKIP_EXTENSIONS = {
    '.css', '.js', '.png', '.jpg', '.jpeg', '.gif', '.svg', '.ico',
    '.woff', '.woff2', '.ttf', '.eot', '.mp4', '.mp3', '.pdf'
}


# ---- Utilities ----
def extract_zip(zip_path, extract_dir):
    os.makedirs(extract_dir, exist_ok=True)
    with zipfile.ZipFile(zip_path, 'r') as zip_ref:
        zip_ref.extractall(extract_dir)


def is_valid_file(file):
    ext = os.path.splitext(file)[-1].lower()
    return ext in ['.html', '.htm'] and ext not in SKIP_EXTENSIONS


def html_to_text(filepath):
    try:
        with open(filepath, 'r', encoding='utf-8') as f:
            content = f.read()
    except UnicodeDecodeError:
        try:
            with open(filepath, 'r', encoding='latin-1') as f:
                content = f.read()
        except Exception as e:
            print(f"❌ Failed to decode {filepath}: {e}")
            return None

    soup = BeautifulSoup(content, 'html.parser')
    return soup.get_text(separator=' ').strip()



def chunk_text(text, chunk_size=500, overlap=100):
    words = text.split()
    chunks = []
    start = 0
    while start < len(words):
        end = start + chunk_size
        chunks.append(' '.join(words[start:end]))
        start += chunk_size - overlap
    return chunks


def embed_chunks(chunks):
    return [
        ollama.embeddings(model='nomic-embed-text', prompt=chunk)['embedding']
        for chunk in tqdm(chunks, desc='Embedding chunks')
    ]


def save_to_chromadb(chunks, embeddings, collection_name, db_path):
    client = PersistentClient(path=db_path)
    if collection_name in [c.name for c in client.list_collections()]:
        collection = client.get_collection(collection_name)
    else:
        collection = client.create_collection(collection_name)

    for i, (chunk, embedding) in enumerate(zip(chunks, embeddings)):
        collection.add(
            ids=[f'doc-{i}'],
            documents=[chunk],
            embeddings=[embedding]
        )


# ---- Main ----
def process_zip(zip_path):
    extract_zip(zip_path, EXTRACT_DIR)

    for root, _, files in os.walk(EXTRACT_DIR):
        for file in files:
            if is_valid_file(file):
                filepath = os.path.join(root, file)
                text = html_to_text(filepath)
                if text:
                    chunks = chunk_text(text, CHUNK_SIZE, CHUNK_OVERLAP)
                    embeddings = embed_chunks(chunks)
                    save_to_chromadb(chunks, embeddings, COLLECTION_NAME, CHROMA_DB_PATH)
                else:
                    print(f"⚠️ Skipping undecodable file: {file}")
            else:
                print(f"⏭️ Skipping non-HTML file: {file}")

    print("✅ All embeddings saved to ChromaDB!")


In [71]:
process_zip("../../sampledata/docs.zip")

Embedding chunks: 100%|██████████| 1/1 [00:00<00:00, 108.55it/s]
Embedding chunks: 100%|██████████| 1/1 [00:00<00:00, 111.88it/s]


⏭️ Skipping non-HTML file: script.js
⏭️ Skipping non-HTML file: style.css
⏭️ Skipping non-HTML file: ._docs


Embedding chunks: 100%|██████████| 1/1 [00:00<00:00, 115.06it/s]
Embedding chunks: 100%|██████████| 1/1 [00:00<00:00, 81.35it/s]

⏭️ Skipping non-HTML file: ._style.css
⏭️ Skipping non-HTML file: ._script.js
✅ All embeddings saved to ChromaDB!





In [72]:
def get_db_client():
    client = PersistentClient(path=CHROMA_DB_PATH)
    return client

In [73]:
query = "What is TOS?"
collection = get_db_client().get_collection(COLLECTION_NAME)

In [74]:
def find_related_chunks(query, collection, top_k=3):
    query_embedding = ollama.embeddings(
        model="nomic-embed-text",
        prompt=query
    )["embedding"]

    results = collection.query(
        query_embeddings=[query_embedding],
        n_results=top_k
    )

    return list(zip(
        results["documents"][0],
        results.get("distances", [[]])[0]
    ))


In [75]:
related_chunks = find_related_chunks(query, collection)
related_chunks

[('TOS Welcome to TOS TOS stands for Technical Operation Services',
  155.60223388671875)]

In [76]:
def build_augmented_prompt(query, related_chunks):
    context = "\n".join([chunk[0] for chunk in related_chunks])
    prompt = f"""You are a helpful assistant. Use the context below to answer the user's question.

Context:
{context}

Question:
{query}

Answer:"""
    return prompt


In [77]:
ag_prompt = build_augmented_prompt(query, related_chunks)

In [78]:
def generate_response(augmented_prompt) -> ChatResponse:
  return chat(model='llama3.2', messages=[
    {
        "role": "system",
        "content": "You are a helpful assistant who can answer questions about space but only answers questions that are directly related to the sources/documents given.",
    },
    {"role": "user", "content": augmented_prompt}
    ])

In [79]:
response: ChatResponse = generate_response(ag_prompt)
print(response.message.content)

I'm not able to find any information on what TOS specifically refers to in this context, as you haven't provided any additional information or sources related to Technical Operation Services. Can you please provide more context or clarify which source the term "TOS" comes from? I'll do my best to help with your question.


In [80]:
query = "Who is the author"
related_chunks = find_related_chunks(query, collection)
print(related_chunks)
ag_prompt = build_augmented_prompt(query, related_chunks)
response: ChatResponse = generate_response(ag_prompt)
print(response.message.content)

[('TOS Welcome to TOS TOS stands for Technical Operation Services', 563.8402099609375)]
Unfortunately, I don't have any information about the author of TOS in my knowledge database as it was not specified. The provided context does not mention any sources or documents that would allow me to determine the author. Can you provide more context or clarify which document or source you are referring to?


In [81]:
client = get_db_client()
collection = client.get_collection(name=COLLECTION_NAME)
collection.count()

1