# Ollama API Usage Demonstration

This notebook demonstrates how to use both the native Ollama API and our Python wrapper layer.

## 1. Setup and Imports

In [16]:
import json
import os
import shutil

import matplotlib.pyplot as plt
import numpy as np
import requests

import Ollama_utils as ou

## 2. Native Ollama API
First, let's demonstrate using the native Ollama API directly.

In [17]:
# Check for Ollama
# Add auto-download capability for llama3 if no models are present
def download_llama3_model():
    """Pull the llama3 model from the local Ollama server, printing progress.

    Sends a streaming POST to ``/api/pull`` and reads the newline-delimited
    JSON status objects the server emits while the pull is running.

    Returns:
        bool: True once the stream reports the final ``"success"`` status.
        False on a non-200 response, an ``"error"`` object in the stream, a
        stream that ends without ``"success"``, or any exception raised while
        talking to the server.
    """
    print("🔄 No models found. Automatically downloading llama3 model (approx. 4GB)...")

    url = "http://localhost:11434/api/pull"
    payload = {"name": "llama3"}

    try:
        # stream=True lets us report progress while the pull is still running
        with requests.post(url, json=payload, stream=True) as response:
            if response.status_code != 200:
                print(f"❌ Error downloading model: {response.status_code} - {response.text}")
                return False

            for line in response.iter_lines():
                if not line:
                    continue  # blank keep-alive line, nothing to parse
                data = json.loads(line)

                # A failed pull is reported inside the (HTTP 200) stream
                if "error" in data:
                    print(f"❌ Error downloading model: {data['error']}")
                    return False

                # The pull API ends the stream with {"status": "success"};
                # it never emits a status containing "completed".
                if data.get("status") == "success":
                    print("✅ Download complete!")
                    return True

                # Layer progress objects carry byte counts; skip the ones that
                # do not, and guard against a zero/missing total.
                total = data.get("total")
                completed = data.get("completed")
                if total and completed is not None:
                    percent = (completed / total) * 100
                    print(f"Downloading: {percent:.1f}% complete", end="\r")

            # Stream closed without a final "success" status
            print("❌ Error downloading model: stream ended before success was reported")
            return False
    except Exception as e:
        print(f"❌ Error connecting to Ollama: {str(e)}")
        return False

# Set the preferred model
preferred_model = "llama3"

# Ask the local Ollama server which models are installed. If none are,
# download llama3; otherwise use llama3 when present or fall back to the
# first installed model.
try:
    response = requests.get("http://localhost:11434/api/tags")
    if response.status_code == 200:
        models = response.json().get("models", [])
        model_names = [model['name'] for model in models]

        if not models:
            print("No models detected. Starting automatic download...")
            success = download_llama3_model()
            if success:
                # Check available models again
                response = requests.get("http://localhost:11434/api/tags")
                if response.status_code == 200:
                    models = response.json().get("models", [])
                    model_names = [model['name'] for model in models]
                    print(f"✅ Using model: {preferred_model}")
            else:
                print("⚠️ Failed to download llama3 model. This notebook requires a model to function properly.")
        else:
            # Models exist, determine which to use
            if f"{preferred_model}:latest" in model_names or preferred_model in model_names:
                print(f"✅ Using preferred model: {preferred_model}")
            else:
                # Use the first available model
                alternative_model = model_names[0]
                # Drop only an explicit ':latest' tag. Any other tag (e.g.
                # 'mistral:7b') must be kept, because a bare name is resolved
                # to ':latest', which may not be installed.
                if alternative_model.endswith(":latest"):
                    preferred_model = alternative_model[:-len(":latest")]
                else:
                    preferred_model = alternative_model
                print(f"✅ Using available model: {preferred_model}")
                print("Note: For optimal results, consider installing llama3 model")

            print(f"Available models: {model_names}")
    else:
        # Surface an unexpected HTTP status instead of silently doing nothing
        print(f"❌ Error checking models: {response.status_code} - {response.text}")
except Exception as e:
    print(f"❌ Error checking models: {str(e)}")

✅ Using preferred model: llama3
Available models: ['llama3:latest']


In [6]:
# Direct API call for demonstration
def generate_search_terms(query, model="llama3"):
    """Ask an Ollama model for comma-separated search terms for ``query``.

    Posts a single non-streaming request to the local ``/api/generate``
    endpoint.

    Args:
        query: Search query text interpolated into the prompt.
        model: Ollama model name sent in the request payload.

    Returns:
        On HTTP 200, the ``"response"`` field of the JSON reply ("" when the
        field is absent). Otherwise a string starting with ``"Error: "`` that
        carries either the HTTP status and body or the raised exception text.
    """
    endpoint = "http://localhost:11434/api/generate"
    instructions = f"""Given the following search query, extract 3-5 key search terms that would be most effective 
    for finding relevant documents. Return only the terms separated by commas, no explanations.
    
    Query: {query}
    """
    request_body = {"model": model, "prompt": instructions, "stream": False}

    try:
        reply = requests.post(endpoint, json=request_body)
        # Guard clause: anything other than 200 is reported as an error string
        if reply.status_code != 200:
            return f"Error: {reply.status_code} - {reply.text}"
        return reply.json().get("response", "")
    except Exception as exc:
        return "Error: " + str(exc)

## 3. Python Wrapper Layer
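Now let's use our Python wrapper (`Ollama_utils`, imported as `ou`), which simplifies these operations. The next cell first exercises the native-API helper defined above and prepares the sample documents that the wrapper will index and search.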

In [7]:
# Try the native API to generate search terms.
# Pass the model chosen by the availability check (preferred_model) so the
# call still works when llama3 is not the installed model.
query = "How do I implement secure authentication in a web application?"
search_terms = generate_search_terms(query, model=preferred_model)
print(f"\nFor query: '{query}'")
print(f"Generated search terms: {search_terms}")

# 2. Document Embedding and Indexing
print("\nDemonstrating document embedding and indexing...")

# Create sample documents for demonstration
os.makedirs("demo_docs", exist_ok=True)

# (filename, content) pairs; a later cell writes them to demo_docs/
sample_docs = [
    ("authentication.md", """
    # Authentication Best Practices
    
    Authentication is critical for web applications. This guide covers:
    
    - Password hashing with bcrypt
    - Multi-factor authentication
    - JWT tokens for API authentication
    - Session management
    - OAuth 2.0 integration
    """),
    
    ("security_overview.md", """
    # Security Overview
    
    A comprehensive security strategy includes:
    
    - Authentication and authorization
    - Input validation and sanitization
    - HTTPS/TLS encryption
    - Regular security audits
    - Data encryption at rest
    """),
    
    ("api_design.md", """
    # API Design Guide
    
    Building robust APIs requires consideration of:
    
    - Authentication mechanisms (JWT, OAuth)
    - Rate limiting
    - Versioning strategy
    - Error handling
    - Documentation
    """)
]


For query: 'How do I implement secure authentication in a web application?'
Generated search terms: Secure, Authentication, Web Application

Demonstrating document embedding and indexing...


In [65]:
# NOTE(review): this cell is a verbatim repeat of the previous cell and was
# executed out of order (In [65]); it can be deleted without affecting the
# rest of the notebook.
# Try the native API to generate search terms.
# Pass the model chosen by the availability check (preferred_model) so the
# call still works when llama3 is not the installed model.
query = "How do I implement secure authentication in a web application?"
search_terms = generate_search_terms(query, model=preferred_model)
print(f"\nFor query: '{query}'")
print(f"Generated search terms: {search_terms}")

# 2. Document Embedding and Indexing
print("\nDemonstrating document embedding and indexing...")

# Create sample documents for demonstration
os.makedirs("demo_docs", exist_ok=True)

# (filename, content) pairs; a later cell writes them to demo_docs/
sample_docs = [
    ("authentication.md", """
    # Authentication Best Practices
    
    Authentication is critical for web applications. This guide covers:
    
    - Password hashing with bcrypt
    - Multi-factor authentication
    - JWT tokens for API authentication
    - Session management
    - OAuth 2.0 integration
    """),
    
    ("security_overview.md", """
    # Security Overview
    
    A comprehensive security strategy includes:
    
    - Authentication and authorization
    - Input validation and sanitization
    - HTTPS/TLS encryption
    - Regular security audits
    - Data encryption at rest
    """),
    
    ("api_design.md", """
    # API Design Guide
    
    Building robust APIs requires consideration of:
    
    - Authentication mechanisms (JWT, OAuth)
    - Rate limiting
    - Versioning strategy
    - Error handling
    - Documentation
    """)
]


For query: 'How do I implement secure authentication in a web application?'
Generated search terms: secure, authentication, web-application

Demonstrating document embedding and indexing...


In [8]:
# Write every (filename, content) pair from sample_docs into demo_docs/
for filename, content in sample_docs:
    target_path = f"demo_docs/{filename}"
    with open(target_path, "w") as doc_file:
        doc_file.write(content)
doc_count = len(sample_docs)
print(f"Created {doc_count} sample documents in 'demo_docs/'")

Created 3 sample documents in 'demo_docs/'


In [9]:
# Fetch the wrapper's embedding model, then report its class and vector size
model = ou.get_embedding_model()
embedder_name = model.__class__.__name__
print(f"Using embedding model: {embedder_name}")
embedding_dim = model.get_sentence_embedding_dimension()
print(f"Embedding dimension: {embedding_dim}")

modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.4k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/571 [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:   0%|          | 0.00/438M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/363 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/239 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

Using embedding model: SentenceTransformer
Embedding dimension: 768


In [10]:
# Collect the files under demo_docs/ that the next cell will index
print("\nIndexing documents...")
file_paths = ou.scan_directory("demo_docs")
found_count = len(file_paths)
print(f"Found {found_count} files")


Indexing documents...
Found 3 files


In [11]:
# Track progress
def progress_callback(progress, message):
    """Print one progress line: ``progress`` as a percentage, then ``message``.

    Args:
        progress: Fraction of work done; multiplied by 100 for display.
        message: Text appended after the percentage.
    """
    percent_done = progress * 100
    print("Progress: {:.1f}% - {}".format(percent_done, message))

# Build the index over the scanned files; progress is reported through
# progress_callback and the truthiness of the return value decides the message.
success = ou.build_document_index(
    file_paths,
    index_path="demo_docs/index.bin",
    metadata_path="demo_docs/metadata.pkl",
    progress_callback=progress_callback,
)
if success:
    outcome = "completed successfully"
else:
    outcome = "failed"
print(f"Indexing {outcome}")

Building index for 3 files
Processing 3 new files
Using 8 parallel workers
Saved to cache: index/cache/_app_demo_docs_security_overview.md_1747530115.4244177_253.pkl
Progress: 30.0% - Processed 1/3 files
Saved to cache: index/cache/_app_demo_docs_api_design.md_1747530115.4300435_222.pkl
Progress: 60.0% - Processed 2/3 files
Saved to cache: index/cache/_app_demo_docs_authentication.md_1747530115.4222198_285.pkl
Progress: 90.0% - Processed 3/3 files
Progress: 95.0% - Building FAISS index
Creating FAISS index with 3 chunks (dim=768)
Creating new FAISS index
Progress: 100.0% - Index built successfully
✅ Indexing complete
Indexing completed successfully


In [12]:
# 3. Document Search
# Queries that the next cell runs against the demo index
search_queries = list((
    "authentication methods for web apps",
    "API security best practices",
    "how to protect user data",
))
print("\nSearching for documents...")


Searching for documents...


In [13]:
# Run each demo query against the saved index and print the top two matches
for query in search_queries:
    print(f"\nSearching for: '{query}'")
    results = ou.search_documents(
        query,
        top_k=2,
        index_path="demo_docs/index.bin",
        metadata_path="demo_docs/metadata.pkl",
    )

    # An "error" entry in the return value is treated as a failed search
    if "error" in results:
        print(f"Error: {results['error']}")
        continue

    print(f"Found {len(results)} results:")
    for rank, result in enumerate(results, start=1):
        print(f"\n[{rank}] {result['filename']} (Score: {result['score']:.3f})")
        # Show at most the first 150 characters of the matched text
        snippet_preview = result['snippet'][:150]
        print(f"Snippet: {snippet_preview}...")


Searching for: 'authentication methods for web apps'
Found 2 results:

[1] authentication.md (Score: 0.675)
Snippet: 
    # Authentication Best Practices
    
    Authentication is critical for web applications. This guide covers:
    
    - Password hashing with bcr...

[2] api_design.md (Score: 0.466)
Snippet: 
    # API Design Guide
    
    Building robust APIs requires consideration of:
    
    - Authentication mechanisms (JWT, OAuth)
    - Rate limiting...

Searching for: 'API security best practices'
Found 2 results:

[1] api_design.md (Score: 0.684)
Snippet: 
    # API Design Guide
    
    Building robust APIs requires consideration of:
    
    - Authentication mechanisms (JWT, OAuth)
    - Rate limiting...

[2] authentication.md (Score: 0.540)
Snippet: 
    # Authentication Best Practices
    
    Authentication is critical for web applications. This guide covers:
    
    - Password hashing with bcr...

Searching for: 'how to protect user data'
Found 2 results:

[1] secur

In [14]:
# 4. Search Enhancement with Ollama
print("\nEnhancing search with Ollama...")
query = "secure login implementation"

# First get raw search results
print(f"Original query: '{query}'")
raw_results = ou.search_documents(
    query,
    top_k=2,
    index_path="demo_docs/index.bin",
    metadata_path="demo_docs/metadata.pkl"
)

# Use Ollama to enhance the query. Use the model selected by the availability
# check (preferred_model) instead of hard-coding "llama3", so this cell also
# works when the notebook fell back to a different installed model.
enhanced_query = ou.query_ollama(
    f"Rewrite this search query to be more comprehensive for finding technical documentation: '{query}'. Return only the enhanced query and nothing else in beginning or end.",
    model=preferred_model
)
print(f"Enhanced query: '{enhanced_query}'")


Enhancing search with Ollama...
Original query: 'secure login implementation'
Enhanced query: '"site:.pdf OR site:.docx OR site:.html OR site:.txt OR site:.md "secure login implementation" (implementation OR integration OR setup OR guide OR manual) (security OR authentication OR authorization) (API OR protocol OR standards OR compliance) -javascript -python'


In [15]:
# Search with enhanced query
enhanced_results = ou.search_documents(
    enhanced_query,
    top_k=2,
    index_path="demo_docs/index.bin",
    metadata_path="demo_docs/metadata.pkl"
)


def _print_ranking(title, results):
    """Print ``title`` followed by one ranked line per search result.

    Uses the same failure convention as the earlier search cell: an "error"
    entry in ``results`` is printed instead of a ranking.
    """
    print(title)
    if "error" in results:
        print(f"Error: {results['error']}")
        return
    for i, result in enumerate(results):
        print(f"[{i+1}] {result['filename']} (Score: {result['score']:.3f})")


# Compare results
print("\nComparison of search results:")
_print_ranking("Original query results:", raw_results)
_print_ranking("\nEnhanced query results:", enhanced_results)

# 5. Cleanup
print("\nCleaning up demo files...")
# Remove the demo files created above (comment out the next two lines to keep
# them for inspection). The existence check keeps this cell re-runnable.
if os.path.isdir("demo_docs"):
    shutil.rmtree("demo_docs")
print("Demo complete!")


Comparison of search results:
Original query results:
[1] authentication.md (Score: 0.546)
[2] security_overview.md (Score: 0.483)

Enhanced query results:
[1] security_overview.md (Score: 0.543)
[2] authentication.md (Score: 0.477)

Cleaning up demo files...
Demo complete!
