# Ollama Server with Ngrok (Public API) - v2

Run this notebook in Google Colab to serve the Ollama API and expose it on a public URL through an ngrok tunnel. Anyone who has the URL can call the API, so share it carefully.  
**Requirements**:
- Ngrok Authtoken (Get it from [dashboard.ngrok.com](https://dashboard.ngrok.com))

In [None]:
# 1. Install Ollama & Ngrok
!curl -fsSL https://ollama.com/install.sh | sh
!pip install pyngrok

In [None]:
# 2. Configure Ngrok
import os
from getpass import getpass

from pyngrok import ngrok

# Option A: REPLACE THIS with your actual token (then avoid sharing the notebook).
# Option B: leave it untouched; the token is then read from the NGROK_AUTHTOKEN
#           environment variable or, failing that, asked for interactively.
NGROK_TOKEN = "YOUR_NGROK_AUTHTOKEN_HERE"

_TOKEN_PLACEHOLDER = "YOUR_NGROK_AUTHTOKEN_HERE"

if NGROK_TOKEN.strip() in ("", _TOKEN_PLACEHOLDER):
    # The placeholder was not edited: fall back to sources that keep the secret
    # out of the saved notebook. getpass hides the typed value from cell output.
    NGROK_TOKEN = os.environ.get("NGROK_AUTHTOKEN", "").strip()
    if not NGROK_TOKEN:
        NGROK_TOKEN = getpass("Enter your ngrok authtoken: ").strip()

if not NGROK_TOKEN:
    # Fail here with a clear message instead of later when the tunnel is opened.
    raise ValueError(
        "No ngrok authtoken provided. Get one from https://dashboard.ngrok.com"
    )

ngrok.set_auth_token(NGROK_TOKEN)
print("Ngrok token set.")

In [None]:
# 3. Start Ollama Server
import subprocess
import time
import os
import threading

def stream_output(pipe, prefix):
    """Echo each line read from a binary pipe to stdout, tagged with a prefix.

    Reads until ``pipe.readline()`` returns ``b''`` (end of stream) and then
    closes the pipe.

    Args:
        pipe: Binary file-like object providing ``readline()`` and context
            manager support, e.g. the stdout/stderr pipe of a
            ``subprocess.Popen`` object.
        prefix: Label printed in front of every line.
    """
    # Closing the pipe on exit releases the file descriptor once the stream ends.
    with pipe:
        for line in iter(pipe.readline, b''):
            # errors="replace": a stray non-UTF-8 byte must not raise and kill
            # the reader, because an undrained pipe can fill up and block the
            # process writing to it.
            text = line.decode("utf-8", errors="replace").strip()
            print(f"{prefix}: {text}")

import urllib.request

# Seconds to wait for the server to answer before giving up.
OLLAMA_STARTUP_TIMEOUT = 60

# Build the server environment from a copy so the kernel's own environment
# is left untouched.
my_env = os.environ.copy()
my_env["OLLAMA_ORIGINS"] = "*"       # accept requests from any origin
my_env["OLLAMA_HOST"] = "0.0.0.0"    # listen on all interfaces

print("Starting Ollama server...")
process = subprocess.Popen(
    ["ollama", "serve"],
    env=my_env,
    stdout=subprocess.PIPE,
    stderr=subprocess.PIPE,
)

# Start threads to print logs (helps debugging). They are daemons so that they
# can never keep the interpreter alive on their own.
threading.Thread(
    target=stream_output, args=(process.stdout, "OLLAMA_OUT"), daemon=True
).start()
threading.Thread(
    target=stream_output, args=(process.stderr, "OLLAMA_ERR"), daemon=True
).start()

# Wait until the HTTP API really answers instead of sleeping a fixed time and
# assuming success.
startup_deadline = time.monotonic() + OLLAMA_STARTUP_TIMEOUT
while True:
    if process.poll() is not None:
        # The server process died (for example the port is already in use by
        # an earlier run of this cell).
        raise RuntimeError(
            f"'ollama serve' exited early with code {process.returncode}; "
            "see the OLLAMA_ERR lines above."
        )
    try:
        with urllib.request.urlopen("http://127.0.0.1:11434/api/version", timeout=2):
            break
    except OSError:
        # urllib.error.URLError is a subclass of OSError: connection refused
        # or timed out means the server is not listening yet.
        if time.monotonic() >= startup_deadline:
            raise TimeoutError(
                f"Ollama did not respond within {OLLAMA_STARTUP_TIMEOUT} seconds."
            )
        time.sleep(0.5)

print("Ollama serving on port 11434.")

In [None]:
# 4. Pull Model
print("Pulling Llama 3.1 model...")
!ollama pull llama3.1

In [None]:
# 5. Start Tunnel
# Open an HTTPS tunnel to the local Ollama port and keep the address that
# ngrok assigned to it.
tunnel = ngrok.connect(11434, bind_tls=True)
public_url = tunnel.public_url

print(f"\n>>> YOUR PUBLIC OLLAMA URL IS: {public_url} <<<\n")
print("Copy this URL and pass it to your local app via --api-url")

In [None]:
# Keep cell running
# Blocks until the cell is interrupted (Stop button / Ctrl+C), then actually
# shuts down what the earlier cells started: `ngrok` comes from the token
# configuration cell and `process` / `subprocess` from the server start cell.
try:
    while True:
        time.sleep(1)
except KeyboardInterrupt:
    print("Stopping...")
    # Close the public tunnel first so no new requests arrive during shutdown.
    ngrok.kill()
    if process.poll() is None:
        process.terminate()
        try:
            process.wait(timeout=10)
        except subprocess.TimeoutExpired:
            # The server ignored the polite request; force it down.
            process.kill()