# Backdoor AI - Ollama on Google Colab (Memory Optimized)

This notebook helps you run Ollama on Google Colab to use with your Backdoor AI application. You can install and run Ollama models (including Llama4) directly in Colab, then connect your Backdoor AI app to it.

## How it works

1. This notebook will first optimize your Colab environment for large models
2. We'll install Ollama with special modifications for Colab
3. You'll select and download models (Llama4 or others)
4. We'll set up Cloudflared to create a secure tunnel to your Ollama instance
5. You'll get a URL to use in your Backdoor AI settings

This version is specially optimized for memory efficiency with large models.

Let's get started!

## 1. Memory Optimization for Large Models

First, let's clear up disk space and optimize memory to ensure we have enough resources for large models.

In [None]:
# Memory optimization functions
import os
import shutil
import subprocess
import gc
import time
from IPython.display import display, HTML, clear_output

# Install required packages first
!pip install -q psutil
import psutil

def clear_disk_space():
    """Free disk space by purging package caches and temporary files.

    Steps, in order:

    1. ``apt-get clean`` and ``apt-get -y autoremove``.
    2. Delete the pip cache (``~/.cache/pip``).
    3. Delete every entry of ``/tmp`` and ``/var/tmp`` whose name does not
       start with ``ollama`` or ``backdoor``.
    4. ``docker system prune -af`` (its stderr is discarded).

    Every step is best-effort: command exit codes are ignored and entries
    that cannot be deleted are skipped.  Finishes by printing the root
    filesystem usage through ``show_disk_usage()``.
    """
    print("🧹 Cleaning up disk space...")

    # Fixed command strings (no user input); exit codes are deliberately ignored.
    for command in ("apt-get clean", "apt-get -y autoremove", "rm -rf ~/.cache/pip"):
        subprocess.run(command, shell=True)

    # Entries with these prefixes belong to this notebook and must survive.
    protected_prefixes = ('ollama', 'backdoor')

    for temp_dir in ('/tmp', '/var/tmp'):
        if not os.path.exists(temp_dir):
            continue

        try:
            entries = os.listdir(temp_dir)
        except OSError as e:
            print(f"Warning: Could not clean {temp_dir}: {e}")
            continue

        for item in entries:
            if item.startswith(protected_prefixes):
                continue

            item_path = os.path.join(temp_dir, item)
            try:
                # isdir() follows symlinks but rmtree() refuses to operate on
                # them, so a link to a directory has to be unlinked instead.
                if os.path.isdir(item_path) and not os.path.islink(item_path):
                    shutil.rmtree(item_path)
                else:
                    os.remove(item_path)
            except OSError:
                pass  # Skip files that can't be removed (in use, permissions, ...)

    # Remove unused Docker images/containers if Docker is installed.  With
    # shell=True a missing docker binary only yields a non-zero exit status.
    try:
        subprocess.run("docker system prune -af", shell=True, stderr=subprocess.DEVNULL)
    except OSError:
        pass

    print("✅ Disk cleanup complete!")
    show_disk_usage()

def show_disk_usage():
    """Print a human-readable usage report for the root filesystem.

    Runs ``df -h /`` and echoes its output line by line under a header.  If
    the command is unavailable, exits with an error, or its output cannot be
    decoded, a one-line fallback message is printed instead; nothing is
    raised.
    """
    try:
        # A fixed argument list needs no shell.
        df_output = subprocess.check_output(["df", "-h", "/"], text=True)
    except (OSError, subprocess.SubprocessError, UnicodeError):
        print("Could not retrieve disk usage information")
        return

    print("\n📊 Disk Space Available:")
    for line in df_output.split('\n'):
        print(line)

def show_memory_usage():
    """Print total, available and used system memory.

    Figures come from ``psutil.virtual_memory()`` and are divided by
    ``1024 ** 3`` before being labelled "GB".  If the values cannot be
    obtained, a one-line fallback message is printed instead; nothing is
    raised.
    """
    bytes_per_gb = 1024 ** 3

    try:
        memory = psutil.virtual_memory()
        total_gb = memory.total / bytes_per_gb
        available_gb = memory.available / bytes_per_gb
        used_gb = memory.used / bytes_per_gb
        percent = memory.percent
    except Exception:
        # Best-effort display helper.  Catching Exception (not a bare except)
        # keeps KeyboardInterrupt/SystemExit working.
        print("Could not retrieve memory usage information")
        return

    print("\n📊 Memory Usage:")
    print(f"Total Memory: {total_gb:.2f} GB")
    print(f"Available: {available_gb:.2f} GB")
    print(f"Used: {used_gb:.2f} GB ({percent}%)")

def clear_memory():
    """Run Python garbage collection and empty PyTorch's CUDA cache if possible.

    PyTorch is imported lazily so the function also works where it is not
    installed.  A failure while emptying the CUDA cache is ignored.  Only
    memory held by the current Python process is affected.
    """
    gc.collect()

    try:
        import torch
    except ImportError:
        torch = None

    if torch is not None:
        try:
            torch.cuda.empty_cache()
            print("✅ PyTorch CUDA cache cleared")
        except Exception:
            pass  # best-effort: an unusable CUDA runtime must not abort cleanup

    print("✅ Python memory cleared")

def clean_model_files(keep_models=None, ollama_dirs=None):
    """Delete entries from Ollama ``models`` directories to free disk space.

    Args:
        keep_models: Substrings to protect.  A top-level entry of a
            ``models`` directory is kept when its name contains any of them.
            ``None`` (default) or an empty list keeps nothing.
        ollama_dirs: Ollama data directories to scan; the ``models``
            sub-directory of each one is cleaned.  Defaults to
            ``/root/.ollama`` and ``/tmp/ollama``.

    Directories that do not exist are skipped.  Each removal is reported on
    stdout; a failed removal is reported and does not stop the cleanup.

    NOTE(review): matching is done on the names of the top-level entries of
    ``models`` only.  If Ollama keeps models under generic sub-directories,
    ``keep_models`` will not protect anything - confirm against the on-disk
    layout before relying on it.
    """
    if keep_models is None:
        keep_models = []
    if ollama_dirs is None:
        ollama_dirs = ['/root/.ollama', '/tmp/ollama']

    print(f"🧹 Cleaning model files (keeping: {', '.join(keep_models) if keep_models else 'none'})...")

    for ollama_dir in ollama_dirs:
        models_path = os.path.join(ollama_dir, 'models')
        # isdir() also covers "missing" and "exists but is a plain file".
        if not os.path.isdir(models_path):
            continue

        for model_file in os.listdir(models_path):
            if any(keep_model in model_file for keep_model in keep_models):
                continue

            model_path = os.path.join(models_path, model_file)
            try:
                # rmtree() refuses symlinks, so unlink those like plain files.
                if os.path.isdir(model_path) and not os.path.islink(model_path):
                    shutil.rmtree(model_path)
                else:
                    os.remove(model_path)
                print(f"  - Removed: {model_file}")
            except OSError as e:
                print(f"  - Could not remove {model_file}: {e}")

    print("✅ Model cleanup complete!")

def monitor_download_progress(model_name, stop_event=None):
    """Poll the Ollama model directory and report how much has been downloaded.

    Roughly every two seconds the sizes of all files below
    ``/root/.ollama/models`` whose file name contains ``model_name``
    (case-insensitive) are summed.  Whenever the total has grown, the cell
    output is cleared and the new total is printed in GB.

    Args:
        model_name: Text to look for in file names.
        stop_event: Optional ``threading.Event``-like object providing
            ``is_set()`` and ``wait(timeout)``.  Once it is set the loop
            ends and the function returns.  With ``None`` (default) the
            loop runs until interrupted, as before.

    NOTE(review): files are matched by name.  If Ollama stores downloads
    under names that do not contain the model name, no progress will ever be
    printed - confirm against the on-disk layout.
    """
    last_size = 0
    download_dir = '/root/.ollama/models'
    needle = model_name.lower()

    def _pause(seconds):
        # Event.wait() returns early when a stop is requested.
        if stop_event is None:
            time.sleep(seconds)
        else:
            stop_event.wait(seconds)

    print(f"🔄 Monitoring download progress for {model_name}")

    try:
        while stop_event is None or not stop_event.is_set():
            if not os.path.exists(download_dir):
                _pause(1)
                continue

            total_size = 0
            for root, _dirs, files in os.walk(download_dir):
                for file in files:
                    if needle in file.lower():
                        try:
                            total_size += os.path.getsize(os.path.join(root, file))
                        except OSError:
                            pass  # file vanished or is unreadable; skip it

            if total_size > last_size:
                clear_output(wait=True)
                print(f"Downloading {model_name}...")
                print(f"Downloaded: {total_size / (1024**3):.2f} GB")
                last_size = total_size

            _pause(2)
    except KeyboardInterrupt:
        print("Download monitoring stopped")

# One-time environment preparation for this session: clean up first, then
# export the CUDA-related settings, then report what is left.
print("🚀 Optimizing environment for large language models...")
for _cleanup_step in (clear_disk_space, clear_memory):
    _cleanup_step()

# Environment variables for improved performance
os.environ.update({
    "CUBLAS_WORKSPACE_CONFIG": ":4096:8",
    "PYTORCH_CUDA_ALLOC_CONF": "max_split_size_mb:512",
})

# Current resource usage after the cleanup
for _report in (show_memory_usage, show_disk_usage):
    _report()

print("\n✅ Optimization complete! Ready to continue.")

## 2. Check GPU availability

Let's check whether a GPU is available for this Colab session. A GPU significantly improves model performance.

In [None]:
# IPython shell escape: run the NVIDIA status tool and show its output in the cell.
!nvidia-smi

import torch
import subprocess
import gc
import os
from IPython.display import display, HTML, clear_output

# Detect CUDA through PyTorch.  `gpu_available` is read again by later cells;
# `gpu_info` stays None when no GPU is present.
gpu_info = None
gpu_available = torch.cuda.is_available()

if not gpu_available:
    _gpu_banner = '''
    <div style="background-color:#f8d7da; padding:10px; border-radius:5px; margin:10px 0;">
        <h3 style="color:#721c24;">⚠️ No GPU Detected</h3>
        <p>Your Colab instance doesn't have a GPU. Models will run slower.</p>
        <p>Consider changing the runtime type to GPU:</p>
        <p><b>Runtime > Change runtime type > Hardware accelerator > GPU</b></p>
    </div>'''
else:
    # Start from an empty CUDA cache before querying the device
    torch.cuda.empty_cache()

    _bytes_per_gb = 1024**3
    gpu_info = dict(
        name=torch.cuda.get_device_name(0),
        count=torch.cuda.device_count(),
        capability=torch.cuda.get_device_capability(0),
        memory=torch.cuda.get_device_properties(0).total_memory / _bytes_per_gb,  # bytes -> GB
    )
    _gpu_banner = f'''
    <div style="background-color:#d4edda; padding:10px; border-radius:5px; margin:10px 0;">
        <h3 style="color:#155724;">✅ GPU Detected</h3>
        <p><b>GPU Name:</b> {gpu_info['name']}</p>
        <p><b>GPU Memory:</b> {gpu_info['memory']:.2f} GB</p>
        <p><b>CUDA Capability:</b> {gpu_info['capability'][0]}.{gpu_info['capability'][1]}</p>
        <p>Your Colab instance has a GPU available. This will significantly improve model performance.</p>
    </div>'''

display(HTML(_gpu_banner))

# Report system RAM and disk as well
for _report in (show_memory_usage, show_disk_usage):
    _report()

## 3. Set up environment

Now let's install Ollama and required packages. We'll use a modified installation script to prevent systemd-related warnings and optimize for memory usage.

In [None]:
%%bash
# Free package-manager space before installing anything new
apt-get clean
apt-get -y autoremove

# Refresh the package index and install the tools needed below.
# curl and wget fetch the installers; lshw and pciutils are presumably used
# for hardware detection by the Ollama installer - TODO confirm.
apt-get update && apt-get install -y curl wget lshw pciutils

# Drop the package files that were just downloaded
apt-get clean

# Create Ollama directories
mkdir -p /tmp/ollama/models

# Download the Ollama installer to a file instead of piping it into a shell,
# so that it can be patched before it runs
curl -fsSL https://ollama.com/install.sh -o /tmp/ollama_install.sh

# Replace the installer's systemctl calls with echo statements.
# Ollama is started manually from a later cell instead of through systemd.
sed -i 's/systemctl daemon-reload/echo "Skipping systemctl: Not using systemd in Colab environment"/g' /tmp/ollama_install.sh
sed -i 's/systemctl enable ollama/echo "Skipping systemd service setup in Colab environment"/g' /tmp/ollama_install.sh
sed -i 's/systemctl start ollama/echo "Starting Ollama manually instead of via systemd"/g' /tmp/ollama_install.sh

# Run the modified installer.
# NOTE(review): OLLAMA_YES=1 is presumably meant to make the installer
# non-interactive - confirm that the installer actually reads it.
chmod +x /tmp/ollama_install.sh
OLLAMA_YES=1 /tmp/ollama_install.sh

# Install cloudflared (used for tunneling) from the latest GitHub release .deb
wget -q https://github.com/cloudflare/cloudflared/releases/latest/download/cloudflared-linux-amd64.deb
dpkg -i cloudflared-linux-amd64.deb
rm cloudflared-linux-amd64.deb  # The package file is no longer needed once installed

# Python packages imported by the later cells (requests, ipywidgets) plus httpx
pip install -q requests httpx ipywidgets 

# Clean pip cache
rm -rf ~/.cache/pip

## 4. Start Ollama server

Now we'll start the Ollama server with GPU acceleration if available and proper memory management.

In [None]:
import subprocess
import time
import requests
import json
import os
import gc
from IPython.display import clear_output, display, HTML

# Clear Python memory before starting server
gc.collect()
if gpu_available:
    torch.cuda.empty_cache()

# Ollama is configured through environment variables; the server process
# receives them through env=os.environ when it is launched below.
os.environ["OLLAMA_ORIGINS"] = "*"
os.environ["OLLAMA_HOST"] = "0.0.0.0:11434"

# If GPU available, set optimized CUDA params
if gpu_available:
    os.environ["CUDA_VISIBLE_DEVICES"] = "0"
    # Set low-level GPU memory optimizations
    os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:128"

# Server output is collected here.  The file name starts with "ollama", which
# clear_disk_space() skips when it empties /tmp.
OLLAMA_LOG_PATH = "/tmp/ollama_server.log"
OLLAMA_VERSION_URL = "http://localhost:11434/api/version"


def _start_ollama_server():
    """Launch ``ollama serve`` in the background and return its ``Popen`` handle.

    The server's stdout and stderr are appended to ``OLLAMA_LOG_PATH``.  They
    must not go to ``subprocess.PIPE``: nothing in this notebook reads those
    pipes, so the server would block on a log write as soon as the pipe
    buffer was full.
    """
    # The child keeps its own duplicate of the file descriptor, so the
    # parent's handle can be closed as soon as Popen() has returned.
    with open(OLLAMA_LOG_PATH, "a") as log_file:
        return subprocess.Popen(
            ["ollama", "serve"],
            stdout=log_file,
            stderr=subprocess.STDOUT,
            env=os.environ,
        )


def _stop_ollama_server(process):
    """Terminate *process* and reap it, escalating to ``kill()`` after 5 seconds."""
    if process is None or process.poll() is not None:
        return
    process.terminate()
    try:
        process.wait(timeout=5)
    except subprocess.TimeoutExpired:
        process.kill()
        process.wait()


# Kill any existing Ollama processes.  "Nothing matched" (non-zero exit) and a
# missing pkill binary are both fine here.
try:
    subprocess.run(
        ["pkill", "-f", "ollama"],
        stdout=subprocess.DEVNULL,
        stderr=subprocess.DEVNULL,
    )
except OSError:
    pass
time.sleep(1)

# Start Ollama server in background
print("Starting Ollama server...")
ollama_process = _start_ollama_server()

# Wait for Ollama to start: up to max_attempts probes, 5 seconds apart
max_attempts = 3
attempt = 0
server_started = False

while attempt < max_attempts and not server_started:
    attempt += 1
    print(f"Starting attempt {attempt}/{max_attempts}...")
    time.sleep(5)  # Give it time to start

    try:
        # The timeout keeps a server that accepts the connection but never
        # answers from hanging the cell.
        response = requests.get(OLLAMA_VERSION_URL, timeout=10)
        if response.status_code == 200:
            server_started = True
            version_info = response.json()
            # Get memory usage after Ollama starts
            memory = psutil.virtual_memory()
            available_gb = memory.available / (1024 ** 3)

            display(HTML(f'''
            <div style="background-color:#d4edda; padding:10px; border-radius:5px; margin:10px 0;">
                <h3 style="color:#155724;">✅ Ollama started successfully!</h3>
                <p><b>Version:</b> {version_info.get('version')}</p>
                <p><b>GPU Acceleration:</b> {'Enabled' if gpu_available else 'Not available'}</p>
                <p><b>Available Memory:</b> {available_gb:.2f} GB</p>
            </div>'''))
        else:
            print(f"❌ Ollama returned unexpected status: {response.status_code}")
    except Exception as e:
        print(f"❌ Failed to connect on attempt {attempt}: {e}")
        # Kill the process and try again if we still have attempts left
        if attempt < max_attempts:
            print("Restarting Ollama server...")
            _stop_ollama_server(ollama_process)
            # Clear memory before retrying
            gc.collect()
            if gpu_available:
                torch.cuda.empty_cache()
            ollama_process = _start_ollama_server()

if not server_started:
    display(HTML('''
    <div style="background-color:#f8d7da; padding:10px; border-radius:5px; margin:10px 0;">
        <h3 style="color:#721c24;">❌ Failed to start Ollama</h3>
        <p>Could not start the Ollama server after multiple attempts.</p>
        <p>Please check the output above for errors or try restarting the notebook.</p>
    </div>'''))
    # The server no longer writes into the notebook, so point at its log.
    print(f"Ollama server log: {OLLAMA_LOG_PATH}")

## 5. Prepare for model download

Before downloading models, let's clear temporary files and caches to ensure maximum available space for the model. Note that this step also deletes any models that were already downloaded to this Colab instance.

In [None]:
# Reclaim as much space as possible before pulling a model, in this order:
#   1. remove models that are already on disk (nothing is kept),
#   2. run the general disk cleanup once more,
#   3. report the remaining memory,
#   4. report the remaining disk space.
for _prep_step in (clean_model_files, clear_disk_space, show_memory_usage, show_disk_usage):
    _prep_step()

## 6. Choose and download a model

Now, let's download a model. We recommend models sized appropriately for your available resources.

In [None]:
import ipywidgets as widgets
from IPython.display import display, HTML
import threading

# Snapshot of free RAM and free disk space (bytes / 1024**3); both feed the
# tier selection below and the resource summary shown to the user.
_BYTES_PER_GB = 1024 ** 3
memory = psutil.virtual_memory()
available_gb = memory.available / _BYTES_PER_GB
disk_info = psutil.disk_usage('/')
available_disk_gb = disk_info.free / _BYTES_PER_GB

# Model catalogue as (display name, model id, approximate size) triples.
# NOTE(review): the ids are handed unchanged to `ollama pull`; nothing here
# checks that they exist in the Ollama library - confirm before release.
_LLAMA4_8B = ("Llama4 (8B - Recommended)", "llama4-8b:latest", "~8GB")
_LLAMA4_TINY = ("Llama4 Tiny (Smallest - Recommended)", "llama4-tiny:latest", "~1.5GB")
_MISTRAL_7B = ("Mistral (7B)", "mistral:latest", "~7GB")
_GEMMA_7B = ("Gemma (7B)", "gemma:latest", "~7GB")
_GEMMA_2B = ("Gemma (2B)", "gemma:2b", "~2GB")
_NEURAL_CHAT_7B = ("Neural Chat (7B)", "neural-chat:latest", "~7GB")
_CUSTOM = ("Custom model (enter below)", "custom", "varies")

_MODEL_TIERS = {
    # GPU with plenty of RAM and disk
    "gpu_high": [
        _LLAMA4_8B,
        ("Llama4 (70B - Requires lots of RAM)", "llama4:latest", "~70GB"),
        ("Llama4 Code (Code-specialized)", "llama4-code:latest", "~70GB"),
        _MISTRAL_7B,
        _GEMMA_7B,
        _NEURAL_CHAT_7B,
        _CUSTOM,
    ],
    # GPU with medium resources
    "gpu_medium": [_LLAMA4_8B, _MISTRAL_7B, _GEMMA_7B, _NEURAL_CHAT_7B, _CUSTOM],
    # GPU with low resources
    "gpu_low": [
        _LLAMA4_TINY,
        _GEMMA_2B,
        ("Neural Chat (3B)", "neural-chat:3b", "~3GB"),
        _CUSTOM,
    ],
    # Without GPU, recommend smaller models regardless of RAM
    "cpu": [
        _LLAMA4_TINY,
        _GEMMA_2B,
        ("Mistral Instruct (Small)", "mistral-instruct:latest", "~4GB"),
        _CUSTOM,
    ],
}

if not gpu_available:
    _tier = "cpu"
elif available_gb > 30 and available_disk_gb > 80:
    _tier = "gpu_high"
elif available_gb > 15 and available_disk_gb > 20:
    _tier = "gpu_medium"
else:
    _tier = "gpu_low"

recommended_models = [
    {"name": _label, "id": _model_id, "size": _size}
    for _label, _model_id, _size in _MODEL_TIERS[_tier]
]

# Summary of the resources the recommendation is based on
_resources_html = f'''
<div style="background-color:#e9f5fb; padding:10px; border-radius:5px; margin:10px 0;">
    <h4 style="color:#0c63e4;">Available Resources</h4>
    <ul>
        <li><b>Memory:</b> {available_gb:.2f} GB available</li>
        <li><b>Disk:</b> {available_disk_gb:.2f} GB available</li>
        <li><b>GPU:</b> {'Yes' if gpu_available else 'No'}</li>
    </ul>
    <p>Models are recommended based on your available resources.</p>
</div>
'''
display(HTML(_resources_html))

# Dropdown entries: label is "<name> (<size>)", value is the model id
_dropdown_options = []
for _entry in recommended_models:
    _dropdown_options.append((f"{_entry['name']} ({_entry['size']})", _entry['id']))

model_dropdown = widgets.Dropdown(
    description='Select model:',
    options=_dropdown_options,
    layout=widgets.Layout(width='50%'),
    style={'description_width': 'initial'},
)

# Free-text field for a custom model id; hidden until "custom" is selected
custom_model = widgets.Text(
    description='Custom model:',
    placeholder='Enter model name (e.g., llama4-7b:latest)',
    layout=widgets.Layout(width='50%', display='none'),
    style={'description_width': 'initial'},
)

# Output area that receives the download log, and the button that starts it
output_area = widgets.Output()
download_button = widgets.Button(button_style='primary', description='Download Model')

# Dropdown observer: the custom-model field is only visible for "custom"
def on_model_change(change):
    """Show or hide the custom-model text box.

    ``change`` is the change dictionary passed by ``observe``; only its
    ``'new'`` entry (the newly selected model id) is read.
    """
    custom_model.layout.display = 'block' if change['new'] == 'custom' else 'none'

# Function to download model
def download_model(b):
    with output_area:
        clear_output()
        model_id = model_dropdown.value
        
        if model_id == 'custom':
            if not custom_model.value.strip():
                print("⚠️ Please enter a custom model name!")
                return
            model_id = custom_model.value.strip()
        
        # Run optimization before download
        print("🧹 Clearing space for model download...")
        clear_disk_space()
        clear_memory()
        
        print(f"🚀 Downloading model: {model_id}")
        print("This may take a while depending on the model size and your internet connection...")
        print("You'll see progress below. Please don't interrupt the process.")
        
        # Start a progress monitoring thread
        monitor_thread = threading.Thread(target=monitor_download_progress, args=(model_id,))
        monitor_thread.daemon = True
        monitor_thread.start()
        
        # Run ollama pull command
        process = subprocess.Popen(
            ["ollama", "pull", model_id],
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
            text=True
        )
        
        # Show live output
        while True:
            output = process.stdout.readline()
            if output == '' and process.poll() is not None:
                break
            if output:
                print(output.strip())
        
        return_code = process.poll()
        if return_code == 0:
            print(f"✅ Model {model_id} downloaded successfully!")
            # List available models
            print("\n📋 Available models:")
            !ollama list
            
            # Display memory usage after download
            show_memory_usage()
            show_disk_usage()
        else:
            print(f"❌ Failed to download model {model_id}. Return code: {return_code}")
            print("\nPossible reasons for failure:")
            print("- Not enough disk space")
            print("- Not enough memory")
            print("- Network issues")
            print("\nTry selecting a smaller model or freeing up more space.")

# Register the callbacks, then render the heading and the controls top to bottom
download_button.on_click(download_model)
model_dropdown.observe(on_model_change, names='value')

_download_heading = widgets.HTML(f"<h3>Select a model to download (with{'' if gpu_available else 'out'} GPU acceleration):</h3>")
for _widget in (_download_heading, model_dropdown, custom_model, download_button, output_area):
    display(_widget)

## 7. Test the model

Let's make sure the model works by asking it a simple question. This also helps verify memory settings are correct.

In [None]:
import requests
import json
import time
from IPython.display import display, HTML

# Clear memory before testing.
# `gc`, `torch` and `gpu_available` are not imported or defined in this cell;
# they are expected to exist from the earlier cells of the notebook.
gc.collect()
if gpu_available:
    torch.cuda.empty_cache()

# Send a single prompt to a model and report the outcome
def test_model(model_id, prompt="Hi, I'm testing if you're working properly. Please give a brief greeting."):
    """Send one non-streaming chat request to the local Ollama server.

    Args:
        model_id: Name of the model to query.
        prompt: User message to send.

    Returns:
        ``{"success": True, "response": <text>}`` when the server answers
        with HTTP 200 (``<text>`` falls back to ``"No content returned"``),
        otherwise ``{"success": False, "error": <description>}``.  Exceptions
        are not raised; they are reported through the error form.
    """
    request_body = {
        "model": model_id,
        "messages": [{"role": "user", "content": prompt}],
        "stream": False,
    }

    try:
        reply = requests.post("http://localhost:11434/api/chat", json=request_body)
        if reply.status_code != 200:
            return {
                "success": False,
                "error": f"Server returned status code {reply.status_code}: {reply.text}",
            }
        message = reply.json().get("message", {})
        return {
            "success": True,
            "response": message.get("content", "No content returned"),
        }
    except Exception as exc:
        return {"success": False, "error": str(exc)}

# Get available models from the local Ollama server.
# model_options is a list of (label, value) pairs for the dropdown; an empty
# value marks a placeholder entry that cannot be tested.
try:
    # The timeout keeps an unresponsive server from hanging the cell.
    response = requests.get("http://localhost:11434/api/tags", timeout=10)
    if response.status_code == 200:
        # Treat a missing or null "models" field as "no models installed".
        models = response.json().get("models") or []
        model_options = [(model.get("name"), model.get("name")) for model in models]
    else:
        model_options = []
except Exception as e:
    model_options = [(f"Error: {str(e)}", "")]

# Show an explicit placeholder instead of an empty dropdown
if not model_options:
    model_options = [("No models found", "")]

# Create widgets
test_model_dropdown = widgets.Dropdown(
    options=model_options,
    description='Model to test:',
    style={'description_width': 'initial'},
    layout=widgets.Layout(width='50%')
)

test_prompt = widgets.Textarea(
    value="Hi, I'm testing if you're working properly. Please give a brief greeting.",
    description='Prompt:',
    style={'description_width': 'initial'},
    layout=widgets.Layout(width='80%', height='100px')
)

test_button = widgets.Button(description='Test Model', button_style='success')
test_output = widgets.Output()

# Button callback: run the prompt against the selected model
def on_test_button_click(b):
    """Test the selected model and print the result into ``test_output``.

    ``b`` is the clicked button (unused).  Without a selected model a warning
    is printed.  Otherwise memory is cleared, the prompt is sent through
    ``test_model`` and timed, and either the response with the memory usage
    or the error with recovery hints is printed.
    """
    with test_output:
        clear_output()

        chosen_model = test_model_dropdown.value
        if not chosen_model:
            print("⚠️ Please select a model to test!")
            return

        prompt_text = test_prompt.value
        print(f"🔍 Testing model '{chosen_model}' with prompt: \n{prompt_text}\n")
        print("Waiting for response...")

        # Clear memory before test
        gc.collect()
        if gpu_available:
            torch.cuda.empty_cache()

        started_at = time.time()
        result = test_model(chosen_model, prompt_text)
        elapsed_time = time.time() - started_at

        if not result["success"]:
            for _message in (
                f"\n❌ Error testing model: {result['error']}",
                "\nTry to free up more memory by:",
                "1. Running 'clear_memory()' in a new cell",
                "2. Restart the runtime if needed",
                "3. Try with a smaller model",
            ):
                print(_message)
            return

        separator = "-----------------------------------"
        print(f"\n✅ Model responded successfully in {elapsed_time:.2f} seconds!\n")
        print("Response:")
        print(separator)
        print(result["response"])
        print(separator)

        # Memory usage after the test (bytes / 1024**3)
        memory = psutil.virtual_memory()
        used_gb = memory.used / (1024 ** 3)
        available_gb = memory.available / (1024 ** 3)
        print(f"\nMemory after response: {used_gb:.2f} GB used, {available_gb:.2f} GB available")

# Register the callback, then render the heading and the controls top to bottom
test_button.on_click(on_test_button_click)

_test_heading = widgets.HTML("<h3>Test your model:</h3>")
for _widget in (_test_heading, test_model_dropdown, test_prompt, test_button, test_output):
    display(_widget)

## 8. Set up a tunnel to access your Ollama instance

Now we'll set up a Cloudflare tunnel so your Backdoor AI application can access this Ollama instance. Copy the URL you get from this step into the Backdoor AI settings under the Ollama provider.

In [None]:
import subprocess
import threading
import time
import re
from IPython.display import display, HTML

# Clean up memory before starting tunnel.
# `gc`, `torch` and `gpu_available` are not imported or defined in this cell;
# they are expected to exist from the earlier cells of the notebook.
gc.collect()
if gpu_available:
    torch.cuda.empty_cache()

# Kill any existing cloudflared processes so that only one tunnel is running.
# `|| true` makes the shell command succeed even when nothing was running.
!pkill -f cloudflared || true
# Short pause before the new tunnel is started
time.sleep(1)

# Function to run cloudflared tunnel in a separate thread
def run_tunnel():
    """Run a cloudflared quick tunnel to the local Ollama port and capture its URL.

    Intended as a thread target.  Starts ``cloudflared tunnel --url
    http://localhost:11434``, stores the process handle in the global
    ``tunnel_process``, echoes every output line, and stores the first public
    ``*.trycloudflare.com`` URL it sees in the global ``tunnel_url``.
    Returns once cloudflared's output stream is closed and the process has
    exited.
    """
    global tunnel_process, tunnel_url
    tunnel_process = subprocess.Popen(
        ["cloudflared", "tunnel", "--url", "http://localhost:11434"],
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
        text=True
    )

    # Extract tunnel URL
    tunnel_url = None
    url_pattern = re.compile(r'https://[\w.-]+\.trycloudflare\.com')
    # The pattern also matches cloudflared's own API host, which may appear in
    # its log output (for example when the tunnel request fails).  That host
    # is never the public tunnel address, so it is skipped.
    api_host_url = 'https://api.trycloudflare.com'

    # Iterating the stream blocks until a line arrives and ends at EOF, so
    # there is no polling loop spinning while the process is shutting down.
    for line in tunnel_process.stdout:
        if not tunnel_url:
            for candidate in url_pattern.findall(line):
                if candidate != api_host_url:
                    tunnel_url = candidate
                    print(f"\nTunnel URL found: {tunnel_url}\n")
                    break

        # Print all output for debugging
        print(line.strip())

    # Reap the finished process
    tunnel_process.wait()

# Run the tunnel reader in a daemon thread; it publishes the address through
# the global `tunnel_url`.
tunnel_url = None
tunnel_thread = threading.Thread(target=run_tunnel, daemon=True)
tunnel_thread.start()

# Poll once per second, for at most max_attempts seconds, until the URL is
# known.  A progress line is printed every fifth second.
max_attempts = 30
attempts = 0
while attempts < max_attempts and tunnel_url is None:
    time.sleep(1)
    attempts += 1
    if attempts % 5 == 0:
        print(f"Waiting for tunnel URL... ({attempts}/{max_attempts} seconds)")

# Display connection information
if not tunnel_url:
    _tunnel_banner = '''
    <div style="background-color:#f8d7da; padding:10px; border-radius:5px; margin:10px 0;">
        <h3 style="color:#721c24;">❌ Failed to create tunnel</h3>
        <p>Could not establish a cloudflared tunnel after multiple attempts.</p>
        <p>Please check the output above for errors or try the tunnel setup again.</p>
    </div>'''
    display(HTML(_tunnel_banner))
else:
    _tunnel_banner = f'''
    <div style="background-color:#d4edda; padding:15px; border-radius:5px; margin:15px 0;">
        <h3 style="color:#155724;">🔗 Tunnel Created Successfully</h3>
        <p>Your Ollama instance is now accessible at:</p>
        <div style="background-color:#f8f9fa; padding:10px; border-radius:5px; font-family:monospace; margin:10px 0;">
            <b>{tunnel_url}</b>
        </div>
        <h4 style="color:#155724; margin-top:15px;">How to connect from Backdoor AI:</h4>
        <ol>
            <li>Go to Settings > LLM Provider Settings</li>
            <li>Select "Ollama" as your provider</li>
            <li>In the "API Base URL" field, enter: <b>{tunnel_url}</b></li>
            <li>Select your model from the dropdown</li>
            <li>Click "Save Settings"</li>
        </ol>
        <p><b>Important:</b> Keep this notebook running as long as you need the Ollama service. Closing it will terminate the tunnel.</p>
    </div>'''
    display(HTML(_tunnel_banner))

    # A one-line command for checking the tunnel from outside
    print("\nTest your tunnel connection with this command (from any machine):")
    print(f"curl -s {tunnel_url}/api/version")

## Memory Management Utilities

If you encounter memory issues, you can run these commands to free up resources.

In [None]:
# Run this cell whenever memory or disk space gets tight.  It only reports the
# current usage; the clean-up commands below are commented out on purpose.
# Copy the ones you need into a new cell, or uncomment them here.

# Show current memory and disk usage
show_memory_usage()
show_disk_usage()

# To clear Python memory, run:
# clear_memory()

# To free up disk space, run:
# clear_disk_space()

# To clean up model files, pass the models you want to keep as a list.
# For example, to keep only llama4-tiny:
# clean_model_files(['llama4-tiny'])

# If you need to restart the Ollama server, run:
# !pkill -f ollama
# time.sleep(2)
# !ollama serve &

# If you need to restart the tunnel, run:
# !pkill -f cloudflared
# Then go back to the tunnel creation cell and run it again