In [None]:
%pip install requests flask flask_cors

Yes, it's possible to install and run Ollama with the Gemma model from a Jupyter notebook and provide a RESTful API. Here's how to set it up:

## 1. Install Ollama

First, install Ollama on your Windows machine. You can download it from the official website or use the notebook to install it:



In [None]:
import subprocess
import sys
import requests
import json
import time

# Download and install Ollama (run this once)
def install_ollama():
    """Download and install Ollama on Windows"""
    print("Please download Ollama from: https://ollama.ai/download")
    print("Or run: winget install Ollama.Ollama")
    
# Uncomment to install
install_ollama()

In [None]:
import subprocess
import sys
import requests
import json
import time

# Download and install Ollama (run this once)
def install_ollama():
    """Download and install Ollama on Windows using winget"""
    try:
        print("Installing Ollama using winget...")
        result = subprocess.run(['winget', 'install', 'Ollama.Ollama'], 
                              capture_output=True, text=True, shell=True)
        
        if result.returncode == 0:
            print("Ollama installed successfully!")
            print("You may need to restart your terminal or add Ollama to PATH")
            return True
        else:
            print(f"Installation failed: {result.stderr}")
            print("Alternative: Download from https://ollama.ai/download")
            return False
            
    except Exception as e:
        print(f"Error installing Ollama: {e}")
        print("Manual installation required:")
        print("1. Download from: https://ollama.ai/download")
        print("2. Or run in PowerShell/CMD: winget install Ollama.Ollama")
        return False

# Install Ollama
install_ollama()

In [None]:
import subprocess
import sys
import time

def install_ollama_with_monitoring():
    """Install Ollama with detailed monitoring"""
    try:
        print("Starting Ollama installation with winget...")
        print("This may take several minutes...\n")
        
        # Run installation with real-time output
        process = subprocess.Popen(
            ['winget', 'install', 'Ollama.Ollama', '--accept-package-agreements', '--accept-source-agreements'],
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
            universal_newlines=True,
            shell=True
        )
        
        # Monitor output in real-time
        while True:
            output = process.stdout.readline()
            if output == '' and process.poll() is not None:
                break
            if output:
                print(output.strip())
        
        return_code = process.poll()
        
        if return_code == 0:
            print("\n✅ Installation completed successfully!")
            
            # Verify installation
            print("Verifying installation...")
            verify_result = subprocess.run(['ollama', '--version'], 
                                         capture_output=True, text=True, shell=True)
            if verify_result.returncode == 0:
                print(f"✅ Verification successful: {verify_result.stdout.strip()}")
            else:
                print("⚠️ Installation completed but Ollama not found in PATH")
                print("You may need to restart your terminal or VS Code")
            
            return True
        else:
            print(f"\n❌ Installation failed with return code: {return_code}")
            return False
            
    except Exception as e:
        print(f"❌ Error during installation: {e}")
        return False

def check_ollama_status():
    """Check if Ollama is installed and running"""
    print("Checking Ollama status...")
    
    # Check if Ollama is installed
    try:
        result = subprocess.run(['ollama', '--version'], 
                              capture_output=True, text=True, shell=True)
        if result.returncode == 0:
            print(f"✅ Ollama installed: {result.stdout.strip()}")
        else:
            print("❌ Ollama not found")
            return False
    except Exception as e:
        print(f"❌ Error checking Ollama: {e}")
        return False
    
    # Check if Ollama service is running
    try:
        service_check = subprocess.run(['sc', 'query', 'Ollama'], 
                                     capture_output=True, text=True, shell=True)
        if 'RUNNING' in service_check.stdout:
            print("✅ Ollama service is running")
        else:
            print("⚠️ Ollama service is not running")
    except Exception as e:
        print(f"⚠️ Could not check service status: {e}")
    
    return True

# Check current status first
if not check_ollama_status():
    print("\nProceeding with installation...")
    install_ollama_with_monitoring()
    print("\nRechecking status after installation...")
    check_ollama_status()
else:
    print("Ollama is already installed!")



## 2. Pull and Run Gemma Model



In [1]:
import subprocess
import sys
import time

# Pull the Gemma model (this will take some time)
def setup_gemma():
    """Pull the Gemma model using Ollama"""
    try:
        # Pull gemma model
        # result = subprocess.run(['ollama', 'pull', 'gemma3:4b'], 
        #                       capture_output=True, text=True, shell=True)
        result = subprocess.run(['ollama', 'pull', 'gemma3:1b'], 
                              capture_output=True, text=True, shell=True)
        print("Gemma model pulled successfully!")
        return True
    except Exception as e:
        print(f"Error pulling model: {e}")
        return False

# Run Ollama server in background
def start_ollama_server():
    """Start Ollama server"""
    try:
        subprocess.Popen(['ollama', 'serve'], shell=True)
        time.sleep(5)  # Wait for server to start
        print("Ollama server started on http://localhost:11434")
        return True
    except Exception as e:
        print(f"Error starting server: {e}")
        return False

# Setup the environment
# setup_gemma()
start_ollama_server()

Ollama server started on http://localhost:11434


True



## 3. Create RESTful API Wrapper



In [None]:
from flask import Flask, request, jsonify
from flask_cors import CORS
import threading

class OllamaAPI:
    def __init__(self):
        self.app = Flask(__name__)
        CORS(self.app)  # Enable CORS for Oxygen XML Editor
        self.setup_routes()
        
    def setup_routes(self):
        @self.app.route('/api/generate', methods=['POST'])
        def generate():
            try:
                data = request.get_json()
                prompt = data.get('prompt', '')
                # model = data.get('model', 'gemma3:4b')
                model = data.get('model', 'gemma3:1b')
                
                # Call Ollama API
                response = requests.post('http://localhost:11434/api/generate', 
                                       json={
                                           'model': model,
                                           'prompt': prompt,
                                           'stream': False
                                       })
                
                if response.status_code == 200:
                    result = response.json()
                    return jsonify({
                        'success': True,
                        'response': result.get('response', ''),
                        'model': model
                    })
                else:
                    return jsonify({
                        'success': False,
                        'error': 'Failed to generate response'
                    }), 500
                    
            except Exception as e:
                return jsonify({
                    'success': False,
                    'error': str(e)
                }), 500
        
        @self.app.route('/api/models', methods=['GET'])
        def list_models():
            try:
                response = requests.get('http://localhost:11434/api/tags')
                if response.status_code == 200:
                    return jsonify(response.json())
                else:
                    return jsonify({'error': 'Failed to fetch models'}), 500
            except Exception as e:
                return jsonify({'error': str(e)}), 500
    
    def run(self, host='localhost', port=5000):
        self.app.run(host=host, port=port, debug=False)

# Create and start the API server
api = OllamaAPI()

# Run in a separate thread to avoid blocking the notebook
def start_api_server():
    api.run(host='0.0.0.0', port=5000)

server_thread = threading.Thread(target=start_api_server, daemon=True)
server_thread.start()

print("API server started on http://localhost:5000")
print("Available endpoints:")
print("- POST /api/generate - Generate text")
print("- GET /api/models - List available models")

API server started on http://localhost:5000
Available endpoints:
- POST /api/generate - Generate text
- GET /api/models - List available models


 * Serving Flask app '__main__'
 * Debug mode: off
 * Debug mode: off


 * Running on all addresses (0.0.0.0)
 * Running on http://127.0.0.1:5000
 * Running on http://192.168.50.43:5000

 * Running on all addresses (0.0.0.0)
 * Running on http://127.0.0.1:5000
 * Running on http://192.168.50.43:5000
Press CTRL+C to quitPress CTRL+C to quit

127.0.0.1 - - [30/May/2025 23:14:01] "POST /api/generate HTTP/1.1" 200 -
127.0.0.1 - - [30/May/2025 23:14:01] "POST /api/generate HTTP/1.1" 200 -
127.0.0.1 - - [30/May/2025 23:14:28] "GET /api/models HTTP/1.1" 200 -
127.0.0.1 - - [30/May/2025 23:14:28] "GET /api/models HTTP/1.1" 200 -
127.0.0.1 - - [30/May/2025 23:14:54] "127.0.0.1 - - [30/May/2025 23:14:54] "GET /api/health HTTP/1.1GET /api/health HTTP/1.1" 404 -
" 404 -
127.0.0.1 - - [30/May/2025 23:15:27] "POST /api/generate HTTP/1.1" 200 -
127.0.0.1 - - [30/May/2025 23:15:27] "POST /api/generate HTTP/1.1" 200 -




## 4. Test the API



In [3]:
import requests

# Test the API
def test_api():
    """Test the local API"""
    test_data = {
        'prompt': 'Explain what machine learning is in simple terms.',
        # 'model': 'gemma3:4b'
        'model': 'gemma3:1b'
    }
    
    try:
        response = requests.post('http://127.0.0.1:5000/api/generate', 
                               json=test_data)
        if response.status_code == 200:
            result = response.json()
            print("API Test Successful!")
            print(f"Response: {result['response']}")
        else:
            print(f"API Test Failed: {response.status_code}")
    except Exception as e:
        print(f"API Test Error: {e}")

# Wait a moment for server to start, then test
time.sleep(3)
test_api()

API Test Successful!
Response: Okay, let's break down machine learning in simple terms.

**Imagine you're teaching a dog a new trick.** You don't tell the dog *exactly* how to do it step-by-step. Instead, you show it examples, reward good behavior, and correct mistakes. Eventually, the dog learns to do the trick on its own. 

**Machine learning is kind of like that, but for computers.** 

Here’s a simplified explanation:

1. **You give the computer a lot of data.** This data could be anything – pictures of cats, customer reviews, sales figures, etc. 

2. **The computer analyzes this data.** It looks for patterns and trends. It tries to find relationships between the data.

3. **The computer learns from these patterns.**  It builds a "model" – a simplified representation of the data – that helps it make predictions or decisions about new data.

4. **You give the computer a new input.**  Now, it uses its model to make a guess or prediction about what will happen next.

**Think of it like

When you get "API Test Failed: 500", it indicates a server-side error. Here are the most likely causes and how to fix them:

## Common Issues Causing HTTP 500:

### 1. **Ollama Server Not Running**
The most common cause - your Flask API is running but can't connect to Ollama:



In [2]:
import requests
import subprocess
import time

def diagnose_api_error():
    """Diagnose API 500 errors"""
    
    # Check if Ollama server is running
    print("1. Checking if Ollama server is running...")
    try:
        response = requests.get('http://localhost:11434/api/tags', timeout=5)
        if response.status_code == 200:
            print("✅ Ollama server is running")
            models = response.json().get('models', [])
            print(f"Available models: {[m['name'] for m in models]}")
        else:
            print("❌ Ollama server returned error:", response.status_code)
    except requests.exceptions.ConnectionError:
        print("❌ Cannot connect to Ollama server on localhost:11434")
        print("Starting Ollama server...")
        subprocess.Popen(['ollama', 'serve'], shell=True)
        time.sleep(10)
        return False
    except Exception as e:
        print(f"❌ Error checking Ollama: {e}")
        return False
    
    # Check if Flask API is running
    print("\n2. Checking Flask API...")
    try:
        response = requests.get('http://localhost:5000/api/models', timeout=5)
        print(f"Flask API status: {response.status_code}")
    except Exception as e:
        print(f"❌ Flask API error: {e}")
    
    # Test with a simple model request
    print("\n3. Testing direct Ollama API...")
    try:
        test_request = {
            'model': 'gemma3:1b',
            'prompt': 'Hello',
            'stream': False
        }
        response = requests.post('http://localhost:11434/api/generate', 
                               json=test_request, timeout=30)
        print(f"Direct Ollama test: {response.status_code}")
        if response.status_code != 200:
            print(f"Error: {response.text}")
    except Exception as e:
        print(f"❌ Direct Ollama test failed: {e}")

# Run diagnosis
diagnose_api_error()

1. Checking if Ollama server is running...
✅ Ollama server is running
Available models: ['gemma3:1b', 'gemma:2b', 'gemma3:4b']

2. Checking Flask API...
❌ Flask API error: HTTPConnectionPool(host='localhost', port=5000): Max retries exceeded with url: /api/models (Caused by NewConnectionError('<urllib3.connection.HTTPConnection object at 0x00000217ED451D30>: Failed to establish a new connection: [WinError 10061] No connection could be made because the target machine actively refused it'))

3. Testing direct Ollama API...
Direct Ollama test: 200




### 2. **Model Not Downloaded**
Gemma3:4b model might not be pulled yet:



In [5]:
def ensure_model_available():
    """Ensure the Gemma model is available"""
    print("Checking if gemma3:1b model is available...")
    
    try:
        # List available models
        result = subprocess.run(['ollama', 'list'], 
                              capture_output=True, text=True, shell=True)
        
        if 'gemma3:1b' in result.stdout:
            print("✅ gemma3:1b model is available")
            return True
        else:
            print("❌ gemma3:4b model not found")
            print("Available models:")
            print(result.stdout)
            
            print("Pulling gemma3:4b model (this may take several minutes)...")
            pull_result = subprocess.run(['ollama', 'pull', 'gemma3:4b'], 
                                       capture_output=True, text=True, shell=True)
            
            if pull_result.returncode == 0:
                print("✅ Model pulled successfully")
                return True
            else:
                print("❌ Failed to pull model")
                print(pull_result.stderr)
                return False
                
    except Exception as e:
        print(f"Error checking models: {e}")
        return False

ensure_model_available()

Checking if gemma3:1b model is available...
✅ gemma3:1b model is available


True



### 3. **Enhanced Error Handling in Flask API**
Add better error handling to see exactly what's failing:



In [6]:
from flask import Flask, request, jsonify
from flask_cors import CORS
import threading
import requests
import traceback

class OllamaAPI:
    def __init__(self):
        self.app = Flask(__name__)
        CORS(self.app)
        self.setup_routes()
        
    def setup_routes(self):
        @self.app.route('/api/generate', methods=['POST'])
        def generate():
            try:
                data = request.get_json()
                if not data:
                    return jsonify({'success': False, 'error': 'No JSON data provided'}), 400
                
                prompt = data.get('prompt', '')
                model = data.get('model', 'gemma3:4b')
                
                if not prompt:
                    return jsonify({'success': False, 'error': 'No prompt provided'}), 400
                
                print(f"Generating response for prompt: {prompt[:50]}...")
                
                # Test Ollama connection first
                try:
                    health_check = requests.get('http://localhost:11434/api/tags', timeout=5)
                    if health_check.status_code != 200:
                        return jsonify({
                            'success': False, 
                            'error': f'Ollama server not responding: {health_check.status_code}'
                        }), 500
                except requests.exceptions.ConnectionError:
                    return jsonify({
                        'success': False, 
                        'error': 'Cannot connect to Ollama server on localhost:11434'
                    }), 500
                
                # Call Ollama API
                response = requests.post('http://localhost:11434/api/generate', 
                                       json={
                                           'model': model,
                                           'prompt': prompt,
                                           'stream': False
                                       }, timeout=60)
                
                if response.status_code == 200:
                    result = response.json()
                    return jsonify({
                        'success': True,
                        'response': result.get('response', ''),
                        'model': model
                    })
                else:
                    return jsonify({
                        'success': False,
                        'error': f'Ollama API error: {response.status_code} - {response.text}'
                    }), 500
                    
            except Exception as e:
                error_traceback = traceback.format_exc()
                print(f"Exception in generate endpoint: {error_traceback}")
                return jsonify({
                    'success': False,
                    'error': f'Server error: {str(e)}',
                    'traceback': error_traceback
                }), 500
        
        @self.app.route('/api/health', methods=['GET'])
        def health():
            """Health check endpoint"""
            try:
                # Check Ollama connection
                ollama_response = requests.get('http://localhost:11434/api/tags', timeout=5)
                return jsonify({
                    'flask_api': 'running',
                    'ollama_server': 'running' if ollama_response.status_code == 200 else 'error',
                    'ollama_status_code': ollama_response.status_code
                })
            except Exception as e:
                return jsonify({
                    'flask_api': 'running',
                    'ollama_server': 'not_reachable',
                    'error': str(e)
                })
    
    def run(self, host='localhost', port=5000):
        self.app.run(host=host, port=port, debug=True)

# Create and start the enhanced API server
api = OllamaAPI()

def start_api_server():
    api.run(host='0.0.0.0', port=5000)

server_thread = threading.Thread(target=start_api_server, daemon=True)
server_thread.start()

print("Enhanced API server started on http://localhost:5000")
time.sleep(2)

# Test health endpoint
try:
    health_response = requests.get('http://localhost:5000/api/health')
    print(f"Health check: {health_response.json()}")
except Exception as e:
    print(f"Health check failed: {e}")

Enhanced API server started on http://localhost:5000
 * Serving Flask app '__main__'
 * Debug mode: on
 * Debug mode: on


 * Running on all addresses (0.0.0.0)
 * Running on http://127.0.0.1:5000
 * Running on http://192.168.50.43:5000
Press CTRL+C to quit
Exception in thread Thread-8 (start_api_server):
Traceback (most recent call last):
  File "C:\Users\jeffw\AppData\Local\Programs\Python\Python312\Lib\threading.py", line 1075, in _bootstrap_inner
    self.run()
  File "c:\Project\OCR\.venv\Lib\site-packages\ipykernel\ipkernel.py", line 766, in run_closure
    _threading_Thread_run(self)
  File "C:\Users\jeffw\AppData\Local\Programs\Python\Python312\Lib\threading.py", line 1012, in run
    self._target(*self._args, **self._kwargs)
  File "C:\Users\jeffw\AppData\Local\Temp\ipykernel_13520\3509073467.py", line 98, in start_api_server
  File "C:\Users\jeffw\AppData\Local\Temp\ipykernel_13520\3509073467.py", line 92, in run
  File "c:\Project\OCR\.venv\Lib\site-packages\flask\app.py", line 662, in run
    run_simple(t.cast(str, host), port, self, **options)
  File "c:\Project\OCR\.venv\Lib\site-packages\wer

Health check failed: Expecting value: line 1 column 1 (char 0)




### 4. **Updated Test Function**
Use a more robust test function:



In [7]:
def test_api_enhanced():
    """Enhanced API test with better error reporting"""
    print("Testing API with enhanced error reporting...")
    
    test_data = {
        'prompt': 'What is 2+2?',
        'model': 'gemma3:1b'
    }
    
    try:
        print("Sending request to Flask API...")
        response = requests.post('http://127.0.0.1:5000/api/generate', 
                               json=test_data, timeout=60)
        
        print(f"Response status code: {response.status_code}")
        
        if response.status_code == 200:
            result = response.json()
            print("✅ API Test Successful!")
            print(f"Response: {result.get('response', 'No response')}")
        else:
            print(f"❌ API Test Failed: {response.status_code}")
            try:
                error_detail = response.json()
                print(f"Error details: {error_detail}")
            except:
                print(f"Raw response: {response.text}")
    except requests.exceptions.Timeout:
        print("❌ Request timed out - model might be loading")
    except Exception as e:
        print(f"❌ API Test Error: {e}")

# Wait for server to start, then test
time.sleep(5)
test_api_enhanced()

Testing API with enhanced error reporting...
Sending request to Flask API...
Response status code: 200
✅ API Test Successful!
Response: 2 + 2 = 4

Response status code: 200
✅ API Test Successful!
Response: 2 + 2 = 4





Run these diagnostic functions to identify the exact cause of the 500 error. The most common fix is ensuring Ollama server is running and the model is downloaded.

Collecting workspace informationYou can check if the Gemma model is successfully pulled from Ollama using several methods. Here are the most effective ways:

## 1. **Check Available Models with `ollama list`**



In [6]:
import subprocess

def check_pulled_models():
    """Check all models pulled by Ollama"""
    try:
        result = subprocess.run(['ollama', 'list'], 
                              capture_output=True, text=True, shell=True)
        
        if result.returncode == 0:
            print("✅ Successfully retrieved model list:")
            print(result.stdout)
            
            # Check specifically for gemma3:1b
            if 'gemma3:1b' in result.stdout:
                print("✅ gemma3:1b model is available!")
                return True
            else:
                print("❌ gemma3:1b model not found in the list")
                return False
        else:
            print(f"❌ Error running ollama list: {result.stderr}")
            return False
            
    except Exception as e:
        print(f"❌ Error checking models: {e}")
        return False

# Check models
check_pulled_models()

✅ Successfully retrieved model list:
NAME         ID              SIZE      MODIFIED    
gemma3:1b    8648f39daa8f    815 MB    5 hours ago    
gemma3:4b    a2af6cc3eb7f    3.3 GB    9 hours ago    

✅ gemma3:1b model is available!


True



## 2. **Use Ollama API to List Models**



In [7]:
import requests

def check_models_via_api():
    """Check models using Ollama's REST API"""
    try:
        response = requests.get('http://localhost:11434/api/tags', timeout=10)
        
        if response.status_code == 200:
            models_data = response.json()
            models = models_data.get('models', [])
            
            print("✅ Available models via API:")
            for model in models:
                name = model.get('name', 'Unknown')
                size = model.get('size', 0)
                modified = model.get('modified_at', 'Unknown')
                print(f"  - {name} (Size: {size:,} bytes, Modified: {modified})")
            
            # Check for gemma3:4b specifically
            gemma_models = [m for m in models if 'gemma3:4b' in m.get('name', '')]
            if gemma_models:
                print("✅ gemma3:4b model found!")
                return True
            else:
                print("❌ gemma3:4b model not found")
                return False
        else:
            print(f"❌ API request failed: {response.status_code}")
            return False
            
    except requests.exceptions.ConnectionError:
        print("❌ Cannot connect to Ollama server. Make sure it's running.")
        return False
    except Exception as e:
        print(f"❌ Error checking models via API: {e}")
        return False

# Check via API
check_models_via_api()

✅ Available models via API:
  - gemma3:1b (Size: 815,319,791 bytes, Modified: 2025-05-31T01:51:33.6340204+08:00)
  - gemma3:4b (Size: 3,338,801,804 bytes, Modified: 2025-05-30T22:20:59.6846172+08:00)
✅ gemma3:4b model found!


True



## 3. **Test Model with Simple Generation**



In [8]:
def test_model_generation():
    """Test if the model can actually generate text"""
    try:
        test_request = {
            # 'model': 'gemma3:4b',
            'model': 'gemma3:1b',
            'prompt': 'Hello, say hi back to me.',
            'stream': False
        }
        
        print("Testing model generation...")
        response = requests.post('http://localhost:11434/api/generate', 
                               json=test_request, timeout=30)
        
        if response.status_code == 200:
            result = response.json()
            generated_text = result.get('response', '')
            print("✅ Model generation test successful!")
            print(f"Prompt: {test_request['prompt']}")
            print(f"Response: {generated_text}")
            return True
        else:
            print(f"❌ Model generation failed: {response.status_code}")
            print(f"Error: {response.text}")
            return False
            
    except Exception as e:
        print(f"❌ Error testing model generation: {e}")
        return False

# Test generation
test_model_generation()

Testing model generation...
✅ Model generation test successful!
Prompt: Hello, say hi back to me.
Response: Hello to you too! 😊 How can I help you today?


True



## 4. **Complete Model Status Check Function**



In [9]:
def complete_model_check():
    """Complete check for gemma3:4b model availability and functionality"""
    print("=" * 50)
    print("COMPLETE GEMMA3:4B MODEL STATUS CHECK")
    print("=" * 50)
    
    # Step 1: Check if Ollama is running
    print("\n1. Checking Ollama server...")
    try:
        health = requests.get('http://localhost:11434/api/tags', timeout=5)
        if health.status_code == 200:
            print("✅ Ollama server is running")
        else:
            print("❌ Ollama server error")
            return False
    except:
        print("❌ Ollama server not reachable")
        return False
    
    # Step 2: Check model list via command
    print("\n2. Checking models via command line...")
    cmd_result = check_pulled_models()
    
    # Step 3: Check model list via API
    print("\n3. Checking models via API...")
    api_result = check_models_via_api()
    
    # Step 4: Test actual generation
    print("\n4. Testing model generation...")
    gen_result = test_model_generation()
    
    # Summary
    print("\n" + "=" * 50)
    print("SUMMARY:")
    print(f"Command line check: {'✅ PASS' if cmd_result else '❌ FAIL'}")
    print(f"API check: {'✅ PASS' if api_result else '❌ FAIL'}")
    print(f"Generation test: {'✅ PASS' if gen_result else '❌ FAIL'}")
    
    if all([cmd_result, api_result, gen_result]):
        print("🎉 gemma3:1b model is fully functional!")
        return True
    else:
        print("⚠️ There are issues with the gemma3:1b model")
        return False

# Run complete check
complete_model_check()

COMPLETE GEMMA3:4B MODEL STATUS CHECK

1. Checking Ollama server...
✅ Ollama server is running

2. Checking models via command line...
✅ Successfully retrieved model list:
NAME         ID              SIZE      MODIFIED    
gemma3:1b    8648f39daa8f    815 MB    5 hours ago    
gemma3:4b    a2af6cc3eb7f    3.3 GB    9 hours ago    

✅ gemma3:1b model is available!

3. Checking models via API...
✅ Available models via API:
  - gemma3:1b (Size: 815,319,791 bytes, Modified: 2025-05-31T01:51:33.6340204+08:00)
  - gemma3:4b (Size: 3,338,801,804 bytes, Modified: 2025-05-30T22:20:59.6846172+08:00)
✅ gemma3:4b model found!

4. Testing model generation...
Testing model generation...
✅ Model generation test successful!
Prompt: Hello, say hi back to me.
Response: Hello there! 👋 How can I help you today?

SUMMARY:
Command line check: ✅ PASS
API check: ✅ PASS
Generation test: ✅ PASS
🎉 gemma3:1b model is fully functional!


True



## 5. **If Model is Not Found, Pull It**



In [None]:
def ensure_gemma_model():
    """Ensure gemma3:4b model is available, pull if necessary"""
    if not complete_model_check():
        print("\n🔄 Model not found or not working. Attempting to pull...")
        
        try:
            print("Pulling gemma3:4b model (this may take several minutes)...")
            pull_process = subprocess.Popen(
                ['ollama', 'pull', 'gemma3:4b'],
                stdout=subprocess.PIPE,
                stderr=subprocess.STDOUT,
                universal_newlines=True,
                shell=True
            )
            
            # Show progress
            while True:
                output = pull_process.stdout.readline()
                if output == '' and pull_process.poll() is not None:
                    break
                if output:
                    print(output.strip())
            
            if pull_process.returncode == 0:
                print("✅ Model pulled successfully!")
                # Check again
                return complete_model_check()
            else:
                print("❌ Failed to pull model")
                return False
                
        except Exception as e:
            print(f"❌ Error pulling model: {e}")
            return False
    else:
        print("✅ Model is already available and working!")
        return True

# Ensure model is available
ensure_gemma_model()



Run these functions in your localLlmWIntelGPU.ipynb notebook to verify that the Gemma model is successfully pulled and working. The complete check will tell you exactly what's working and what needs to be fixed.



## 5. Oxygen XML Editor Integration

For Oxygen XML Editor, you can create a connector using the REST API:

**Connector Configuration:**
- **URL**: `http://localhost:5000/api/generate`
- **Method**: POST
- **Content-Type**: application/json
- **Request Body**:


In [None]:
{
    "prompt": "${prompt}",
    "model": "gemma:2b"
}



**Usage in Oxygen XML Editor:**
1. Go to **Options** > **Preferences** > **External Tools**
2. Create a new external tool with the API endpoint
3. Configure parameters to pass selected text as prompt
4. Set up response handling to insert generated content

## 6. Enhanced API with Intel GPU Support



In [None]:
# For Intel GPU optimization, ensure you have Intel Extension for PyTorch
def setup_intel_gpu():
    """Setup Intel GPU acceleration for better performance"""
    try:
        import intel_extension_for_pytorch as ipex
        print("Intel Extension for PyTorch available")
        return True
    except ImportError:
        print("Installing Intel Extension for PyTorch...")
        subprocess.run([sys.executable, '-m', 'pip', 'install', 'intel_extension_for_pytorch'])
        return False

setup_intel_gpu()

# Enhanced API endpoint with performance monitoring
@api.app.route('/api/generate/enhanced', methods=['POST'])
def generate_enhanced():
    start_time = time.time()
    try:
        data = request.get_json()
        prompt = data.get('prompt', '')
        model = data.get('model', 'gemma:2b')
        max_tokens = data.get('max_tokens', 100)
        
        response = requests.post('http://localhost:11434/api/generate', 
                               json={
                                   'model': model,
                                   'prompt': prompt,
                                   'stream': False,
                                   'options': {
                                       'num_predict': max_tokens
                                   }
                               })
        
        processing_time = time.time() - start_time
        
        if response.status_code == 200:
            result = response.json()
            return jsonify({
                'success': True,
                'response': result.get('response', ''),
                'model': model,
                'processing_time': processing_time,
                'tokens_generated': len(result.get('response', '').split())
            })
        else:
            return jsonify({
                'success': False,
                'error': 'Failed to generate response',
                'processing_time': processing_time
            }), 500
            
    except Exception as e:
        return jsonify({
            'success': False,
            'error': str(e),
            'processing_time': time.time() - start_time
        }), 500

print("Enhanced API endpoint added: POST /api/generate/enhanced")



This setup provides a complete solution for running Ollama with Gemma model locally and exposing it via REST API for Oxygen XML Editor integration. The API runs on port 5000 and provides endpoints for text generation and model management.

You can remove models from Ollama using several methods. Here are the most effective ways:

## 1. **Using Ollama Command Line**

The simplest method is using the `ollama rm` command:



In [5]:
import subprocess

def remove_ollama_model(model_name):
    """Remove a specific model from Ollama"""
    try:
        print(f"Removing model: {model_name}")
        result = subprocess.run(['ollama', 'rm', model_name], 
                              capture_output=True, text=True, shell=True)
        
        if result.returncode == 0:
            print(f"✅ Model {model_name} removed successfully!")
            print(result.stdout)
            return True
        else:
            print(f"❌ Failed to remove model {model_name}")
            print(f"Error: {result.stderr}")
            return False
            
    except Exception as e:
        print(f"❌ Error removing model: {e}")
        return False

# Example: Remove gemma3:4b model
remove_ollama_model('gemma:2b')

# # Example: Remove gemma3:1b model
# remove_ollama_model('gemma3:1b')

Removing model: gemma:2b
✅ Model gemma:2b removed successfully!
deleted 'gemma:2b'



True



## 2. **List and Remove Models Interactively**



In [None]:
def list_and_remove_models():
    """List available models and allow interactive removal"""
    try:
        # First, list all available models
        result = subprocess.run(['ollama', 'list'], 
                              capture_output=True, text=True, shell=True)
        
        if result.returncode == 0:
            print("Available models:")
            print(result.stdout)
            
            # Parse model names from output
            lines = result.stdout.strip().split('\n')[1:]  # Skip header
            models = []
            for line in lines:
                if line.strip():
                    model_name = line.split()[0]
                    models.append(model_name)
            
            if not models:
                print("No models found to remove.")
                return
            
            print("\nModels available for removal:")
            for i, model in enumerate(models, 1):
                print(f"{i}. {model}")
            
            # You can modify this to remove specific models
            # For example, remove all gemma models:
            gemma_models = [m for m in models if 'gemma' in m.lower()]
            
            if gemma_models:
                print(f"\nFound Gemma models: {gemma_models}")
                for model in gemma_models:
                    remove_ollama_model(model)
            else:
                print("No Gemma models found.")
                
        else:
            print(f"❌ Error listing models: {result.stderr}")
            
    except Exception as e:
        print(f"❌ Error: {e}")

# Run the interactive removal
list_and_remove_models()



## 3. **Remove All Models (Clean Slate)**



In [None]:
def remove_all_models():
    """Remove all models from Ollama"""
    try:
        # Get list of all models
        result = subprocess.run(['ollama', 'list'], 
                              capture_output=True, text=True, shell=True)
        
        if result.returncode == 0:
            lines = result.stdout.strip().split('\n')[1:]  # Skip header
            models = []
            for line in lines:
                if line.strip():
                    model_name = line.split()[0]
                    models.append(model_name)
            
            if not models:
                print("No models found to remove.")
                return
            
            print(f"Found {len(models)} models to remove:")
            for model in models:
                print(f"  - {model}")
            
            # Remove each model
            for model in models:
                print(f"\nRemoving {model}...")
                remove_result = subprocess.run(['ollama', 'rm', model], 
                                             capture_output=True, text=True, shell=True)
                
                if remove_result.returncode == 0:
                    print(f"✅ {model} removed successfully")
                else:
                    print(f"❌ Failed to remove {model}: {remove_result.stderr}")
            
            print("\n✅ All models removal process completed!")
            
        else:
            print(f"❌ Error listing models: {result.stderr}")
            
    except Exception as e:
        print(f"❌ Error: {e}")

# Uncomment to remove all models (use with caution!)
# remove_all_models()



## 4. **Check Storage Space After Removal**



In [13]:
def check_ollama_storage():
    """Check storage used by Ollama after removal"""
    import os
    import shutil
    
    # Common Ollama data directories
    possible_paths = [
        os.path.expanduser("~/.ollama"),
        os.path.expanduser("~/AppData/Local/ollama"),
        os.path.expanduser("~/AppData/Roaming/ollama"),
        "C:\\Users\\{}\\AppData\\Local\\ollama".format(os.getenv('USERNAME'))
    ]
    
    print("Checking Ollama storage usage:")
    
    for path in possible_paths:
        if os.path.exists(path):
            try:
                total_size = 0
                for dirpath, dirnames, filenames in os.walk(path):
                    for filename in filenames:
                        filepath = os.path.join(dirpath, filename)
                        total_size += os.path.getsize(filepath)
                
                size_gb = total_size / (1024**3)
                print(f"📁 {path}: {size_gb:.2f} GB")
                
            except Exception as e:
                print(f"❌ Error checking {path}: {e}")
        else:
            print(f"📁 {path}: Not found")

# Check storage usage
check_ollama_storage()

Checking Ollama storage usage:
📁 C:\Users\jeffw/.ollama: 3.87 GB
📁 C:\Users\jeffw/AppData/Local/ollama: 0.00 GB
📁 C:\Users\jeffw/AppData/Roaming/ollama: Not found
📁 C:\Users\jeffw\AppData\Local\ollama: 0.00 GB




## 5. **Complete Cleanup Function**



In [None]:
def complete_ollama_cleanup():
    """Complete cleanup of Ollama models and verification"""
    print("=" * 50)
    print("COMPLETE OLLAMA CLEANUP")
    print("=" * 50)
    
    # Step 1: List current models
    print("\n1. Current models:")
    list_result = subprocess.run(['ollama', 'list'], 
                                capture_output=True, text=True, shell=True)
    if list_result.returncode == 0:
        print(list_result.stdout)
    else:
        print("No models or error listing models")
    
    # Step 2: Remove all models
    print("\n2. Removing all models...")
    remove_all_models()
    
    # Step 3: Verify removal
    print("\n3. Verifying removal...")
    verify_result = subprocess.run(['ollama', 'list'], 
                                  capture_output=True, text=True, shell=True)
    if verify_result.returncode == 0:
        lines = verify_result.stdout.strip().split('\n')[1:]
        remaining_models = [line for line in lines if line.strip()]
        
        if not remaining_models:
            print("✅ All models successfully removed!")
        else:
            print(f"⚠️ {len(remaining_models)} models still present:")
            print(verify_result.stdout)
    
    # Step 4: Check storage
    print("\n4. Storage check after cleanup:")
    check_ollama_storage()
    
    print("\n" + "=" * 50)
    print("CLEANUP COMPLETE")
    print("=" * 50)

# Run complete cleanup (uncomment to use)
# complete_ollama_cleanup()



## Quick Commands Summary:

**Remove specific model:**


In [None]:
ollama rm gemma3:4b



**Remove multiple models:**


In [None]:
ollama rm gemma3:4b gemma3:1b



**List models before removal:**


In [None]:
ollama list



Use these functions to safely remove models from Ollama. The `complete_ollama_cleanup()` function will remove all models if you want to start fresh.

Based on your Jupyter notebook and the Oxygen AI Positron documentation, here are the necessary steps to integrate your Ollama + Gemma3:1b RESTful service with Oxygen XML Editor:

## 1. **Ensure Your Ollama RESTful Service is Running**

First, make sure your Flask API is running from your notebook:



In [None]:
# Verify your API is running
import requests
import time

def verify_ollama_service():
    """Verify both Ollama and Flask API are running"""
    try:
        # Check Ollama server
        ollama_response = requests.get('http://localhost:11434/api/tags', timeout=5)
        print(f"✅ Ollama server: {ollama_response.status_code}")
        
        # Check Flask API
        flask_response = requests.get('http://localhost:5000/api/health', timeout=5)
        print(f"✅ Flask API: {flask_response.status_code}")
        
        # Test generation with gemma3:1b
        test_data = {
            'prompt': 'Hello, this is a test.',
            'model': 'gemma3:1b'
        }
        
        gen_response = requests.post('http://localhost:5000/api/generate', 
                                   json=test_data, timeout=30)
        
        if gen_response.status_code == 200:
            result = gen_response.json()
            print("✅ Generation test successful!")
            print(f"Response: {result.get('response', '')[:]}...")
            return True
        else:
            print(f"❌ Generation test failed: {gen_response.status_code}")
            return False
            
    except Exception as e:
        print(f"❌ Service verification failed: {e}")
        return False

# Run verification
verify_ollama_service()



## 2. **Create Oxygen AI Positron Custom Connector Configuration**

Create a JSON configuration file for your custom connector:



In [None]:
{
  "name": "Local Ollama Gemma3:1b",
  "description": "Local Ollama server with Gemma3:1b model for text generation",
  "endpoint": "http://localhost:5000/api/generate",
  "method": "POST",
  "headers": {
    "Content-Type": "application/json",
    "Accept": "application/json"
  },
  "requestBody": {
    "prompt": "${input}",
    "model": "gemma3:1b",
    "max_tokens": 1000
  },
  "responseMapping": {
    "textPath": "$.response",
    "errorPath": "$.error"
  },
  "timeout": 60000,
  "streaming": false
}



## 3. **Install and Configure Oxygen AI Positron Add-on**

### Step 3.1: Install the Add-on
1. Open Oxygen XML Editor
2. Go to **Help** > **Install new add-ons**
3. Add the update site: `https://www.oxygenxml.com/InstData/Addons/default/updateSite.xml`
4. Search for "AI Positron" and install it
5. Restart Oxygen XML Editor

### Step 3.2: Configure the Custom Connector
1. Go to **Options** > **Preferences** > **Plugins** > **AI Positron**
2. Click **Add Custom Connector**
3. Provide the configuration details:



In [None]:
{
  "connectorName": "Local Ollama Gemma3:1b",
  "baseURL": "http://localhost:5000",
  "apiKey": "",
  "requestConfig": {
    "endpoint": "/api/generate",
    "method": "POST",
    "headers": {
      "Content-Type": "application/json"
    },
    "bodyTemplate": {
      "prompt": "${prompt}",
      "model": "gemma3:1b"
    }
  },
  "responseConfig": {
    "textFieldPath": "response",
    "errorFieldPath": "error"
  }
}



## 4. **Enhanced Flask API for Oxygen Integration**

Update your Flask API to be more compatible with Oxygen AI Positron:



In [None]:
from flask import Flask, request, jsonify
from flask_cors import CORS
import threading
import requests
import traceback

class OxygenOllamaAPI:
    def __init__(self):
        self.app = Flask(__name__)
        CORS(self.app)  # Enable CORS for Oxygen XML Editor
        self.setup_routes()
        
    def setup_routes(self):
        @self.app.route('/api/generate', methods=['POST', 'OPTIONS'])
        def generate():
            # Handle preflight requests
            if request.method == 'OPTIONS':
                response = jsonify({'status': 'ok'})
                response.headers.add('Access-Control-Allow-Origin', '*')
                response.headers.add('Access-Control-Allow-Headers', 'Content-Type,Authorization')
                response.headers.add('Access-Control-Allow-Methods', 'GET,PUT,POST,DELETE,OPTIONS')
                return response
            
            try:
                data = request.get_json()
                if not data:
                    return jsonify({'error': 'No JSON data provided'}), 400
                
                # Handle both 'prompt' and 'input' fields for compatibility
                prompt = data.get('prompt') or data.get('input', '')
                model = data.get('model', 'gemma3:1b')
                max_tokens = data.get('max_tokens', 500)
                
                if not prompt:
                    return jsonify({'error': 'No prompt provided'}), 400
                
                print(f"🤖 Processing request: {prompt[:50]}...")
                
                # Test Ollama connection
                try:
                    health_check = requests.get('http://localhost:11434/api/tags', timeout=5)
                    if health_check.status_code != 200:
                        return jsonify({
                            'error': f'Ollama server not responding: {health_check.status_code}'
                        }), 500
                except requests.exceptions.ConnectionError:
                    return jsonify({
                        'error': 'Cannot connect to Ollama server on localhost:11434'
                    }), 500
                
                # Call Ollama API
                ollama_request = {
                    'model': model,
                    'prompt': prompt,
                    'stream': False,
                    'options': {
                        'num_predict': max_tokens,
                        'temperature': 0.7
                    }
                }
                
                response = requests.post('http://localhost:11434/api/generate', 
                                       json=ollama_request, timeout=120)
                
                if response.status_code == 200:
                    result = response.json()
                    generated_text = result.get('response', '')
                    
                    # Format response for Oxygen AI Positron
                    oxygen_response = {
                        'response': generated_text,
                        'model': model,
                        'success': True,
                        'metadata': {
                            'tokens_generated': len(generated_text.split()),
                            'model_used': model
                        }
                    }
                    
                    print(f"✅ Generated {len(generated_text)} characters")
                    return jsonify(oxygen_response)
                else:
                    error_msg = f'Ollama API error: {response.status_code} - {response.text}'
                    print(f"❌ {error_msg}")
                    return jsonify({'error': error_msg}), 500
                    
            except Exception as e:
                error_traceback = traceback.format_exc()
                print(f"❌ Exception in generate endpoint: {error_traceback}")
                return jsonify({
                    'error': f'Server error: {str(e)}',
                    'success': False
                }), 500
        
        @self.app.route('/api/health', methods=['GET'])
        def health():
            """Health check endpoint for Oxygen"""
            try:
                ollama_response = requests.get('http://localhost:11434/api/tags', timeout=5)
                models = ollama_response.json().get('models', []) if ollama_response.status_code == 200 else []
                
                return jsonify({
                    'status': 'healthy',
                    'ollama_server': 'running' if ollama_response.status_code == 200 else 'error',
                    'available_models': [m.get('name', '') for m in models],
                    'flask_api': 'running'
                })
            except Exception as e:
                return jsonify({
                    'status': 'unhealthy',
                    'error': str(e)
                })
        
        @self.app.route('/api/models', methods=['GET'])
        def list_models():
            """List available models for Oxygen configuration"""
            try:
                response = requests.get('http://localhost:11434/api/tags', timeout=10)
                if response.status_code == 200:
                    models_data = response.json()
                    models = []
                    for model in models_data.get('models', []):
                        models.append({
                            'name': model.get('name', ''),
                            'size': model.get('size', 0),
                            'modified': model.get('modified_at', '')
                        })
                    return jsonify({'models': models})
                else:
                    return jsonify({'error': 'Failed to fetch models'}), 500
            except Exception as e:
                return jsonify({'error': str(e)}), 500
    
    def run(self, host='0.0.0.0', port=5000):
        self.app.run(host=host, port=port, debug=False, threaded=True)

# Create and start the enhanced API server for Oxygen
oxygen_api = OxygenOllamaAPI()

def start_oxygen_api_server():
    oxygen_api.run(host='0.0.0.0', port=5000)

# Stop previous server if running
try:
    requests.get('http://localhost:5000/api/health', timeout=1)
    print("⚠️ Previous server detected. Starting new server...")
except:
    pass

server_thread = threading.Thread(target=start_oxygen_api_server, daemon=True)
server_thread.start()

print("🚀 Oxygen-compatible API server started on http://localhost:5000")
print("Available endpoints:")
print("- POST /api/generate - Generate text (Oxygen AI Positron endpoint)")
print("- GET /api/health - Health check")
print("- GET /api/models - List available models")

# Test the enhanced API
time.sleep(3)
test_data = {
    'input': 'Write a brief summary about XML editing.',
    'model': 'gemma3:1b'
}

try:
    response = requests.post('http://localhost:5000/api/generate', json=test_data, timeout=30)
    if response.status_code == 200:
        result = response.json()
        print(f"\n✅ Test successful!")
        print(f"Response: {result.get('response', '')[:200]}...")
    else:
        print(f"❌ Test failed: {response.status_code}")
        print(response.text)
except Exception as e:
    print(f"❌ Test error: {e}")



## 5. **Configure Oxygen AI Positron Settings**

In Oxygen XML Editor:

### Step 5.1: Basic Configuration
1. Go to **Options** > **Preferences** > **Plugins** > **AI Positron**
2. Set the following:
   - **Provider**: Custom
   - **Base URL**: `http://localhost:5000`
   - **API Key**: Leave empty (not needed for local server)

### Step 5.2: Advanced Configuration
Create a connector configuration file:



In [None]:
{
  "id": "local-ollama-gemma",
  "name": "Local Ollama Gemma3:1b",
  "description": "Local Ollama server with Gemma3:1b model",
  "type": "custom",
  "config": {
    "baseUrl": "http://localhost:5000",
    "endpoints": {
      "generate": {
        "path": "/api/generate",
        "method": "POST",
        "headers": {
          "Content-Type": "application/json"
        },
        "requestBodyTemplate": {
          "input": "${prompt}",
          "model": "gemma3:1b",
          "max_tokens": 1000
        },
        "responseMapping": {
          "textPath": "$.response",
          "errorPath": "$.error"
        }
      },
      "health": {
        "path": "/api/health",
        "method": "GET"
      }
    },
    "timeout": 60000,
    "retryAttempts": 2
  }
}



## 6. **Create Custom Actions in Oxygen**

### Step 6.1: Document Actions
1. Go to **Options** > **Menu Shortcut Keys**
2. Create new actions for common AI tasks:

#### Action 1: Summarize Selected Text
- **Name**: "AI Summarize"
- **Shortcut**: `Ctrl+Alt+S`
- **Operation**: Call AI Positron with prompt: "Summarize the following text: ${selection}"

#### Action 2: Improve Writing
- **Name**: "AI Improve Writing"
- **Shortcut**: `Ctrl+Alt+I`  
- **Operation**: Call AI Positron with prompt: "Improve the writing and clarity of: ${selection}"

#### Action 3: Generate Documentation
- **Name**: "AI Generate Docs"
- **Shortcut**: `Ctrl+Alt+D`
- **Operation**: Call AI Positron with prompt: "Generate documentation for: ${selection}"

### Step 6.2: Create Custom Framework
Create a custom framework file `oxygen-ai-actions.framework`:



In [None]:
<?xml version="1.0" encoding="UTF-8"?>
<serialized version="24.1" xml:space="preserve">
    <serializableOrderedMap>
        <entry>
            <String>document.types</String>
            <documentTypeDescriptor-array>
                <documentTypeDescriptor>
                    <field name="name">
                        <String>AI Enhanced XML</String>
                    </field>
                    <field name="description">
                        <String>XML editing with AI assistance</String>
                    </field>
                    <field name="priority">
                        <Integer>3</Integer>
                    </field>
                    <field name="authorActions">
                        <authorAction-array>
                            <authorAction>
                                <field name="id">
                                    <String>ai.summarize</String>
                                </field>
                                <field name="name">
                                    <String>AI Summarize</String>
                                </field>
                                <field name="description">
                                    <String>Summarize selected text using AI</String>
                                </field>
                                <field name="operation">
                                    <operation>
                                        <field name="id">
                                            <String>ai.positron.generate</String>
                                        </field>
                                        <field name="arg-prompt">
                                            <String>Summarize this text: ${selection}</String>
                                        </field>
                                    </operation>
                                </field>
                            </authorAction>
                        </authorAction-array>
                    </field>
                </documentTypeDescriptor>
            </documentTypeDescriptor-array>
        </entry>
    </serializableOrderedMap>
</serialized>



## 7. **Test the Integration**

### Step 7.1: Basic Test
1. Open an XML document in Oxygen
2. Select some text
3. Use **AI Positron** > **Generate** or your custom action
4. Verify the response is inserted correctly

### Step 7.2: Advanced Test Script
Create a test validation script:



In [None]:
import requests
import json

def test_oxygen_integration():
    """Test the integration with various prompts"""
    
    test_cases = [
        {
            "name": "Simple generation",
            "prompt": "What is XML?",
            "expected_length": 50
        },
        {
            "name": "Code explanation",
            "prompt": "Explain this XML: <book><title>Example</title></book>",
            "expected_length": 100
        },
        {
            "name": "Documentation generation",
            "prompt": "Generate documentation for an XML schema",
            "expected_length": 200
        }
    ]
    
    print("🧪 Testing Oxygen AI Positron Integration")
    print("=" * 50)
    
    for i, test in enumerate(test_cases, 1):
        print(f"\nTest {i}: {test['name']}")
        
        try:
            response = requests.post('http://localhost:5000/api/generate', 
                                   json={
                                       'input': test['prompt'],
                                       'model': 'gemma3:1b'
                                   }, 
                                   timeout=30)
            
            if response.status_code == 200:
                result = response.json()
                generated_text = result.get('response', '')
                
                if len(generated_text) >= test['expected_length']:
                    print(f"✅ PASS - Generated {len(generated_text)} characters")
                    print(f"Preview: {generated_text[:100]}...")
                else:
                    print(f"⚠️ PARTIAL - Short response: {len(generated_text)} chars")
            else:
                print(f"❌ FAIL - HTTP {response.status_code}")
                print(response.text)
                
        except Exception as e:
            print(f"❌ ERROR - {e}")
    
    print("\n" + "=" * 50)
    print("✅ Integration test completed!")

# Run the test
test_oxygen_integration()



## 8. **Troubleshooting Common Issues**

### Issue 1: CORS Errors
If you get CORS errors in Oxygen:



In [None]:
# Enhanced CORS configuration
from flask_cors import CORS

# Add this to your Flask app initialization
CORS(oxygen_api.app, 
     origins=['*'],
     methods=['GET', 'POST', 'OPTIONS'],
     allow_headers=['Content-Type', 'Authorization', 'X-Requested-With'])



### Issue 2: Timeout Issues
Increase timeout values:



In [None]:
{
  "timeout": 120000,
  "retryAttempts": 3,
  "retryDelay": 1000
}



### Issue 3: Model Not Found
Verify your model is available:



In [None]:
# Check available models
def check_models():
    result = subprocess.run(['ollama', 'list'], capture_output=True, text=True, shell=True)
    print("Available models:")
    print(result.stdout)

check_models()



This setup provides a complete integration between your local Ollama Gemma3:1b service and Oxygen XML Editor via the AI Positron add-on, allowing you to use AI assistance directly within your XML editing workflow.

Similar code found with 1 license type

Based on your Jupyter notebook and the Oxygen AI Positron 5.0 documentation with the sample plugin, here's a comprehensive guide to integrate your Ollama + Gemma3:1b RESTful service with Oxygen XML Editor:

## 1. **Prepare Your Ollama RESTful Service for Oxygen Integration**

First, enhance your existing Flask API to be fully compatible with Oxygen AI Positron 5.0:



In [None]:
from flask import Flask, request, jsonify
from flask_cors import CORS
import threading
import requests
import traceback
import time

class OxygenOllamaAPI:
    def __init__(self):
        self.app = Flask(__name__)
        # Enhanced CORS configuration for Oxygen XML Editor
        CORS(self.app, 
             origins=['*'],
             methods=['GET', 'POST', 'OPTIONS', 'PUT', 'DELETE'],
             allow_headers=['Content-Type', 'Authorization', 'X-Requested-With', 'Accept'],
             expose_headers=['Content-Type', 'Authorization'],
             supports_credentials=False)
        self.setup_routes()
        
    def setup_routes(self):
        @self.app.route('/api/generate', methods=['POST', 'OPTIONS'])
        def generate():
            # Handle CORS preflight requests
            if request.method == 'OPTIONS':
                response = jsonify({'status': 'ok'})
                response.headers.add('Access-Control-Allow-Origin', '*')
                response.headers.add('Access-Control-Allow-Headers', 'Content-Type,Authorization,X-Requested-With')
                response.headers.add('Access-Control-Allow-Methods', 'GET,PUT,POST,DELETE,OPTIONS')
                return response
            
            try:
                data = request.get_json()
                if not data:
                    return jsonify({'error': 'No JSON data provided'}), 400
                
                # Handle multiple input field formats for Oxygen compatibility
                prompt = (data.get('prompt') or 
                         data.get('input') or 
                         data.get('text') or 
                         data.get('message', ''))
                
                model = data.get('model', 'gemma3:1b')
                max_tokens = data.get('max_tokens', 1000)
                temperature = data.get('temperature', 0.7)
                
                if not prompt:
                    return jsonify({'error': 'No prompt provided'}), 400
                
                print(f"🤖 Processing Oxygen request: {prompt[:50]}...")
                
                # Test Ollama connection
                try:
                    health_check = requests.get('http://localhost:11434/api/tags', timeout=5)
                    if health_check.status_code != 200:
                        return jsonify({
                            'error': f'Ollama server not responding: {health_check.status_code}'
                        }), 500
                except requests.exceptions.ConnectionError:
                    return jsonify({
                        'error': 'Cannot connect to Ollama server on localhost:11434'
                    }), 500
                
                # Call Ollama API with proper options
                ollama_request = {
                    'model': model,
                    'prompt': prompt,
                    'stream': False,
                    'options': {
                        'num_predict': max_tokens,
                        'temperature': temperature,
                        'top_k': 40,
                        'top_p': 0.9
                    }
                }
                
                response = requests.post('http://localhost:11434/api/generate', 
                                       json=ollama_request, timeout=120)
                
                if response.status_code == 200:
                    result = response.json()
                    generated_text = result.get('response', '')
                    
                    # Format response for Oxygen AI Positron compatibility
                    # Following the sample plugin structure
                    oxygen_response = {
                        'text': generated_text,  # Primary response field for Oxygen
                        'response': generated_text,  # Alternative field
                        'content': generated_text,  # Another alternative
                        'choices': [{  # OpenAI-compatible format
                            'text': generated_text,
                            'message': {
                                'content': generated_text,
                                'role': 'assistant'
                            }
                        }],
                        'model': model,
                        'success': True,
                        'usage': {
                            'total_tokens': len(generated_text.split())
                        },
                        'metadata': {
                            'tokens_generated': len(generated_text.split()),
                            'model_used': model,
                            'processing_time': result.get('total_duration', 0)
                        }
                    }
                    
                    print(f"✅ Generated {len(generated_text)} characters for Oxygen")
                    return jsonify(oxygen_response)
                else:
                    error_msg = f'Ollama API error: {response.status_code} - {response.text}'
                    print(f"❌ {error_msg}")
                    return jsonify({'error': error_msg}), 500
                    
            except Exception as e:
                error_traceback = traceback.format_exc()
                print(f"❌ Exception in generate endpoint: {error_traceback}")
                return jsonify({
                    'error': f'Server error: {str(e)}',
                    'success': False
                }), 500
        
        @self.app.route('/api/chat/completions', methods=['POST', 'OPTIONS'])
        def chat_completions():
            """OpenAI-compatible endpoint for Oxygen AI Positron"""
            if request.method == 'OPTIONS':
                response = jsonify({'status': 'ok'})
                response.headers.add('Access-Control-Allow-Origin', '*')
                response.headers.add('Access-Control-Allow-Headers', 'Content-Type,Authorization')
                response.headers.add('Access-Control-Allow-Methods', 'GET,PUT,POST,DELETE,OPTIONS')
                return response
                
            try:
                data = request.get_json()
                messages = data.get('messages', [])
                model = data.get('model', 'gemma3:1b')
                
                # Extract prompt from messages
                if messages:
                    prompt = messages[-1].get('content', '')
                else:
                    prompt = data.get('prompt', '')
                
                # Use the same generation logic
                ollama_request = {
                    'model': model,
                    'prompt': prompt,
                    'stream': False,
                    'options': {
                        'num_predict': data.get('max_tokens', 1000),
                        'temperature': data.get('temperature', 0.7)
                    }
                }
                
                response = requests.post('http://localhost:11434/api/generate', 
                                       json=ollama_request, timeout=120)
                
                if response.status_code == 200:
                    result = response.json()
                    generated_text = result.get('response', '')
                    
                    # OpenAI-compatible response format for Oxygen
                    openai_response = {
                        'choices': [{
                            'message': {
                                'role': 'assistant',
                                'content': generated_text
                            },
                            'finish_reason': 'stop',
                            'index': 0
                        }],
                        'model': model,
                        'usage': {
                            'total_tokens': len(generated_text.split()),
                            'prompt_tokens': len(prompt.split()),
                            'completion_tokens': len(generated_text.split())
                        },
                        'object': 'chat.completion'
                    }
                    
                    return jsonify(openai_response)
                else:
                    return jsonify({'error': 'Generation failed'}), 500
                    
            except Exception as e:
                return jsonify({'error': str(e)}), 500
        
        @self.app.route('/api/health', methods=['GET'])
        def health():
            """Health check endpoint for Oxygen monitoring"""
            try:
                ollama_response = requests.get('http://localhost:11434/api/tags', timeout=5)
                models = ollama_response.json().get('models', []) if ollama_response.status_code == 200 else []
                
                return jsonify({
                    'status': 'healthy',
                    'service': 'ollama-gemma-api',
                    'version': '1.0.0',
                    'ollama_server': 'running' if ollama_response.status_code == 200 else 'error',
                    'available_models': [m.get('name', '') for m in models],
                    'flask_api': 'running',
                    'endpoints': [
                        '/api/generate',
                        '/api/chat/completions',
                        '/api/health',
                        '/api/models'
                    ]
                })
            except Exception as e:
                return jsonify({
                    'status': 'unhealthy',
                    'error': str(e)
                })
        
        @self.app.route('/api/models', methods=['GET'])
        def list_models():
            """List available models in OpenAI-compatible format"""
            try:
                response = requests.get('http://localhost:11434/api/tags', timeout=10)
                if response.status_code == 200:
                    models_data = response.json()
                    models = []
                    for model in models_data.get('models', []):
                        models.append({
                            'id': model.get('name', ''),
                            'object': 'model',
                            'created': 0,
                            'owned_by': 'ollama',
                            'name': model.get('name', ''),
                            'size': model.get('size', 0),
                            'modified': model.get('modified_at', '')
                        })
                    return jsonify({'data': models, 'object': 'list'})
                else:
                    return jsonify({'error': 'Failed to fetch models'}), 500
            except Exception as e:
                return jsonify({'error': str(e)}), 500
    
    def run(self, host='0.0.0.0', port=5000):
        self.app.run(host=host, port=port, debug=False, threaded=True)

# Create and start the enhanced API server
oxygen_api = OxygenOllamaAPI()

def start_oxygen_api_server():
    oxygen_api.run(host='0.0.0.0', port=5000)

# Start server in background thread
server_thread = threading.Thread(target=start_oxygen_api_server, daemon=True)
server_thread.start()

print("🚀 Oxygen AI Positron 5.0 compatible API server started on http://localhost:5000")
print("Available endpoints:")
print("- POST /api/generate - Generate text (primary endpoint)")
print("- POST /api/chat/completions - OpenAI-compatible endpoint")
print("- GET /api/health - Health check")
print("- GET /api/models - List available models")

# Test the API
time.sleep(3)
test_data = {
    'input': 'Write a brief summary about XML editing.',
    'model': 'gemma3:1b'
}

try:
    response = requests.post('http://localhost:5000/api/generate', json=test_data, timeout=30)
    if response.status_code == 200:
        result = response.json()
        print(f"\n✅ API Test successful!")
        print(f"Response: {result.get('text', result.get('response', ''))[:200]}...")
    else:
        print(f"❌ API Test failed: {response.status_code}")
        print(response.text)
except Exception as e:
    print(f"❌ API Test error: {e}")



## 2. **Install Oxygen AI Positron 5.0 Add-on**

### Step 2.1: Install the Add-on
1. Open Oxygen XML Editor
2. Go to **Help** > **Install new add-ons**
3. Add the update site: `https://www.oxygenxml.com/InstData/Addons/default/updateSite.xml`
4. Search for "AI Positron" and install version 5.0+
5. Restart Oxygen XML Editor

### Step 2.2: Verify Installation
1. Check **Options** > **Preferences** > **Plugins** for "AI Positron"
2. Verify the add-on is enabled and version 5.0+

## 3. **Create Custom Connector Following Sample Plugin Structure**

Based on the GitHub sample plugin, create a custom connector configuration:



In [None]:
{
  "id": "local-ollama-gemma3-1b",
  "name": "Local Ollama Gemma3:1b",
  "description": "Local Ollama server with Gemma3:1b model for text generation",
  "version": "1.0.0",
  "type": "custom",
  "provider": "ollama",
  "baseUrl": "http://localhost:5000",
  "authentication": {
    "type": "none",
    "required": false
  },
  "capabilities": {
    "textGeneration": true,
    "chatCompletion": true,
    "streaming": false
  },
  "endpoints": {
    "textGeneration": {
      "path": "/api/generate",
      "method": "POST",
      "headers": {
        "Content-Type": "application/json",
        "Accept": "application/json"
      },
      "requestBodyTemplate": {
        "input": "${prompt}",
        "model": "gemma3:1b",
        "max_tokens": "${maxTokens:1000}",
        "temperature": "${temperature:0.7}"
      },
      "responseMapping": {
        "textPath": "$.text",
        "alternativeTextPaths": [
          "$.response", 
          "$.content", 
          "$.choices[0].text",
          "$.choices[0].message.content"
        ],
        "errorPath": "$.error"
      }
    },
    "chatCompletion": {
      "path": "/api/chat/completions",
      "method": "POST",
      "headers": {
        "Content-Type": "application/json",
        "Accept": "application/json"
      },
      "requestBodyTemplate": {
        "messages": [
          {
            "role": "user",
            "content": "${prompt}"
          }
        ],
        "model": "gemma3:1b",
        "max_tokens": "${maxTokens:1000}",
        "temperature": "${temperature:0.7}"
      },
      "responseMapping": {
        "textPath": "$.choices[0].message.content",
        "errorPath": "$.error"
      }
    },
    "models": {
      "path": "/api/models",
      "method": "GET",
      "responseMapping": {
        "modelsPath": "$.data",
        "modelIdPath": "$.id",
        "modelNamePath": "$.name"
      }
    },
    "health": {
      "path": "/api/health",
      "method": "GET"
    }
  },
  "defaultParameters": {
    "model": "gemma3:1b",
    "maxTokens": 1000,
    "temperature": 0.7
  },
  "timeout": 120000,
  "retryAttempts": 2,
  "retryDelay": 1000
}



## 4. **Configure Oxygen AI Positron Settings**

### Step 4.1: Basic Configuration
1. Go to **Options** > **Preferences** > **Plugins** > **AI Positron**
2. Click **Add Custom Connector**
3. Import the JSON configuration from Step 3
4. Set the following:
   - **Name**: `Local Ollama Gemma3:1b`
   - **Base URL**: `http://localhost:5000`
   - **API Key**: Leave empty
   - **Default Model**: `gemma3:1b`

### Step 4.2: Test Connection
1. In the Custom Connector configuration dialog
2. Click **Test Connection**
3. Verify successful connection to your Ollama service

## 5. **Create Custom Actions and Framework**

### Step 5.1: Create Enhanced Framework File

Create `oxygen-ai-gemma-framework.framework`:



In [None]:
<?xml version="1.0" encoding="UTF-8"?>
<serialized version="25.1" xml:space="preserve">
    <serializableOrderedMap>
        <entry>
            <String>document.types</String>
            <documentTypeDescriptor-array>
                <documentTypeDescriptor>
                    <field name="name">
                        <String>AI Enhanced XML with Gemma3</String>
                    </field>
                    <field name="description">
                        <String>XML editing with AI assistance using local Gemma3:1b model via Oxygen AI Positron 5.0</String>
                    </field>
                    <field name="priority">
                        <Integer>3</Integer>
                    </field>
                    <field name="authorActions">
                        <authorAction-array>
                            <!-- AI Summarize Action -->
                            <authorAction>
                                <field name="id">
                                    <String>ai.gemma.summarize</String>
                                </field>
                                <field name="name">
                                    <String>AI Summarize with Gemma3</String>
                                </field>
                                <field name="description">
                                    <String>Summarize selected text using local Gemma3:1b model</String>
                                </field>
                                <field name="operation">
                                    <operation>
                                        <field name="id">
                                            <String>ro.sync.ecss.extensions.commons.operations.ai.PositronGenerateOperation</String>
                                        </field>
                                        <field name="arg-connector">
                                            <String>local-ollama-gemma3-1b</String>
                                        </field>
                                        <field name="arg-prompt">
                                            <String>Please provide a concise and clear summary of the following text. Focus on the main points and key information:

${selection}</String>
                                        </field>
                                        <field name="arg-insertPosition">
                                            <String>REPLACE</String>
                                        </field>
                                        <field name="arg-maxTokens">
                                            <String>500</String>
                                        </field>
                                    </operation>
                                </field>
                                <field name="accelerator">
                                    <String>ctrl alt S</String>
                                </field>
                            </authorAction>
                            
                            <!-- AI Improve Writing Action -->
                            <authorAction>
                                <field name="id">
                                    <String>ai.gemma.improve</String>
                                </field>
                                <field name="name">
                                    <String>AI Improve Writing with Gemma3</String>
                                </field>
                                <field name="description">
                                    <String>Improve writing style and clarity using local Gemma3:1b model</String>
                                </field>
                                <field name="operation">
                                    <operation>
                                        <field name="id">
                                            <String>ro.sync.ecss.extensions.commons.operations.ai.PositronGenerateOperation</String>
                                        </field>
                                        <field name="arg-connector">
                                            <String>local-ollama-gemma3-1b</String>
                                        </field>
                                        <field name="arg-prompt">
                                            <String>Please improve the writing, clarity, and flow of the following text while maintaining its original meaning and intent. Make it more professional and readable:

${selection}</String>
                                        </field>
                                        <field name="arg-insertPosition">
                                            <String>REPLACE</String>
                                        </field>
                                        <field name="arg-maxTokens">
                                            <String>800</String>
                                        </field>
                                    </operation>
                                </field>
                                <field name="accelerator">
                                    <String>ctrl alt I</String>
                                </field>
                            </authorAction>
                            
                            <!-- AI Generate Documentation Action -->
                            <authorAction>
                                <field name="id">
                                    <String>ai.gemma.document</String>
                                </field>
                                <field name="name">
                                    <String>AI Generate Documentation with Gemma3</String>
                                </field>
                                <field name="description">
                                    <String>Generate comprehensive documentation using local Gemma3:1b model</String>
                                </field>
                                <field name="operation">
                                    <operation>
                                        <field name="id">
                                            <String>ro.sync.ecss.extensions.commons.operations.ai.PositronGenerateOperation</String>
                                        </field>
                                        <field name="arg-connector">
                                            <String>local-ollama-gemma3-1b</String>
                                        </field>
                                        <field name="arg-prompt">
                                            <String>Please generate comprehensive technical documentation for the following code or content. Include purpose, parameters, usage examples, and relevant notes:

${selection}</String>
                                        </field>
                                        <field name="arg-insertPosition">
                                            <String>AFTER</String>
                                        </field>
                                        <field name="arg-maxTokens">
                                            <String>1200</String>
                                        </field>
                                    </operation>
                                </field>
                                <field name="accelerator">
                                    <String>ctrl alt D</String>
                                </field>
                            </authorAction>
                            
                            <!-- AI Explain XML Action -->
                            <authorAction>
                                <field name="id">
                                    <String>ai.gemma.explain.xml</String>
                                </field>
                                <field name="name">
                                    <String>AI Explain XML with Gemma3</String>
                                </field>
                                <field name="description">
                                    <String>Explain XML structure and content using local Gemma3:1b model</String>
                                </field>
                                <field name="operation">
                                    <operation>
                                        <field name="id">
                                            <String>ro.sync.ecss.extensions.commons.operations.ai.PositronGenerateOperation</String>
                                        </field>
                                        <field name="arg-connector">
                                            <String>local-ollama-gemma3-1b</String>
                                        </field>
                                        <field name="arg-prompt">
                                            <String>Please explain the following XML structure in detail. Describe what it represents, its elements, attributes, and how it should be used:

${selection}</String>
                                        </field>
                                        <field name="arg-insertPosition">
                                            <String>AFTER</String>
                                        </field>
                                        <field name="arg-maxTokens">
                                            <String>800</String>
                                        </field>
                                    </operation>
                                </field>
                                <field name="accelerator">
                                    <String>ctrl alt E</String>
                                </field>
                            </authorAction>
                            
                            <!-- AI Generate XML Schema Action -->
                            <authorAction>
                                <field name="id">
                                    <String>ai.gemma.schema</String>
                                </field>
                                <field name="name">
                                    <String>AI Generate XML Schema with Gemma3</String>
                                </field>
                                <field name="description">
                                    <String>Generate XML Schema (XSD) using local Gemma3:1b model</String>
                                </field>
                                <field name="operation">
                                    <operation>
                                        <field name="id">
                                            <String>ro.sync.ecss.extensions.commons.operations.ai.PositronGenerateOperation</String>
                                        </field>
                                        <field name="arg-connector">
                                            <String>local-ollama-gemma3-1b</String>
                                        </field>
                                        <field name="arg-prompt">
                                            <String>Based on the following XML structure, please generate a corresponding XML Schema (XSD) with appropriate element declarations, types, constraints, and documentation:

${selection}</String>
                                        </field>
                                        <field name="arg-insertPosition">
                                            <String>AFTER</String>
                                        </field>
                                        <field name="arg-maxTokens">
                                            <String>1500</String>
                                        </field>
                                    </operation>
                                </field>
                                <field name="accelerator">
                                    <String>ctrl alt X</String>
                                </field>
                            </authorAction>
                            
                            <!-- AI Translate Text Action -->
                            <authorAction>
                                <field name="id">
                                    <String>ai.gemma.translate</String>
                                </field>
                                <field name="name">
                                    <String>AI Translate with Gemma3</String>
                                </field>
                                <field name="description">
                                    <String>Translate selected text using local Gemma3:1b model</String>
                                </field>
                                <field name="operation">
                                    <operation>
                                        <field name="id">
                                            <String>ro.sync.ecss.extensions.commons.operations.ai.PositronGenerateOperation</String>
                                        </field>
                                        <field name="arg-connector">
                                            <String>local-ollama-gemma3-1b</String>
                                        </field>
                                        <field name="arg-prompt">
                                            <String>Please translate the following text to English (if it's in another language) or provide a translation to Spanish (if it's in English):

${selection}</String>
                                        </field>
                                        <field name="arg-insertPosition">
                                            <String>AFTER</String>
                                        </field>
                                        <field name="arg-maxTokens">
                                            <String>600</String>
                                        </field>
                                    </operation>
                                </field>
                                <field name="accelerator">
                                    <String>ctrl alt T</String>
                                </field>
                            </authorAction>
                        </authorAction-array>
                    </field>
                </documentTypeDescriptor>
            </documentTypeDescriptor-array>
        </entry>
    </serializableOrderedMap>
</serialized>



### Step 5.2: Install the Framework
1. Save the framework file to your Oxygen frameworks directory
2. Go to **Options** > **Preferences** > **Document Type Association**
3. Import the framework or create a new document type based on it

## 6. **Create Sample Plugin (Following GitHub Sample)**

Based on the sample plugin structure, create a custom plugin:



In [None]:
// Create directory: c:\Project\OCR\OxygenGemmaPlugin\src\main\java\com\example\oxygen\gemma\

// File: OxygenGemmaPlugin.java
package com.example.oxygen.gemma;

import ro.sync.exml.plugin.Plugin;
import ro.sync.exml.plugin.PluginDescriptor;
import ro.sync.exml.workspace.api.standalone.StandalonePluginWorkspace;

public class OxygenGemmaPlugin extends Plugin {
    
    public static final String PLUGIN_ID = "oxygen.gemma.ai.plugin";
    
    @Override
    public void applicationStarted(StandalonePluginWorkspace workspace) {
        // Add menu customizer for Gemma AI actions
        workspace.addMenuBarCustomizer(new GemmaMenuCustomizer());
        
        // Add toolbar customizer
        workspace.addToolbarComponentsCustomizer(new GemmaToolbarCustomizer());
        
        // Register custom AI connector
        workspace.getUtilAccess().addExtension(new GemmaAIConnectorExtension());
    }
    
    @Override
    public PluginDescriptor getDescriptor() {
        return new PluginDescriptor() {
            @Override
            public String getID() {
                return PLUGIN_ID;
            }
            
            @Override
            public String getDescription() {
                return "Integration plugin for local Ollama Gemma3:1b model with Oxygen AI Positron 5.0";
            }
            
            @Override
            public String getName() {
                return "Oxygen Gemma AI Plugin";
            }
            
            @Override
            public String getVendor() {
                return "Custom Development";
            }
            
            @Override
            public String getVersion() {
                return "1.0.0";
            }
        };
    }
}

// File: GemmaMenuCustomizer.java
package com.example.oxygen.gemma;

import ro.sync.exml.workspace.api.standalone.MenuBarCustomizer;
import ro.sync.exml.workspace.api.standalone.StandalonePluginWorkspace;
import javax.swing.*;

public class GemmaMenuCustomizer implements MenuBarCustomizer {
    
    @Override
    public void customizeMainMenu(JMenuBar menuBar, StandalonePluginWorkspace workspace) {
        // Create Gemma AI menu
        JMenu gemmaMenu = new JMenu("Gemma AI");
        
        // Add actions
        gemmaMenu.add(createSummarizeAction(workspace));
        gemmaMenu.add(createImproveAction(workspace));
        gemmaMenu.add(createDocumentAction(workspace));
        gemmaMenu.addSeparator();
        gemmaMenu.add(createExplainAction(workspace));
        gemmaMenu.add(createSchemaAction(workspace));
        
        // Add to menu bar
        menuBar.add(gemmaMenu);
    }
    
    private JMenuItem createSummarizeAction(StandalonePluginWorkspace workspace) {
        JMenuItem item = new JMenuItem("Summarize with Gemma3");
        item.addActionListener(e -> {
            // Trigger AI Positron summarize action
            workspace.getEditorAccess().invokeAction("ai.gemma.summarize");
        });
        return item;
    }
    
    private JMenuItem createImproveAction(StandalonePluginWorkspace workspace) {
        JMenuItem item = new JMenuItem("Improve Writing with Gemma3");
        item.addActionListener(e -> {
            workspace.getEditorAccess().invokeAction("ai.gemma.improve");
        });
        return item;
    }
    
    private JMenuItem createDocumentAction(StandalonePluginWorkspace workspace) {
        JMenuItem item = new JMenuItem("Generate Documentation with Gemma3");
        item.addActionListener(e -> {
            workspace.getEditorAccess().invokeAction("ai.gemma.document");
        });
        return item;
    }
    
    private JMenuItem createExplainAction(StandalonePluginWorkspace workspace) {
        JMenuItem item = new JMenuItem("Explain XML with Gemma3");
        item.addActionListener(e -> {
            workspace.getEditorAccess().invokeAction("ai.gemma.explain.xml");
        });
        return item;
    }
    
    private JMenuItem createSchemaAction(StandalonePluginWorkspace workspace) {
        JMenuItem item = new JMenuItem("Generate Schema with Gemma3");
        item.addActionListener(e -> {
            workspace.getEditorAccess().invokeAction("ai.gemma.schema");
        });
        return item;
    }
}

// File: plugin.xml
<?xml version="1.0" encoding="UTF-8"?>
<plugin
 id="oxygen.gemma.ai.plugin"
 name="Oxygen Gemma AI Plugin"
 description="Integration plugin for local Ollama Gemma3:1b model with Oxygen AI Positron 5.0"
 version="1.0.0"
 vendor="Custom Development"
 class="com.example.oxygen.gemma.OxygenGemmaPlugin">
 
 <runtime>
  <library name="lib/oxygen-gemma-plugin.jar"/>
 </runtime>
 
 <extension point="WorkspaceAccess">
  <workspace class="com.example.oxygen.gemma.OxygenGemmaPlugin"/>
 </extension>
 
</plugin>



## 7. **Comprehensive Integration Test**

Create a comprehensive test to verify everything works:



In [None]:
import requests
import json
import time

def comprehensive_oxygen_gemma_test():
    """Comprehensive test of Oxygen AI Positron 5.0 + Gemma3:1b integration"""
    
    base_url = "http://localhost:5000"
    
    test_cases = [
        {
            "name": "Health Check Test",
            "endpoint": "/api/health",
            "method": "GET",
            "expected_fields": ["status", "service"]
        },
        {
            "name": "Models List Test",
            "endpoint": "/api/models",
            "method": "GET",
            "expected_fields": ["data"]
        },
        {
            "name": "Basic Text Generation Test",
            "endpoint": "/api/generate",
            "method": "POST",
            "payload": {
                "input": "What is XML and why is it important in modern web development?",
                "model": "gemma3:1b",
                "max_tokens": 500
            },
            "expected_fields": ["text", "response"]
        },
        {
            "name": "Chat Completions Test (OpenAI-compatible)",
            "endpoint": "/api/chat/completions",
            "method": "POST",
            "payload": {
                "messages": [
                    {"role": "user", "content": "Explain XML namespaces with examples"}
                ],
                "model": "gemma3:1b",
                "max_tokens": 600
            },
            "expected_fields": ["choices"]
        },
        {
            "name": "XML Schema Generation Test",
            "endpoint": "/api/generate",
            "method": "POST",
            "payload": {
                "input": "Generate an XML Schema for a book catalog with the following structure: book (id attribute), title, author, publication year, genre",
                "model": "gemma3:1b",
                "max_tokens": 1000
            },
            "expected_fields": ["text"]
        },
        {
            "name": "XML Explanation Test",
            "endpoint": "/api/generate",
            "method": "POST",
            "payload": {
                "input": "Explain this XML structure: <library><book id='1'><title>XML Guide</title><author>John Doe</author><year>2024</year></book></library>",
                "model": "gemma3:1b",
                "max_tokens": 800
            },
            "expected_fields": ["text"]
        },
        {
            "name": "Writing Improvement Test",
            "endpoint": "/api/generate",
            "method": "POST",
            "payload": {
                "input": "Improve this text: XML is a markup language that is used for storing and transporting data and it is very useful for web development",
                "model": "gemma3:1b",
                "max_tokens": 400
            },
            "expected_fields": ["text"]
        }
    ]
    
    print("🧪 Running Comprehensive Oxygen AI Positron 5.0 + Gemma3:1b Integration Test")
    print("=" * 80)
    
    # Test each case
    passed_tests = 0
    total_tests = len(test_cases)
    
    for i, test in enumerate(test_cases, 1):
        print(f"\nTest {i}/{total_tests}: {test['name']}")
        print("-" * 60)
        
        try:
            start_time = time.time()
            
            if test['method'] == 'GET':
                response = requests.get(f"{base_url}{test['endpoint']}", timeout=30)
            else:
                response = requests.post(f"{base_url}{test['endpoint']}", 
                                       json=test['payload'], timeout=60)
            
            end_time = time.time()
            processing_time = end_time - start_time
            
            if response.status_code == 200:
                result = response.json()
                
                # Check for expected fields
                found_fields = []
                missing_fields = []
                
                for field in test['expected_fields']:
                    if field in result:
                        found_fields.append(field)
                    else:
                        missing_fields.append(field)
                
                if not missing_fields:
                    print(f"✅ PASS - Response time: {processing_time:.2f}s")
                    
                    # Extract and display response content
                    if 'choices' in result:
                        content = result['choices'][0]['message']['content']
                    elif 'data' in result:
                        content = f"Found {len(result['data'])} models"
                    else:
                        content = result.get('text', result.get('response', result.get('status', '')))
                    
                    if isinstance(content, str) and len(content) > 100:
                        print(f"   Generated {len(content)} characters")
                        print(f"   Preview: {content[:150]}...")
                    else:
                        print(f"   Result: {content}")
                    
                    passed_tests += 1
                else:
                    print(f"⚠️ PARTIAL - Missing fields: {missing_fields}")
                    print(f"   Available fields: {list(result.keys())}")
            else:
                print(f"❌ FAIL - HTTP {response.status_code}")
                print(f"   Error: {response.text}")
                
        except requests.exceptions.Timeout:
            print("❌ TIMEOUT - Request took too long")
        except Exception as e:
            print(f"❌ ERROR - {e}")
    
    print("\n" + "=" * 80)
    print(f"✅ Integration test completed!")
    print(f"📊 Results: {passed_tests}/{total_tests} tests passed ({(passed_tests/total_tests)*100:.1f}%)")
    
    if passed_tests == total_tests:
        print("🎉 All tests passed! Your Oxygen AI Positron + Gemma3:1b integration is ready!")
    else:
        print("⚠️ Some tests failed. Please check the configuration and try again.")

# Run the comprehensive test
if __name__ == "__main__":
    comprehensive_oxygen_gemma_test()



## 8. **Usage Instructions**

### Step 8.1: Basic Usage in Oxygen
1. Open an XML document in Oxygen XML Editor
2. Select some text
3. Use one of these methods:
   - **Menu**: Go to **Gemma AI** menu and select an action
   - **Keyboard shortcuts**: Use `Ctrl+Alt+S` (summarize), `Ctrl+Alt+I` (improve), etc.
   - **AI Positron panel**: Use the AI Positron side panel

### Step 8.2: Advanced Usage
1. Configure custom prompts in the AI Positron settings
2. Adjust model parameters (temperature, max tokens) per action
3. Create additional custom actions for specific use cases

## 9. **Troubleshooting**

### Issue 1: Connection Problems


In [None]:
def diagnose_oxygen_connection():
    """Diagnose connection issues with Oxygen AI Positron"""
    
    print("🔍 Diagnosing Oxygen AI Positron connection...")
    
    # Check if all services are running
    checks = [
        ("Ollama Server", "http://localhost:11434/api/tags"),
        ("Flask API", "http://localhost:5000/api/health"),
        ("Models List", "http://localhost:5000/api/models"),
        ("Generate Endpoint", "http://localhost:5000/api/generate")
    ]
    
    for name, url in checks:
        try:
            if "generate" in url:
                response = requests.post(url, json={"input": "test", "model": "gemma3:1b"}, timeout=10)
            else:
                response = requests.get(url, timeout=10)
            
            print(f"✅ {name}: Status {response.status_code}")
        except Exception as e:
            print(f"❌ {name}: {e}")
            
    # Check Gemma3:1b model
    try:
        result = subprocess.run(['ollama', 'list'], capture_output=True, text=True, shell=True)
        if 'gemma3:1b' in result.stdout:
            print("✅ Gemma3:1b model available")
        else:
            print("❌ Gemma3:1b model not found - run 'ollama pull gemma3:1b'")
    except:
        print("❌ Cannot check Ollama models")

diagnose_oxygen_connection()



This comprehensive setup provides full integration between your local Ollama Gemma3:1b service and Oxygen XML Editor using AI Positron 5.0, following the official documentation and sample plugin structure for maximum compatibility and functionality.

Similar code found with 1 license type

Looking at your Ollama RESTful service implementation, I can see that it currently doesn't include API key authentication. Ollama itself doesn't natively support API keys, but you can add API key authentication to your Flask wrapper for security. Here's how to implement it:

## 1. **Add API Key Authentication to Your Flask API**

Update your `OxygenOllamaAPI` class to include API key support:



In [None]:
from flask import Flask, request, jsonify
from flask_cors import CORS
import threading
import requests
import traceback
import time
import secrets
import os
from functools import wraps

class OxygenOllamaAPI:
    def __init__(self, api_key=None):
        self.app = Flask(__name__)
        # Enhanced CORS configuration for Oxygen XML Editor
        CORS(self.app, 
             origins=['*'],
             methods=['GET', 'POST', 'OPTIONS', 'PUT', 'DELETE'],
             allow_headers=['Content-Type', 'Authorization', 'X-Requested-With', 'Accept', 'X-API-Key'],
             expose_headers=['Content-Type', 'Authorization'],
             supports_credentials=False)
        
        # Set API key
        # self.api_key = api_key or os.getenv('OLLAMA_API_KEY') or self.generate_api_key()
        # self.api_key = os.getenv('OLLAMA_API_KEY')
        self.api_key = self.generate_api_key()
        print(f"🔑 API Key: {self.api_key}")
        
        self.setup_routes()
    
    def generate_api_key(self):
        """Generate a secure API key"""
        return f"ollama-{secrets.token_urlsafe(32)}"
    
    def require_api_key(self, f):
        """Decorator to require API key authentication"""
        @wraps(f)
        def decorated_function(*args, **kwargs):
            # Skip API key check for OPTIONS requests (CORS preflight)
            if request.method == 'OPTIONS':
                return f(*args, **kwargs)
            
            # Check for API key in headers
            provided_key = (request.headers.get('X-API-Key') or 
                          request.headers.get('Authorization', '').replace('Bearer ', '') or
                          request.args.get('api_key'))
            
            if not provided_key:
                return jsonify({
                    'error': 'API key required',
                    'message': 'Please provide API key in X-API-Key header, Authorization header, or api_key parameter'
                }), 401
            
            if provided_key != self.api_key:
                return jsonify({
                    'error': 'Invalid API key',
                    'message': 'The provided API key is not valid'
                }), 403
            
            return f(*args, **kwargs)
        return decorated_function
        
    def setup_routes(self):
        @self.app.route('/api/generate', methods=['POST', 'OPTIONS'])
        @self.require_api_key
        def generate():
            # Handle CORS preflight requests
            if request.method == 'OPTIONS':
                response = jsonify({'status': 'ok'})
                response.headers.add('Access-Control-Allow-Origin', '*')
                response.headers.add('Access-Control-Allow-Headers', 'Content-Type,Authorization,X-Requested-With,X-API-Key')
                response.headers.add('Access-Control-Allow-Methods', 'GET,PUT,POST,DELETE,OPTIONS')
                return response
            
            try:
                data = request.get_json()
                if not data:
                    return jsonify({'error': 'No JSON data provided'}), 400
                
                # Handle multiple input field formats for Oxygen compatibility
                prompt = (data.get('prompt') or 
                         data.get('input') or 
                         data.get('text') or 
                         data.get('message', ''))
                
                model = data.get('model', 'gemma3:1b')
                max_tokens = data.get('max_tokens', 1000)
                temperature = data.get('temperature', 0.7)
                
                if not prompt:
                    return jsonify({'error': 'No prompt provided'}), 400
                
                print(f"🤖 Processing authenticated request: {prompt[:50]}...")
                
                # Test Ollama connection
                try:
                    health_check = requests.get('http://localhost:11434/api/tags', timeout=5)
                    if health_check.status_code != 200:
                        return jsonify({
                            'error': f'Ollama server not responding: {health_check.status_code}'
                        }), 500
                except requests.exceptions.ConnectionError:
                    return jsonify({
                        'error': 'Cannot connect to Ollama server on localhost:11434'
                    }), 500
                
                # Call Ollama API with proper options
                ollama_request = {
                    'model': model,
                    'prompt': prompt,
                    'stream': False,
                    'options': {
                        'num_predict': max_tokens,
                        'temperature': temperature,
                        'top_k': 40,
                        'top_p': 0.9
                    }
                }
                
                response = requests.post('http://localhost:11434/api/generate', 
                                       json=ollama_request, timeout=120)
                
                if response.status_code == 200:
                    result = response.json()
                    generated_text = result.get('response', '')
                    
                    # Format response for Oxygen AI Positron compatibility
                    oxygen_response = {
                        'text': generated_text,  # Primary response field for Oxygen
                        'response': generated_text,  # Alternative field
                        'content': generated_text,  # Another alternative
                        'choices': [{  # OpenAI-compatible format
                            'text': generated_text,
                            'message': {
                                'content': generated_text,
                                'role': 'assistant'
                            }
                        }],
                        'model': model,
                        'success': True,
                        'usage': {
                            'total_tokens': len(generated_text.split())
                        },
                        'metadata': {
                            'tokens_generated': len(generated_text.split()),
                            'model_used': model,
                            'processing_time': result.get('total_duration', 0)
                        }
                    }
                    
                    print(f"✅ Generated {len(generated_text)} characters for authenticated user")
                    return jsonify(oxygen_response)
                else:
                    error_msg = f'Ollama API error: {response.status_code} - {response.text}'
                    print(f"❌ {error_msg}")
                    return jsonify({'error': error_msg}), 500
                    
            except Exception as e:
                error_traceback = traceback.format_exc()
                print(f"❌ Exception in generate endpoint: {error_traceback}")
                return jsonify({
                    'error': f'Server error: {str(e)}',
                    'success': False
                }), 500
        
        @self.app.route('/api/chat/completions', methods=['POST', 'OPTIONS'])
        @self.require_api_key
        def chat_completions():
            """OpenAI-compatible endpoint for Oxygen AI Positron"""
            if request.method == 'OPTIONS':
                response = jsonify({'status': 'ok'})
                response.headers.add('Access-Control-Allow-Origin', '*')
                response.headers.add('Access-Control-Allow-Headers', 'Content-Type,Authorization,X-API-Key')
                response.headers.add('Access-Control-Allow-Methods', 'GET,PUT,POST,DELETE,OPTIONS')
                return response
                
            try:
                data = request.get_json()
                messages = data.get('messages', [])
                model = data.get('model', 'gemma3:1b')
                
                # Extract prompt from messages
                if messages:
                    prompt = messages[-1].get('content', '')
                else:
                    prompt = data.get('prompt', '')
                
                # Use the same generation logic
                ollama_request = {
                    'model': model,
                    'prompt': prompt,
                    'stream': False,
                    'options': {
                        'num_predict': data.get('max_tokens', 1000),
                        'temperature': data.get('temperature', 0.7)
                    }
                }
                
                response = requests.post('http://localhost:11434/api/generate', 
                                       json=ollama_request, timeout=120)
                
                if response.status_code == 200:
                    result = response.json()
                    generated_text = result.get('response', '')
                    
                    # OpenAI-compatible response format for Oxygen
                    openai_response = {
                        'choices': [{
                            'message': {
                                'role': 'assistant',
                                'content': generated_text
                            },
                            'finish_reason': 'stop',
                            'index': 0
                        }],
                        'model': model,
                        'usage': {
                            'total_tokens': len(generated_text.split()),
                            'prompt_tokens': len(prompt.split()),
                            'completion_tokens': len(generated_text.split())
                        },
                        'object': 'chat.completion'
                    }
                    
                    return jsonify(openai_response)
                else:
                    return jsonify({'error': 'Generation failed'}), 500
                    
            except Exception as e:
                return jsonify({'error': str(e)}), 500
        
        @self.app.route('/api/health', methods=['GET'])
        def health():
            """Health check endpoint (no authentication required)"""
            try:
                ollama_response = requests.get('http://localhost:11434/api/tags', timeout=5)
                models = ollama_response.json().get('models', []) if ollama_response.status_code == 200 else []
                
                return jsonify({
                    'status': 'healthy',
                    'service': 'ollama-gemma-api',
                    'version': '1.0.0',
                    'authentication': 'enabled',
                    'ollama_server': 'running' if ollama_response.status_code == 200 else 'error',
                    'available_models': [m.get('name', '') for m in models],
                    'flask_api': 'running',
                    'endpoints': [
                        '/api/generate (requires API key)',
                        '/api/chat/completions (requires API key)',
                        '/api/health (public)',
                        '/api/models (requires API key)',
                        '/api/key (public)'
                    ]
                })
            except Exception as e:
                return jsonify({
                    'status': 'unhealthy',
                    'error': str(e)
                })
        
        @self.app.route('/api/models', methods=['GET'])
        @self.require_api_key
        def list_models():
            """List available models in OpenAI-compatible format"""
            try:
                response = requests.get('http://localhost:11434/api/tags', timeout=10)
                if response.status_code == 200:
                    models_data = response.json()
                    models = []
                    for model in models_data.get('models', []):
                        models.append({
                            'id': model.get('name', ''),
                            'object': 'model',
                            'created': 0,
                            'owned_by': 'ollama',
                            'name': model.get('name', ''),
                            'size': model.get('size', 0),
                            'modified': model.get('modified_at', '')
                        })
                    return jsonify({'data': models, 'object': 'list'})
                else:
                    return jsonify({'error': 'Failed to fetch models'}), 500
            except Exception as e:
                return jsonify({'error': str(e)}), 500
        
        @self.app.route('/api/key', methods=['GET'])
        def get_api_key():
            """Get the current API key (for initial setup)"""
            return jsonify({
                'api_key': self.api_key,
                'usage': {
                    'header': f'X-API-Key: {self.api_key}',
                    'authorization': f'Authorization: Bearer {self.api_key}',
                    'parameter': f'?api_key={self.api_key}'
                }
            })
    
    def run(self, host='0.0.0.0', port=5000):
        self.app.run(host=host, port=port, debug=False, threaded=True)

# Create and start the enhanced API server with API key
# You can set a custom API key or let it generate one
custom_api_key = "lic-da-16888"  # Optional: set your own key
oxygen_api = OxygenOllamaAPI(api_key=custom_api_key)

def start_oxygen_api_server():
    oxygen_api.run(host='0.0.0.0', port=5000)

# Start server in background thread
server_thread = threading.Thread(target=start_oxygen_api_server, daemon=True)
server_thread.start()

print("🚀 Oxygen AI Positron 5.0 compatible API server with API key authentication started on http://localhost:5000")
print("📋 Available endpoints:")
print("- POST /api/generate - Generate text (requires API key)")
print("- POST /api/chat/completions - OpenAI-compatible endpoint (requires API key)")
print("- GET /api/health - Health check (public)")
print("- GET /api/models - List available models (requires API key)")
print("- GET /api/key - Get API key information (public)")

# Test the API with authentication
time.sleep(3)
try:
    # First, get the API key
    key_response = requests.get('http://localhost:5000/api/key')
    if key_response.status_code == 200:
        api_key = key_response.json()['api_key']
        print(f"\n🔑 Using API Key: {api_key}")
        
        # Test authenticated request
        test_data = {
            'input': 'Write a brief summary about XML editing.',
            'model': 'gemma3:1b'
        }
        
        headers = {
            'Content-Type': 'application/json',
            'X-API-Key': api_key
        }
        
        response = requests.post('http://localhost:5000/api/generate', 
                               json=test_data, headers=headers, timeout=30)
        
        if response.status_code == 200:
            result = response.json()
            print(f"\n✅ Authenticated API Test successful!")
            print(f"Response: {result.get('text', result.get('response', ''))[:200]}...")
        else:
            print(f"❌ API Test failed: {response.status_code}")
            print(response.text)
    else:
        print("❌ Failed to get API key")
        
except Exception as e:
    print(f"❌ API Test error: {e}")



## 2. **Update Oxygen AI Positron Custom Connector Configuration**

Update your custom connector configuration to include the API key:



In [None]:
{
  "id": "local-ollama-gemma3-1b",
  "name": "Local Ollama Gemma3:1b (with API Key)",
  "description": "Local Ollama server with Gemma3:1b model for text generation (API key protected)",
  "version": "1.0.0",
  "type": "custom",
  "provider": "ollama",
  "baseUrl": "http://localhost:5000",
  "authentication": {
    "type": "api_key",
    "required": true,
    "apiKey": "your-api-key-here"
  },
  "capabilities": {
    "textGeneration": true,
    "chatCompletion": true,
    "streaming": false
  },
  "endpoints": {
    "textGeneration": {
      "path": "/api/generate",
      "method": "POST",
      "headers": {
        "Content-Type": "application/json",
        "Accept": "application/json",
        "X-API-Key": "${apiKey}"
      },
      "requestBodyTemplate": {
        "input": "${prompt}",
        "model": "gemma3:1b",
        "max_tokens": "${maxTokens:1000}",
        "temperature": "${temperature:0.7}"
      },
      "responseMapping": {
        "textPath": "$.text",
        "alternativeTextPaths": [
          "$.response", 
          "$.content", 
          "$.choices[0].text",
          "$.choices[0].message.content"
        ],
        "errorPath": "$.error"
      }
    },
    "chatCompletion": {
      "path": "/api/chat/completions",
      "method": "POST",
      "headers": {
        "Content-Type": "application/json",
        "Accept": "application/json",
        "Authorization": "Bearer ${apiKey}"
      },
      "requestBodyTemplate": {
        "messages": [
          {
            "role": "user",
            "content": "${prompt}"
          }
        ],
        "model": "gemma3:1b",
        "max_tokens": "${maxTokens:1000}",
        "temperature": "${temperature:0.7}"
      },
      "responseMapping": {
        "textPath": "$.choices[0].message.content",
        "errorPath": "$.error"
      }
    },
    "models": {
      "path": "/api/models",
      "method": "GET",
      "headers": {
        "X-API-Key": "${apiKey}"
      },
      "responseMapping": {
        "modelsPath": "$.data",
        "modelIdPath": "$.id",
        "modelNamePath": "$.name"
      }
    },
    "health": {
      "path": "/api/health",
      "method": "GET"
    }
  },
  "defaultParameters": {
    "model": "gemma3:1b",
    "maxTokens": 1000,
    "temperature": 0.7
  },
  "timeout": 120000,
  "retryAttempts": 2,
  "retryDelay": 1000
}



## 3. **Configure API Key in Oxygen XML Editor**

### Step 3.1: Get Your API Key
Run this function to get your current API key:



In [None]:
def get_api_key_info():
    """Get API key information from your local service"""
    try:
        response = requests.get('http://localhost:5000/api/key', timeout=5)
        if response.status_code == 200:
            key_info = response.json()
            print("🔑 API Key Information:")
            print(f"API Key: {key_info['api_key']}")
            print("\n📋 Usage Examples:")
            print(f"Header: {key_info['usage']['header']}")
            print(f"Authorization: {key_info['usage']['authorization']}")
            print(f"Parameter: {key_info['usage']['parameter']}")
            return key_info['api_key']
        else:
            print(f"❌ Failed to get API key: {response.status_code}")
            return None
    except Exception as e:
        print(f"❌ Error getting API key: {e}")
        return None

# Get your API key
api_key = get_api_key_info()



### Step 3.2: Configure in Oxygen AI Positron
1. Go to **Options** > **Preferences** > **Plugins** > **AI Positron**
2. Click **Add Custom Connector** or edit your existing connector
3. Set the following:
   - **Name**: `Local Ollama Gemma3:1b (Authenticated)`
   - **Base URL**: `http://localhost:5000`
   - **API Key**: `[paste your API key here]`
   - **Authentication Type**: API Key or Bearer Token

## 4. **Test Authenticated API**

Create a test function to verify API key authentication:



In [None]:
def test_authenticated_api():
    """Test API with different authentication methods"""
    
    # Get API key
    key_response = requests.get('http://localhost:5000/api/key')
    if key_response.status_code != 200:
        print("❌ Failed to get API key")
        return
    
    api_key = key_response.json()['api_key']
    
    test_cases = [
        {
            "name": "X-API-Key Header",
            "headers": {"X-API-Key": api_key}
        },
        {
            "name": "Authorization Bearer",
            "headers": {"Authorization": f"Bearer {api_key}"}
        },
        {
            "name": "URL Parameter",
            "headers": {},
            "params": {"api_key": api_key}
        },
        {
            "name": "No Authentication (should fail)",
            "headers": {},
            "should_fail": True
        }
    ]
    
    test_data = {
        'input': 'Test authentication with a simple request.',
        'model': 'gemma3:1b'
    }
    
    print("🧪 Testing API Key Authentication")
    print("=" * 50)
    
    for i, test in enumerate(test_cases, 1):
        print(f"\nTest {i}: {test['name']}")
        
        try:
            headers = {'Content-Type': 'application/json'}
            headers.update(test.get('headers', {}))
            
            params = test.get('params', {})
            
            response = requests.post('http://localhost:5000/api/generate', 
                                   json=test_data, 
                                   headers=headers,
                                   params=params,
                                   timeout=30)
            
            if test.get('should_fail', False):
                if response.status_code in [401, 403]:
                    print(f"✅ PASS - Authentication correctly rejected (HTTP {response.status_code})")
                else:
                    print(f"❌ FAIL - Expected authentication failure but got HTTP {response.status_code}")
            else:
                if response.status_code == 200:
                    result = response.json()
                    generated_text = result.get('text', result.get('response', ''))
                    print(f"✅ PASS - Generated {len(generated_text)} characters")
                    print(f"   Preview: {generated_text[:100]}...")
                else:
                    print(f"❌ FAIL - HTTP {response.status_code}: {response.text}")
                    
        except Exception as e:
            print(f"❌ ERROR - {e}")
    
    print("\n" + "=" * 50)
    print("✅ Authentication test completed!")

# Run authentication test
test_authenticated_api()



## 5. **Environment Variable Configuration**

For better security, you can also set the API key via environment variable:



In [None]:
import os

# Set API key via environment variable (run once)
def set_api_key_env(api_key):
    """Set API key as environment variable"""
    os.environ['OLLAMA_API_KEY'] = api_key
    print(f"✅ API key set in environment: OLLAMA_API_KEY={api_key}")

# Use a custom API key
custom_key = "ollama-secure-key-2024"
set_api_key_env(custom_key)

# The API will automatically use this environment variable
# when creating the OxygenOllamaAPI instance

In [None]:
os.environ['OLLAMA_API_KEY']




## 6. **Security Best Practices**



In [None]:
import hashlib
import time

class SecureOllamaAPI(OxygenOllamaAPI):
    """Enhanced API with additional security features"""
    
    def __init__(self, api_key=None):
        super().__init__(api_key)
        self.request_log = {}
        self.rate_limit = 60  # requests per minute
    
    def check_rate_limit(self, client_ip):
        """Basic rate limiting"""
        now = time.time()
        minute = int(now // 60)
        
        if client_ip not in self.request_log:
            self.request_log[client_ip] = {}
        
        if minute not in self.request_log[client_ip]:
            self.request_log[client_ip][minute] = 0
        
        self.request_log[client_ip][minute] += 1
        
        # Clean old entries
        for old_minute in list(self.request_log[client_ip].keys()):
            if old_minute < minute - 1:
                del self.request_log[client_ip][old_minute]
        
        return self.request_log[client_ip][minute] <= self.rate_limit
    
    def hash_api_key(self, key):
        """Hash API key for logging"""
        return hashlib.sha256(key.encode()).hexdigest()[:16]

# Use the secure version
# secure_api = SecureOllamaAPI()



This implementation provides:

1. **Multiple Authentication Methods**: X-API-Key header, Authorization Bearer token, or URL parameter
2. **Automatic Key Generation**: Creates secure API keys if none provided
3. **CORS Support**: Properly handles preflight requests with authentication headers
4. **Error Handling**: Clear error messages for authentication failures
5. **Health Endpoint**: Public endpoint to check service status
6. **Key Retrieval**: Endpoint to get current API key for initial setup
7. **Security Features**: Rate limiting and key hashing options

The API key will be required for all protected endpoints (`/api/generate`, `/api/chat/completions`, `/api/models`) while keeping health check and key retrieval endpoints public for initial setup.

Similar code found with 1 license type