# Build a local Probe server for a custom front end

In [None]:
from flask import Flask, request, jsonify
from probe import probe
import json

In [None]:
# Flask application object; the /chat route defined further down is registered on it.
app = Flask(__name__)

# Configure Probe
# Two alternative configurations follow; only one should be active at a time.

## Local Model (disabled: uncomment these lines and comment out the
## "Hosted Model" assignments below to switch)
# NOTE(review): the "ollama/" model prefix and port 11434 suggest a local
# Ollama server is expected at api_base -- confirm it is running before enabling.
# probe.offline = True
# probe.llm.model = "ollama/llama3.1"
# probe.llm.api_base = "http://localhost:11434"
# probe.llm.context_window = 4000
# probe.llm.max_tokens = 3000
# probe.auto_run = True
# probe.verbose = True

## Hosted Model (active configuration)
# NOTE(review): no API credentials are set in this file; a hosted model
# presumably needs them supplied elsewhere (e.g. environment) -- confirm.
probe.llm.model = "gpt-4o"
probe.llm.context_window = 10000
probe.llm.max_tokens = 4096
# NOTE(review): presumably lets Probe act without asking for confirmation --
# verify against the Probe documentation before exposing this server.
probe.auto_run = True

# Create an endpoint
def _chunk_text(chunk):
    """Return the text that one streamed Probe chunk contributes to the reply.

    Dict chunks contribute their "content" only when their "type" is
    "message". String chunks are parsed as JSON: a JSON object contributes its
    "response" field, anything else contributes the raw string. Chunks of any
    other type, and non-string payloads, contribute nothing.
    """
    if isinstance(chunk, dict):
        if chunk.get("type") != "message":
            return ""
        content = chunk.get("content", "")
        # A None / non-text payload is skipped rather than breaking the reply.
        return content if isinstance(content, str) else ""

    if isinstance(chunk, str):
        try:
            parsed = json.loads(chunk)
        except json.JSONDecodeError:
            # Not JSON at all: the chunk is plain text.
            return chunk
        if isinstance(parsed, dict):
            text = parsed.get("response", "")
            return text if isinstance(text, str) else ""
        # Valid JSON but not an object (e.g. "42", "true", "null"): the chunk
        # is ordinary text that merely happens to parse, so keep it verbatim.
        return chunk

    return ""


@app.route('/chat', methods=['POST'])
def chat():
    """Run one prompt through Probe and return the collected reply as JSON.

    Expected payload: {"prompt": "User's message or question"}

    Returns:
        200 with {"response": <reply text, stripped>} on success,
        400 with {"error": "No prompt provided"} when the body is missing,
        is not a JSON object, or has an empty/absent "prompt",
        500 with {"error": <exception text>} when Probe raises.
    """
    # silent=True makes a missing or malformed JSON body come back as None
    # instead of raising, so every bad-input case ends in the same 400 below.
    data = request.get_json(silent=True)
    # The body may be valid JSON without being an object (e.g. a list).
    prompt = data.get('prompt') if isinstance(data, dict) else None

    if not prompt:
        return jsonify({"error": "No prompt provided"}), 400

    try:
        parts = [
            _chunk_text(chunk)
            for chunk in probe.chat(prompt, stream=True, display=False)
        ]
    except Exception as e:
        # Request boundary: report any Probe failure to the client as a 500.
        return jsonify({"error": str(e)}), 500

    return jsonify({"response": "".join(parts).strip()})

if __name__ == '__main__':
    # Announce before app.run(): that call blocks until the server stops, so a
    # message placed after it would only appear once the server is gone (and
    # would also print on a plain import, when no server was started).
    print("Probe server is running on http://0.0.0.0:5001")
    # NOTE(review): host='0.0.0.0' listens on every network interface, not
    # only localhost, and the /chat route performs no authentication --
    # confirm this exposure is intended for a "local" server.
    app.run(host='0.0.0.0', port=5001)

## Make a request to the server

curl -X POST http://localhost:5001/chat \
     -H "Content-Type: application/json" \
     -d '{"prompt": "Hello, how are you?"}'