In [None]:
import requests
import json

# Endpoint URL that stream_ollama_response() sends its POST request to
# (a server on localhost, port 11434, path /api/chat).
ollama_api_url = 'http://localhost:11434/api/chat'

def stream_ollama_response(model, prompt, timeout=(10, 300)):
    """Stream a chat reply from the ``/api/chat`` endpoint, chunk by chunk.

    Sends ``prompt`` as a single user message with ``"stream": True`` and
    parses the response body as newline-delimited JSON, yielding the
    ``message.content`` text of each object as soon as it arrives.

    Args:
        model: Model name placed in the request payload.
        prompt: Text of the user message.
        timeout: Forwarded to ``requests.post``; a single number or a
            ``(connect, read)`` tuple in seconds. Pass ``None`` to disable
            the timeout and wait indefinitely.

    Yields:
        str: The next piece of the reply. May be an empty string, e.g. when
        an object carries no message content.

    Notes:
        Failures are reported with ``print`` instead of being raised:
        request-level errors and error objects sent by the server end the
        stream, while lines that cannot be parsed are skipped with a warning.
    """
    # Request payload for the /api/chat endpoint.
    payload = {
        "model": model,
        "messages": [{"role": "user", "content": prompt}],
        "stream": True,
    }

    try:
        with requests.post(ollama_api_url, json=payload, stream=True, timeout=timeout) as response:
            # Turn 4xx/5xx replies into an exception handled at the bottom.
            response.raise_for_status()

            for line in response.iter_lines():
                if not line:
                    continue

                # Every non-empty line should be one standalone JSON object.
                # Invalid UTF-8 and invalid JSON are both treated as an
                # unparseable line instead of aborting the whole stream.
                try:
                    json_data = json.loads(line.decode('utf-8'))
                except (json.JSONDecodeError, UnicodeDecodeError):
                    print(f"\n[警告] 無法解析的 JSON 行: {line}")
                    continue
                if not isinstance(json_data, dict):
                    print(f"\n[警告] 無法解析的 JSON 行: {line}")
                    continue

                # An object with an "error" field reports a failure that
                # happened after streaming began; surface it and stop.
                error_message = json_data.get('error')
                if error_message:
                    print(f"\n[錯誤] 伺服器回報錯誤: {error_message}")
                    return

                # Tolerate a missing or null message/content by yielding ''.
                message = json_data.get('message')
                content_chunk = message.get('content') if isinstance(message, dict) else None
                yield content_chunk or ''

                # The final object of the stream has 'done' set to true.
                if json_data.get('done'):
                    return

    except requests.exceptions.RequestException as e:
        print(f"\n[錯誤] 請求失敗: {e}")

# --- Usage example ---

model_to_use = "gemma3:4b"
user_prompt = "中國迅速侵蝕美國全球 AI 競賽領導地位這件事，請提供詳細的分析。"

print(f"--- 正在從 {model_to_use} 接收串流式應答 ---")

# Drain the generator: each chunk is echoed the moment it arrives (no newline,
# flushed immediately) and appended to full_response so the complete reply
# is still available once the stream has finished.
full_response = ""
for chunk in stream_ollama_response(model_to_use, user_prompt):
    print(chunk, end='', flush=True)
    full_response = full_response + chunk

print("\n\n--- 串流結束 ---")
# full_response now holds the whole reply and can be printed here for review.


In [8]:
import requests
import json

# Endpoint URL that this cell's stream_ollama_response() POSTs the prompt to
# (localhost, port 1234, path /ollama/chat).
# NOTE(review): this rebinds the ollama_api_url name that the first cell set
# to a different port and path — confirm the switch is intended.
ollama_api_url = 'http://localhost:1234/ollama/chat'

def stream_ollama_response(prompt, timeout=(10, 300)):
    """Stream a chat reply for ``prompt`` from ``ollama_api_url``, chunk by chunk.

    POSTs ``{"prompt": prompt}`` and parses the response body as
    newline-delimited JSON, yielding the ``message.content`` text of each
    object as soon as it arrives.

    Args:
        prompt: Text sent under the ``"prompt"`` key of the request body.
        timeout: Forwarded to ``requests.post``; a single number or a
            ``(connect, read)`` tuple in seconds. Pass ``None`` to disable
            the timeout and wait indefinitely.

    Yields:
        str: The next piece of the reply. May be an empty string, e.g. when
        an object carries no message content.

    Notes:
        Failures are reported with ``print`` instead of being raised:
        request-level errors and error objects sent by the server end the
        stream, while lines that cannot be parsed are skipped with a warning.
    """
    try:
        with requests.post(ollama_api_url, json={"prompt": prompt}, stream=True, timeout=timeout) as response:
            # Turn 4xx/5xx replies into an exception handled at the bottom.
            response.raise_for_status()

            for line in response.iter_lines():
                if not line:
                    continue

                # Every non-empty line should be one standalone JSON object.
                # Invalid UTF-8 and invalid JSON are both treated as an
                # unparseable line instead of aborting the whole stream.
                try:
                    json_data = json.loads(line.decode('utf-8'))
                except (json.JSONDecodeError, UnicodeDecodeError):
                    print(f"\n[警告] 無法解析的 JSON 行: {line}")
                    continue
                if not isinstance(json_data, dict):
                    print(f"\n[警告] 無法解析的 JSON 行: {line}")
                    continue

                # NOTE(review): assumes the server reports mid-stream failures
                # as an object with an "error" field — confirm for this endpoint.
                error_message = json_data.get('error')
                if error_message:
                    print(f"\n[錯誤] 伺服器回報錯誤: {error_message}")
                    return

                # Tolerate a missing or null message/content by yielding ''.
                message = json_data.get('message')
                content_chunk = message.get('content') if isinstance(message, dict) else None
                yield content_chunk or ''

                # A truthy 'done' field marks the last object of the stream.
                if json_data.get('done'):
                    return

    except requests.exceptions.RequestException as e:
        print(f"\n[錯誤] 請求失敗: {e}")

def ask_ollama(prompt):
    """Send ``prompt`` to the chat endpoint and echo the reply as it streams in.

    Args:
        prompt: Text passed unchanged to ``stream_ollama_response``.

    Returns:
        None. The reply is only printed; iterate ``stream_ollama_response``
        directly when the text itself is needed.
    """
    for chunk in stream_ollama_response(prompt):
        # end='' keeps the chunks on one continuous line; flush=True pushes
        # each one to the output immediately instead of waiting for a newline.
        print(chunk, end='', flush=True)

In [None]:
# Ask how Python's `yield` is used; ask_ollama prints the reply as it streams in.
ask_ollama('python的yield怎麼用？')