這份 Notebook 示範 OpenAI Assistants API

https://platform.openai.com/docs/assistants/overview

In [1]:
from google.colab import userdata
openai_api_key = userdata.get('openai_api_key')

In [None]:
import requests
import json
from pprint import pp

## 0. 上傳 RAG 用的參考檔案 (也可在後台 Playground 完成)

In [None]:
# 找個範例檔案 https://report.nat.gov.tw/ReportFront/ReportDetail/detail?sysId=C11201557
!wget -O 'C11201717_1.pdf' https://report.nat.gov.tw/ReportFront/PageSystem/reportFileDownload/C11201717/001

--2023-11-12 12:05:23--  https://report.nat.gov.tw/ReportFront/PageSystem/reportFileDownload/C11201717/001
Resolving report.nat.gov.tw (report.nat.gov.tw)... 223.200.132.62, 2001:4420:6703:123:250:56ff:fe81:6e14
Connecting to report.nat.gov.tw (report.nat.gov.tw)|223.200.132.62|:443... connected.
HTTP request sent, awaiting response... 200 
Length: 2616234 (2.5M) [application/pdf]
Saving to: ‘C11201717_1.pdf’


2023-11-12 12:05:31 (465 KB/s) - ‘C11201717_1.pdf’ saved [2616234/2616234]



In [None]:
data = { "purpose": "assistants" }
f = open('C11201717_1.pdf', 'rb')
files = [("file", ("C11201717_1.pdf", f, "application/octet-stream"))]

headers = { "Authorization": f'Bearer {openai_api_key}', "OpenAI-Beta": "assistants=v1" }
response = requests.post(f"https://api.openai.com/v1/files", headers = headers, data = data, files = files )
obj = json.loads(response.text)

print(obj)

{'object': 'file', 'id': 'file-bRJLxcdtEDCm3N694OlD96Y8', 'purpose': 'assistants', 'filename': 'C11201717_1.pdf', 'bytes': 2616234, 'created_at': 1699790762, 'status': 'processed', 'status_details': None}


In [None]:
file_id = obj["id"]
file_id

'file-bRJLxcdtEDCm3N694OlD96Y8'

## 1. 建立 assistant with retriever，此時 OpenAI 才跑向量索引 (也可在後台 Playground 完成)

In [None]:
payload = { "model": "gpt-4-1106-preview", "name": "ihower-test-2-rag", "tools": [{"type": "retrieval"}],
            "instructions": "You are a customer support chatbot. Use your knowledge base to best respond to customer queries.",
            "file_ids": [file_id] }
headers = { "Authorization": f'Bearer {openai_api_key}', "Content-Type": "application/json", "OpenAI-Beta": "assistants=v1" }
response = requests.post('https://api.openai.com/v1/assistants', headers = headers, data = json.dumps(payload) )
obj = json.loads(response.text)

print(obj)

{'id': 'asst_0dXud0IRFAfLpu4mjgfA8zXM', 'object': 'assistant', 'created_at': 1699790819, 'name': 'ihower-test-2-rag', 'description': None, 'model': 'gpt-4-1106-preview', 'instructions': 'You are a customer support chatbot. Use your knowledge base to best respond to customer queries.', 'tools': [{'type': 'retrieval'}], 'file_ids': ['file-bRJLxcdtEDCm3N694OlD96Y8'], 'metadata': {}}


In [None]:
assistant_id = obj["id"]
assistant_id

'asst_0dXud0IRFAfLpu4mjgfA8zXM'

## 2. 建立對話 thread

In [None]:
payload = { "messages": [ { "role": "user", "content": "請問和 Pentera 的會晤討論了什麼?"} ] }
headers = { "Authorization": f'Bearer {openai_api_key}', "Content-Type": "application/json", "OpenAI-Beta": "assistants=v1" }
response = requests.post('https://api.openai.com/v1/threads', headers = headers, data = json.dumps(payload) )
obj = json.loads(response.text)

print(obj)

{'id': 'thread_VXpXR6vI3X7ycmHq1KmEEb07', 'object': 'thread', 'created_at': 1699790857, 'metadata': {}}


In [None]:
thread_id = obj["id"]

## 3. 執行 (assistant+thread).run，產生 AI 回覆

有另一個 API 可以合併 2,3 步驟，同時建立 thread 並且執行。這裡拆開呼叫 run:

In [None]:
payload = { "assistant_id": assistant_id }
headers = { "Authorization": f'Bearer {openai_api_key}', "Content-Type": "application/json", "OpenAI-Beta": "assistants=v1" }
response = requests.post(f"https://api.openai.com/v1/threads/{thread_id}/runs", headers = headers, data = json.dumps(payload) )
obj = json.loads(response.text)

print(obj)

{'id': 'run_0rcwrC8HxOrgo4lageHwWav0', 'object': 'thread.run', 'created_at': 1699790884, 'assistant_id': 'asst_0dXud0IRFAfLpu4mjgfA8zXM', 'thread_id': 'thread_VXpXR6vI3X7ycmHq1KmEEb07', 'status': 'queued', 'started_at': None, 'expires_at': 1699791484, 'cancelled_at': None, 'failed_at': None, 'completed_at': None, 'last_error': None, 'model': 'gpt-4-1106-preview', 'instructions': 'You are a customer support chatbot. Use your knowledge base to best respond to customer queries.', 'tools': [{'type': 'retrieval'}], 'file_ids': ['file-bRJLxcdtEDCm3N694OlD96Y8'], 'metadata': {}}


In [None]:
run_id = obj["id"]

## 4. 觀察 run 結果 (目前只能輪詢問是否完成，之後會支援 streaming)

In [None]:
headers = { "Authorization": f'Bearer {openai_api_key}', "Content-Type": "application/json", "OpenAI-Beta": "assistants=v1" }
response = requests.get(f"https://api.openai.com/v1/threads/{thread_id}/runs/{run_id}", headers = headers )
obj = json.loads(response.text)

pp(obj)

{'id': 'run_0rcwrC8HxOrgo4lageHwWav0',
 'object': 'thread.run',
 'created_at': 1699790884,
 'assistant_id': 'asst_0dXud0IRFAfLpu4mjgfA8zXM',
 'thread_id': 'thread_VXpXR6vI3X7ycmHq1KmEEb07',
 'status': 'completed',
 'started_at': 1699790884,
 'expires_at': None,
 'cancelled_at': None,
 'failed_at': None,
 'completed_at': 1699790900,
 'last_error': None,
 'model': 'gpt-4-1106-preview',
 'instructions': 'You are a customer support chatbot. Use your knowledge base '
                 'to best respond to customer queries.',
 'tools': [{'type': 'retrieval'}],
 'file_ids': ['file-bRJLxcdtEDCm3N694OlD96Y8'],
 'metadata': {}}


## 5. 查看目前 messages，剛剛 run 出來的 AI 回覆放到這裡了

In [None]:
headers = { "Authorization": f'Bearer {openai_api_key}', "Content-Type": "application/json", "OpenAI-Beta": "assistants=v1" }
response = requests.get(f"https://api.openai.com/v1/threads/{thread_id}/messages", headers = headers )
obj = json.loads(response.text)

pp(obj)

{'object': 'list',
 'data': [{'id': 'msg_Y8cQoB4w5cr6ODbES7jL3mlf',
           'object': 'thread.message',
           'created_at': 1699790890,
           'thread_id': 'thread_VXpXR6vI3X7ycmHq1KmEEb07',
           'role': 'assistant',
           'content': [{'type': 'text',
                        'text': {'value': '在與 Pentera '
                                          '的會晤中，訪團參與的是網路安全週圓桌論壇，並對資安與 AI '
                                          '議題，與以色列的專家學者及業界領袖進行了深度的討論與交流。論壇涵蓋了來自以色列資安產業與研究機構的代表，他們分享了資安防護與新興技術發展的現況。討論的主題包括新的 '
                                          'AI '
                                          '科技對資安防護所帶來的機會與威脅、健康醫療資料分析的機會與隱私保護的挑戰、如何確保 '
                                          'AI '
                                          '科技的正確性與透明性等。會議後，數位產業署林俊秀副署長及資通安全署鄭欣明副署長介紹了台灣的產業政策，跨部會資安聯防架構，以及對關鍵基礎設施資安防護的推動等內容。此外，還有跨國攻防演練（CODE）的組織，以提升資安專業能力和應變能力，及建立跨國聯防的基礎【9†source】。',
                                 'annotations': [{'type': 'file_citation',
                          

## 可繼續加 message 到 thread 中，繼續 run 對話

In [None]:
payload = { "role": "user", "content": "和 Chain Reaction 談了什麼?" }
headers = { "Authorization": f'Bearer {openai_api_key}', "Content-Type": "application/json", "OpenAI-Beta": "assistants=v1" }
response = requests.post(f"https://api.openai.com/v1/threads/{thread_id}/messages", headers = headers, data = json.dumps(payload) )
obj = json.loads(response.text)

print(obj)

{'id': 'msg_X5l6gevfUudFzaNTw0D8wWlj', 'object': 'thread.message', 'created_at': 1699791075, 'thread_id': 'thread_VXpXR6vI3X7ycmHq1KmEEb07', 'role': 'user', 'content': [{'type': 'text', 'text': {'value': '和 Chain Reaction 談了什麼?', 'annotations': []}}], 'file_ids': [], 'assistant_id': None, 'run_id': None, 'metadata': {}}


In [None]:
payload = { "assistant_id": assistant_id }
headers = { "Authorization": f'Bearer {openai_api_key}', "Content-Type": "application/json", "OpenAI-Beta": "assistants=v1" }
response = requests.post(f"https://api.openai.com/v1/threads/{thread_id}/runs", headers = headers, data = json.dumps(payload) )
obj = json.loads(response.text)

print(obj)

{'id': 'run_ZdFd2Cm4lQo8wSMqkRcEF2Km', 'object': 'thread.run', 'created_at': 1699791080, 'assistant_id': 'asst_0dXud0IRFAfLpu4mjgfA8zXM', 'thread_id': 'thread_VXpXR6vI3X7ycmHq1KmEEb07', 'status': 'queued', 'started_at': None, 'expires_at': 1699791680, 'cancelled_at': None, 'failed_at': None, 'completed_at': None, 'last_error': None, 'model': 'gpt-4-1106-preview', 'instructions': 'You are a customer support chatbot. Use your knowledge base to best respond to customer queries.', 'tools': [{'type': 'retrieval'}], 'file_ids': ['file-bRJLxcdtEDCm3N694OlD96Y8'], 'metadata': {}}


In [None]:
headers = { "Authorization": f'Bearer {openai_api_key}', "Content-Type": "application/json", "OpenAI-Beta": "assistants=v1" }
response = requests.get(f"https://api.openai.com/v1/threads/{thread_id}/messages", headers = headers )
obj = json.loads(response.text)

pp(obj)

{'object': 'list',
 'data': [{'id': 'msg_X5l6gevfUudFzaNTw0D8wWlj',
           'object': 'thread.message',
           'created_at': 1699791075,
           'thread_id': 'thread_VXpXR6vI3X7ycmHq1KmEEb07',
           'role': 'user',
           'content': [{'type': 'text',
                        'text': {'value': '和 Chain Reaction 談了什麼?',
                                 'annotations': []}}],
           'file_ids': [],
           'assistant_id': None,
           'run_id': None,
           'metadata': {}},
          {'id': 'msg_Y8cQoB4w5cr6ODbES7jL3mlf',
           'object': 'thread.message',
           'created_at': 1699790890,
           'thread_id': 'thread_VXpXR6vI3X7ycmHq1KmEEb07',
           'role': 'assistant',
           'content': [{'type': 'text',
                        'text': {'value': '在與 Pentera '
                                          '的會晤中，訪團參與的是網路安全週圓桌論壇，並對資安與 AI '
                                          '議題，與以色列的專家學者及業界領袖進行了深度的討論與交流。論壇涵蓋了來自以色列資安產業與研究機構的代表，他們分享了資安防護與

## 已知 Asssiatants API 問題

1. 只能輪詢問 run 狀態，還不支援 streaming
1. 對話也沒有支援 streaming