In [1]:
## Read the API key with getpass and store it in the OPENAI_API_KEY environment variable
import getpass
import json
import os
import time
from pprint import pp

import requests

# Prompt for the key without echoing it, then publish it through the process environment.
os.environ.update({'OPENAI_API_KEY': getpass.getpass('Enter your OpenAI API key:')})

### Helper functions

In [None]:
def get_completion(messages, model='gpt-3.5-turbo', temperature=0, max_tokens=300, verbose=True):
    """Send a chat-completion request to the OpenAI REST API and return the reply text.

    Args:
        messages: List of chat message dicts, sent as the ``messages`` field.
        model: Model name sent as the ``model`` field.
        temperature: Sampling temperature sent with the request.
        max_tokens: Value sent as the ``max_tokens`` field.
        verbose: When true (the default), print the response body and the
            HTTP status code before returning.

    Returns:
        On HTTP 200 with a JSON body, the first choice's message content.
        Otherwise the API's ``error.message`` when the body provides one, or
        the string ``"HTTP <status>: <raw body>"`` when it does not (for
        example when the body is not JSON at all).

    Raises:
        KeyError: If ``OPENAI_API_KEY`` is missing from the environment.
    """
    api_url = 'https://api.openai.com/v1/chat/completions'

    payload = {
        "model": model,
        "max_tokens": max_tokens,
        "temperature": temperature,
        "messages": messages,
    }

    headers = {
        "Authorization": f"Bearer {os.environ['OPENAI_API_KEY']}",
        "Content-Type": "application/json",
    }

    response = requests.post(api_url, headers=headers, data=json.dumps(payload), timeout=60)
    resp_status = response.status_code

    # Proxies and gateways can answer with HTML or an empty body; treat that as
    # "no parsed body" instead of letting the decode error escape.
    try:
        resp_body = json.loads(response.text)
    except ValueError:
        resp_body = None

    if verbose:
        print(resp_body if resp_body is not None else response.text)
        print(resp_status)

    if resp_status == 200 and resp_body is not None:
        return resp_body["choices"][0]["message"]["content"]

    # Error path: prefer the API's own message, fall back to the raw body.
    if isinstance(resp_body, dict):
        error = resp_body.get("error")
        if isinstance(error, dict) and "message" in error:
            return error["message"]
    return f"HTTP {resp_status}: {response.text}"

### GPT-4 Vision Experiment

In [61]:
## Resolve a random picsum photo to its final (post-redirect) URL and preview it
from IPython.display import Image

request_image_url = 'https://picsum.photos/800/1280'

# A timeout keeps the cell from hanging the kernel if the image host stalls.
response = requests.get(request_image_url, allow_redirects=True, timeout=60)
# Stop here on an HTTP error instead of passing an error page's URL downstream.
response.raise_for_status()

# response.url is the URL after redirects, i.e. the concrete image that was served.
image_url = response.url

Image(url=image_url, width=200, height=300)



In [62]:
# One user turn with two content parts: the text prompt and the image to describe.
messages = [
    {
        "role": "user",
        "content": [
            {
                "type": "text",
                "text": "請以100字內描述這張照片"
            },
            {
                "type": "image_url",
                # The image part is sent as an object carrying the URL under "url",
                # not as a bare string.
                "image_url": {"url": image_url}
            }
        ],
    }
]

completion = get_completion(messages, model='gpt-4-vision-preview')
completion

{'id': 'chatcmpl-8ITfwgjHp3sLICR5PmUV6LVBobiT7', 'object': 'chat.completion', 'created': 1699414668, 'model': 'gpt-4-1106-vision-preview', 'usage': {'prompt_tokens': 1128, 'completion_tokens': 108, 'total_tokens': 1236}, 'choices': [{'message': {'role': 'assistant', 'content': '這張黑白照片展示了一堵老舊的石牆，部分浸在水中。牆面上有磨損跡象，不同大小的石塊堆砌而成，顯示出時間的痕跡。牆角有一小撮頑強的植物生長，增添了一絲生命力。'}, 'finish_details': {'type': 'stop', 'stop': '<|fim_suffix|>'}, 'index': 0}]}
200


'這張黑白照片展示了一堵老舊的石牆，部分浸在水中。牆面上有磨損跡象，不同大小的石塊堆砌而成，顯示出時間的痕跡。牆角有一小撮頑強的植物生長，增添了一絲生命力。'

### Assistant API Demo

In [12]:
# OpenAI assistant API with retrieval

## pdf from arXiv: https://arxiv.org/pdf/2305.06983.pdf
doc_source = 'https://arxiv.org/pdf/2305.06983.pdf'

# The local file name is the last path segment of the URL.
filename = doc_source.split('/')[-1]

## Download the PDF once; later runs reuse the local copy
import os.path
if not os.path.isfile(filename):
    # Send the same browser-like User-Agent the previous wget call used.
    download = requests.get(doc_source, headers={"User-Agent": "Mozilla/5.0"}, timeout=60)
    # Write the file only after a successful download, so a failed request
    # cannot leave an empty file behind that the isfile() guard would accept.
    download.raise_for_status()
    with open(filename, 'wb') as pdf_out:
        pdf_out.write(download.content)
    print('downloaded file:', filename)

# No Content-Type here: requests generates the multipart header for the upload.
headers = {
    "Authorization": f"Bearer {os.environ['OPENAI_API_KEY']}",
}

data = {
    "purpose": "assistants",
}

### Upload the file to OpenAI
# The context manager closes the file handle once the upload has been sent.
with open(filename, 'rb') as pdf_in:
    files = [
        ('file',
          (
              filename,
              pdf_in,
              "application/octet-stream"
          )
        )
    ]
    response = requests.post(
        'https://api.openai.com/v1/files',
        headers=headers,
        data=data,
        files=files,
        timeout=120,
    )

# Report the API's own error text instead of failing later with a bare KeyError.
if response.status_code != 200:
    raise RuntimeError(f"file upload failed with HTTP {response.status_code}: {response.text}")

## Get file id and save to the environment variable
upload_info = json.loads(response.text)
file_id = upload_info['id']
file_name = upload_info['filename']
os.environ['OPENAI_FILE_ID'] = file_id

print(f"file_id: {file_id}, file_name: {file_name}")



file_id: file-lkpTm2i3nUllocWdQYDadYEF, file_name: 2305.06983.pdf


In [9]:
## List the files
# `headers` must already be defined by an earlier cell.
# The timeout keeps a stalled connection from blocking the kernel indefinitely.
response = requests.get('https://api.openai.com/v1/files', headers=headers, timeout=60)
print(json.loads(response.text))


{'object': 'list', 'has_more': False, 'data': [{'object': 'file', 'id': 'file-O7qYlk7YxW01KtI4n0hZrVpl', 'purpose': 'assistants', 'filename': '2305.06983.pdf', 'bytes': 850866, 'created_at': 1699418087, 'status': 'processed', 'status_details': None}, {'object': 'file', 'id': 'file-ffoOkEmb5gj3djWxvMJP8xYJ', 'purpose': 'fine-tune-results', 'filename': 'compiled_results.csv', 'bytes': 215048, 'created_at': 1675331528, 'status': 'processed', 'status_details': None}, {'object': 'file', 'id': 'file-XJ67TkoFSKM6QgwzsEr03YOl', 'purpose': 'fine-tune', 'filename': 'sport2_prepared_valid.jsonl', 'bytes': 387349, 'created_at': 1675330133, 'status': 'processed', 'status_details': None}, {'object': 'file', 'id': 'file-0oYDxq1R5WgoGuMTHvcVXGYR', 'purpose': 'fine-tune', 'filename': 'sport2_prepared_train.jsonl', 'bytes': 1519036, 'created_at': 1675330131, 'status': 'processed', 'status_details': None}, {'object': 'file', 'id': 'file-xREhbH04eGaasZCkwtQo8kpm', 'purpose': 'fine-tune', 'filename': 'spor

In [16]:

## Create the assistant with retrieval

headers = {
    "Authorization": f"Bearer {os.environ['OPENAI_API_KEY']}",
    "Content-Type": "application/json",
    "OpenAI-Beta": "assistants=v1",
}

assistant_name = 'jimmyliao-assistant-1'
instructions = """
    You are a customer support chatbot. Use your knowledge base to best respond to customer queries.
"""

# `file_id` comes from the upload cell; it attaches the uploaded document to the assistant.
payload = {
    "model": "gpt-4-1106-preview",
    "name": assistant_name,
    "tools": [{"type": "retrieval"}],
    "instructions": instructions,
    "file_ids": [file_id],
}

response = requests.post(
    'https://api.openai.com/v1/assistants',
    headers=headers,
    data=json.dumps(payload),
    timeout=60,
)

# Parse once and print before reading fields, so an API error payload is shown
# rather than hidden behind a KeyError on 'id'.
assistant = json.loads(response.text)
print(assistant)

if response.status_code != 200:
    raise RuntimeError(f"assistant creation failed with HTTP {response.status_code}")

assistant_id = assistant['id']



{'id': 'asst_OeaBs0p9ktOOrc7PlQkr0hj1', 'object': 'assistant', 'created_at': 1699420124, 'name': 'jimmyliao-assistant-1', 'description': None, 'model': 'gpt-4-1106-preview', 'instructions': '\n    You are a customer support chatbot. Use your knowledge base to best respond to customer queries.\n', 'tools': [{'type': 'retrieval'}], 'file_ids': ['file-lkpTm2i3nUllocWdQYDadYEF'], 'metadata': {}}


In [17]:
## Create thread

headers = {
    "Authorization": f"Bearer {os.environ['OPENAI_API_KEY']}",
    "Content-Type": "application/json",
    "OpenAI-Beta": "assistants=v1",
}

# The thread is created with its first user message already attached.
payload = { "messages": [ { "role": "user", "content": "請問這篇文章的作者有哪些人?"} ] }

response = requests.post(
    'https://api.openai.com/v1/threads',
    headers=headers,
    data=json.dumps(payload),
    timeout=60,
)

# Parse once and print before reading fields, so an API error payload is shown
# rather than hidden behind a KeyError on 'id'.
thread = json.loads(response.text)
print(thread)

if response.status_code != 200:
    raise RuntimeError(f"thread creation failed with HTTP {response.status_code}")

thread_id = thread['id']

{'id': 'thread_NTITVeCl4gQaJ987OKX8ConB', 'object': 'thread', 'created_at': 1699420136, 'metadata': {}}


In [21]:
## Execute (assistant + thread).run, and get the response

# The thread is identified by the URL path, so the payload only names the assistant.
payload = {
    "assistant_id": assistant_id,
}

headers = {
    "Authorization": f"Bearer {os.environ['OPENAI_API_KEY']}",
    "Content-Type": "application/json",
    "OpenAI-Beta": "assistants=v1",
}

response = requests.post(
    f"https://api.openai.com/v1/threads/{thread_id}/runs",
    headers=headers,
    data=json.dumps(payload),
    timeout=60,
)
obj = json.loads(response.text)

# Print before reading fields, so an API error payload is shown rather than
# hidden behind a KeyError on 'id'.
print(obj)

if response.status_code != 200:
    raise RuntimeError(f"run creation failed with HTTP {response.status_code}")

run_id = obj['id']




{'id': 'run_aDjHtRXlAPqehaiTCmrGK5Ha', 'object': 'thread.run', 'created_at': 1699420265, 'assistant_id': 'asst_OeaBs0p9ktOOrc7PlQkr0hj1', 'thread_id': 'thread_NTITVeCl4gQaJ987OKX8ConB', 'status': 'queued', 'started_at': None, 'expires_at': 1699420865, 'cancelled_at': None, 'failed_at': None, 'completed_at': None, 'last_error': None, 'model': 'gpt-4-1106-preview', 'instructions': '\n    You are a customer support chatbot. Use your knowledge base to best respond to customer queries.\n', 'tools': [{'type': 'retrieval'}], 'file_ids': ['file-lkpTm2i3nUllocWdQYDadYEF'], 'metadata': {}}


In [22]:
## Monitor the run status: poll until the run leaves its in-flight states

headers = {
    "Authorization": f"Bearer {os.environ['OPENAI_API_KEY']}",
    "Content-Type": "application/json",
    "OpenAI-Beta": "assistants=v1",
}

RUN_POLL_INTERVAL_SECONDS = 2
RUN_MAX_POLLS = 60  # upper bound so the cell always terminates
# Statuses treated as "still working"; anything else ends the polling loop.
RUN_PENDING_STATUSES = ('queued', 'in_progress', 'cancelling')

for _ in range(RUN_MAX_POLLS):
    response = requests.get(
        f"https://api.openai.com/v1/threads/{thread_id}/runs/{run_id}",
        headers=headers,
        timeout=60,
    )
    obj = json.loads(response.text)

    # An error payload has no 'status'; .get() yields None, which also stops the loop.
    run_status = obj.get('status')
    if run_status not in RUN_PENDING_STATUSES:
        break
    time.sleep(RUN_POLL_INTERVAL_SECONDS)

# Show where the run ended up so the reader knows whether the messages are ready.
print(f"run {run_id} status: {run_status}")


In [27]:
## Get the assistant response from thread
# Fetches the thread's message list and pretty-prints the whole payload.

headers = {
    "Authorization": f"Bearer {os.environ['OPENAI_API_KEY']}",
    "Content-Type": "application/json",
    "OpenAI-Beta": "assistants=v1",
}
# The timeout keeps a stalled connection from blocking the kernel indefinitely.
response = requests.get(
    f"https://api.openai.com/v1/threads/{thread_id}/messages",
    headers=headers,
    timeout=60,
)
obj = json.loads(response.text)

pp(obj)


{'object': 'list',
 'data': [{'id': 'msg_MH8wUFpqL6cZpBWwEyh8zPf2',
           'object': 'thread.message',
           'created_at': 1699420266,
           'thread_id': 'thread_NTITVeCl4gQaJ987OKX8ConB',
           'role': 'assistant',
           'content': [{'type': 'text',
                        'text': {'value': 'It appears that there is an issue '
                                          'with the file you uploaded. The '
                                          'browsing display did not show any '
                                          'content, which may indicate a '
                                          'problem with accessing the file. '
                                          'Could you please confirm if the '
                                          'file is in a readable format such '
                                          'as PDF, DOCX, or TXT and try '
                                          'uploading it again? This will '
                                

In [28]:
# Fetch the thread's message list again and pretty-print the whole payload.
headers = {
    "Authorization": f"Bearer {os.environ['OPENAI_API_KEY']}",
    "Content-Type": "application/json",
    "OpenAI-Beta": "assistants=v1",
}
# The timeout keeps a stalled connection from blocking the kernel indefinitely.
response = requests.get(
    f"https://api.openai.com/v1/threads/{thread_id}/messages",
    headers=headers,
    timeout=60,
)
obj = json.loads(response.text)

pp(obj)


{'object': 'list',
 'data': [{'id': 'msg_MH8wUFpqL6cZpBWwEyh8zPf2',
           'object': 'thread.message',
           'created_at': 1699420266,
           'thread_id': 'thread_NTITVeCl4gQaJ987OKX8ConB',
           'role': 'assistant',
           'content': [{'type': 'text',
                        'text': {'value': 'It appears that there is an issue '
                                          'with the file you uploaded. The '
                                          'browsing display did not show any '
                                          'content, which may indicate a '
                                          'problem with accessing the file. '
                                          'Could you please confirm if the '
                                          'file is in a readable format such '
                                          'as PDF, DOCX, or TXT and try '
                                          'uploading it again? This will '
                                

{'object': 'list',
 'data': [{'id': 'msg_RR5oVMySTliFVMrS6KSR2kLW',
           'object': 'thread.message',
           'created_at': 1699420857,
           'thread_id': 'thread_Do4zOxniV1RXHbQB3nRoQhql',
           'role': 'assistant',
           'content': [{'type': 'text',
                        'text': {'value': 'The title of the document is '
                                          '"Active Retrieval Augmented '
                                          'Generation"【9†source】.',
                                 'annotations': []}}],
           'file_ids': [],
           'assistant_id': 'asst_OeaBs0p9ktOOrc7PlQkr0hj1',
           'run_id': 'run_6A71wzKlRXkZ9t5XcHbI926Z',
           'metadata': {}},
          {'id': 'msg_AjdmIeF6Q2TY75AuzKaArbhI',
           'object': 'thread.message',
           'created_at': 1699420853,
           'thread_id': 'thread_Do4zOxniV1RXHbQB3nRoQhql',
           'role': 'user',
           'content': [{'type': 'text',
                        'text': {'va

In [30]:
## Add a new message to the thread

payload = { "role": "user", "content": "有哪些作者？" }
headers = {
    "Authorization": f"Bearer {os.environ['OPENAI_API_KEY']}",
    "Content-Type": "application/json",
    "OpenAI-Beta": "assistants=v1",
}
# The timeout keeps a stalled connection from blocking the kernel indefinitely.
response = requests.post(
    f"https://api.openai.com/v1/threads/{thread_id}/messages",
    headers=headers,
    data=json.dumps(payload),
    timeout=60,
)
# `obj` holds the created message; the following cell reads its 'id'.
obj = json.loads(response.text)

print(obj)




{'id': 'msg_M7GUJX7NW70OETlzA7d0zOeD', 'object': 'thread.message', 'created_at': 1699421074, 'thread_id': 'thread_Do4zOxniV1RXHbQB3nRoQhql', 'role': 'user', 'content': [{'type': 'text', 'text': {'value': '有哪些作者？', 'annotations': []}}], 'file_ids': [], 'assistant_id': None, 'run_id': None, 'metadata': {}}


In [33]:
## Retrieve a single message from the thread by its message id
# The id is taken from `obj`, i.e. whatever message object the previously run cell left there.

headers = {
    "Authorization": f"Bearer {os.environ['OPENAI_API_KEY']}",
    "Content-Type": "application/json",
    "OpenAI-Beta": "assistants=v1",
}

# The timeout keeps a stalled connection from blocking the kernel indefinitely.
response = requests.get(
    f"https://api.openai.com/v1/threads/{thread_id}/messages/{obj['id']}",
    headers=headers,
    timeout=60,
)

obj = json.loads(response.text)

obj


{'id': 'msg_M7GUJX7NW70OETlzA7d0zOeD',
 'object': 'thread.message',
 'created_at': 1699421074,
 'thread_id': 'thread_Do4zOxniV1RXHbQB3nRoQhql',
 'role': 'user',
 'content': [{'type': 'text', 'text': {'value': '有哪些作者？', 'annotations': []}}],
 'file_ids': [],
 'assistant_id': None,
 'run_id': None,
 'metadata': {}}

In [34]:
## Get the assistant response from thread
# Fetches the thread's message list and pretty-prints the whole payload.

headers = {
    "Authorization": f"Bearer {os.environ['OPENAI_API_KEY']}",
    "Content-Type": "application/json",
    "OpenAI-Beta": "assistants=v1",
}
# The timeout keeps a stalled connection from blocking the kernel indefinitely.
response = requests.get(
    f"https://api.openai.com/v1/threads/{thread_id}/messages",
    headers=headers,
    timeout=60,
)
obj = json.loads(response.text)

pp(obj)


{'object': 'list',
 'data': [{'id': 'msg_M7GUJX7NW70OETlzA7d0zOeD',
           'object': 'thread.message',
           'created_at': 1699421074,
           'thread_id': 'thread_Do4zOxniV1RXHbQB3nRoQhql',
           'role': 'user',
           'content': [{'type': 'text',
                        'text': {'value': '有哪些作者？', 'annotations': []}}],
           'file_ids': [],
           'assistant_id': None,
           'run_id': None,
           'metadata': {}},
          {'id': 'msg_RR5oVMySTliFVMrS6KSR2kLW',
           'object': 'thread.message',
           'created_at': 1699420857,
           'thread_id': 'thread_Do4zOxniV1RXHbQB3nRoQhql',
           'role': 'assistant',
           'content': [{'type': 'text',
                        'text': {'value': 'The title of the document is '
                                          '"Active Retrieval Augmented '
                                          'Generation"【9†source】.',
                                 'annotations': []}}],
           'fil