In [1]:
from openai import OpenAI

### ASSISTANTS:
#### Below is an example of how to create a assistant:
##### STEP1: CREATE AN ASSISTANT
An Assistant represents an entity that can be configured to respond to users’ Messages using several parameters like:

Instructions: how the Assistant and model should behave or respond
Model: you can specify any GPT-3.5 or GPT-4 models, including fine-tuned models. The Retrieval tool requires gpt-3.5-turbo-1106 and gpt-4-1106-preview models.
Tools: the API supports Code Interpreter and REtrieval that are built and hosted by OpenAI.
Functions: the API allows you to define custom function signatures, with similar behavior as our function calling feature.
In this example, we're creating an Assistant that is a personal math tutor, with the Code Interpreter tool enabled:

In [2]:
# Creating an assistant
client = OpenAI()
assistant = client.beta.assistants.create(
    name="Math Tutor",
    instructions="You are a personal math tutor. Write and run code to answer math questions.",
    tools=[{"type": "code_interpreter"}],
    model="gpt-4-1106-preview"
)

##### Step 2: Create a Thread
A Thread represents a conversation. We recommend creating one Thread per user as soon as the user initiates the conversation. Pass any user-specific context and files in this thread by creating Messages.

python

python
thread = client.beta.threads.create()
Threads don’t have a size limit. You can pass as many Messages as you want to a Thread. The API will ensure that requests to the model fit within the maximum context window, using relevant optimization techniques such as truncation.

##### Step 3: Add a Message to a Thread
A Message contains the user's text, and optionally, any files that the user uploads. Image files aren't supported today, but we plan to add support for them in the coming months.

In [3]:
# Creating a thread
thread = client.beta.threads.create()
# Adding message to thread
message = client.beta.threads.messages.create(
    thread_id=thread.id,
    role="user",
    content="I need to solve the equation `3x + 11 = 14`. Can you help me?"
)
# Viewing the thread
thread_messages = client.beta.threads.messages.list(thread.id)
print(thread_messages)

SyncCursorPage[ThreadMessage](data=[ThreadMessage(id='msg_VuduIf2r8FcHLFgxMctZu8lI', assistant_id=None, content=[MessageContentText(text=Text(annotations=[], value='I need to solve the equation `3x + 11 = 14`. Can you help me?'), type='text')], created_at=1699392061, file_ids=[], metadata={}, object='thread.message', role='user', run_id=None, thread_id='thread_maKAq3drBQEZqzOYymVFu7sC')], object='list', first_id='msg_VuduIf2r8FcHLFgxMctZu8lI', last_id='msg_VuduIf2r8FcHLFgxMctZu8lI', has_more=False)


##### Step 4: Run the Assistant
For the Assistant to respond to the user message, you need to create a Run. This makes the Assistant read the Thread and decide whether to call tools or simply use the model to best answer the user query. As the run progresses, the assistant appends Messages to the thread with the role="assistant" .

You can optionally pass additional instructions to the Assistant while creating the Run:

In [4]:
run = client.beta.threads.runs.create(
  thread_id=thread.id,
  assistant_id=assistant.id,
  instructions="Please address the user as Jane Doe. The user has a premium account."
)

##### Step 5: Display the Assistant's Response
This creates a Run in a queued status.
You can periodically retrieve the Run to check on its status to see if it has moved to completed.

In [5]:
run = client.beta.threads.runs.retrieve(
  thread_id=thread.id,
  run_id=run.id
)
print(run.status)

in_progress


In [6]:
messages = client.beta.threads.messages.list(thread_id=thread.id)
print(messages.data[0].content[0].text.value)

The solution to the equation \(3x + 11 = 14\) is \(x = 1\).


#### To view the steps

In [14]:
run_steps = client.beta.threads.runs.steps.list(
    thread_id=thread.id,
    run_id=run.id
)
print(run_steps.data)

[RunStep(id='step_nQGyl2tYa8qIBRWnB8Nw5mP5', assistant_id='asst_esqZezHqzykAFgkB6kLroloO', cancelled_at=None, completed_at=1699392113, created_at=1699392099, expired_at=None, failed_at=None, last_error=None, metadata=None, object='thread.run.step', run_id='run_ByzOGWlrHEdtovGk42C3rwhQ', status='completed', step_details=MessageCreationStepDetails(message_creation=MessageCreation(message_id='msg_7abEg5qbWfsVY2BSgcMZ5v1N'), type='message_creation'), thread_id='thread_XYbcDUWe61lPDakAXdoh3vBk', type='message_creation', expires_at=None), RunStep(id='step_roEwDKpvtXB63BSrwH9Jduo7', assistant_id='asst_esqZezHqzykAFgkB6kLroloO', cancelled_at=None, completed_at=1699392099, created_at=1699392091, expired_at=None, failed_at=None, last_error=None, metadata=None, object='thread.run.step', run_id='run_ByzOGWlrHEdtovGk42C3rwhQ', status='completed', step_details=ToolCallsStepDetails(tool_calls=[RetrievalToolCall(id='call_wYKc1qCPo9U5Cwx7FQCx1qTC', retrieval={}, type='retrieval')], type='tool_calls'), 

### Assistant for file

In [8]:
import inspect

sig = inspect.signature(client.files.create)
for param in sig.parameters.values():
    print('Name:', param.name)
    print('Default:', param.default if param.default is not param.empty else None)
    print('Annotation:', param.annotation if param.annotation is not param.empty else None)
    print('Kind:', param.kind)

Name: file
Default: None
Annotation: FileTypes
Kind: KEYWORD_ONLY
Name: purpose
Default: None
Annotation: Literal['fine-tune', 'assistants']
Kind: KEYWORD_ONLY
Name: extra_headers
Default: None
Annotation: Headers | None
Kind: KEYWORD_ONLY
Name: extra_query
Default: None
Annotation: Query | None
Kind: KEYWORD_ONLY
Name: extra_body
Default: None
Annotation: Body | None
Kind: KEYWORD_ONLY
Name: timeout
Default: NOT_GIVEN
Annotation: float | httpx.Timeout | None | NotGiven
Kind: KEYWORD_ONLY


In [9]:
# Upload file
file = client.files.create(
  file=open("./example_docs/DarwinVCFinStatements.pdf", "rb"),
  purpose='assistants'
)

# Add file to the assistant
assistant = client.beta.assistants.create(
  name="Data visualizer",
  description="You are great at creating beautiful summaries of a pdf file. You analyze the text and tables in the pdf and generate a summary, ensuring to highlight the main themes of the observed file.",
  model="gpt-4-1106-preview",
  tools=[{"type": "retrieval"}],
  file_ids=[file.id]
)

In [10]:
# Creating a thread
thread = client.beta.threads.create()
# Adding message to thread
message = client.beta.threads.messages.create(
    thread_id=thread.id,
    role="user",
    content="I have'nt got the time to read the entire document and so would like to generate a concise yet informative summary of the same."
)

In [11]:
run = client.beta.threads.runs.create(
  thread_id=thread.id,
  assistant_id=assistant.id
)

In [12]:
run = client.beta.threads.runs.retrieve(
  thread_id=thread.id,
  run_id=run.id
)

In [13]:
print(run.status)
messages = client.beta.threads.messages.list(thread_id=thread.id)


in_progress


In [20]:
print(messages.data[0].content[0].text.value)

The document provided is the financial statement for Darwin Venture Capital Fund of Funds III L.P., covering the fiscal year ending on December 31, 2018. Below is a comprehensive summary presenting key details:

1. **Overview of the Partnership**:
   - Darwin Venture Capital Fund of Funds III L.P. was established on October 10, 2012, and will operate until August 24, 2027, unless terminated earlier or extended.
   - The partnership's focus is on venture capital-type investments to achieve capital appreciation primarily through investments in other venture capital funds.
   - Darwin Ventures III LLC is the general partner, and there are several limited partners【9†source】.

2. **Significant Accounting Policies**:
   - The financial statements conform to U.S. GAAP with specialized accounting for investment companies.
   - They use estimates impacting reported assets, liabilities, revenue, and expenses.
   - Investments are primarily in U.S.-based early-stage venture funds, with a maximum 

### Table extractor assistant:


In [87]:
import os
from openai import OpenAI
client = OpenAI()

# File list
file_dir = "./example_docs/"
file_list = os.listdir(file_dir)

ASSISTANT_INSTRUCTION = """
                      You are a helpful and intelligent assistant.
                      Use the attached documents to answer questions related to them.
                      """

# Upload file
file_id = []
for file in file_list:
  f = client.files.create(
  file=open(f"./example_docs/{file}", "rb"),
  purpose='assistants'
  )
  file_id.append(f.id)

print(file_id)

['file-Su8inAIZPw57MdUuI46RFn8T', 'file-OHOAkEKJXLyPGGeJEWvVnaiu']


In [88]:
# Add file to the assistant
assistant = client.beta.assistants.update(
  name="PDF Table",
  description = 
  description=ASSISTANT_INSTRUCTION,
  model="gpt-4-1106-preview",
  tools=[{"type": "code_interpreter"},{"type": "retrieval"}],
  file_ids=file_id
)

In [81]:
THREAD_MESSAGE = """what sections contain tables related to investment schedule/investment amount etc."""

# Creating a thread
thread = client.beta.threads.create()
# Adding message to thread
message = client.beta.threads.messages.create(
    thread_id=thread.id, role="user", content=THREAD_MESSAGE
)

In [82]:
run = client.beta.threads.runs.create(
  thread_id=thread.id,
  assistant_id=assistant.id
)

In [83]:
import time
run = client.beta.threads.runs.retrieve(
  thread_id=thread.id,
  run_id=run.id
)


In [86]:
messages = client.beta.threads.messages.list(thread_id=thread.id)
print(messages.data[0].content[0].text.value)
print(messages.data[0].content[0].text.annotations)

The document contains information related to investment schedules and amounts in the following sections:

1. "Statement of Net Assets as of December 31, 2018"
   - This section presents the assets and liabilities, including investments in venture funds at estimated fair value and cash equivalents.

2. "Schedule of Portfolio Investments as of December 31, 2018"
   - This schedule details various investments in venture funds with cost, fair value, and percentage of partners' capital for each investment.

3. "Statement of Operations for the year ended December 31, 2018"
   - This statement includes the operating income and expenses, net investment loss, and investment gain/(loss) for the year.

4. "Statement of Changes in Partners' Capital for the year ended December 31, 2018"
   - This part contains a detailed list of balance changes in general and limited partners' capital, including contributed capital, distributions to partners, net investment loss, and net realized and unrealized gai

In [47]:
# Adding message to thread
message = client.beta.threads.messages.create(
    thread_id=thread.id, role="user", content="can you extract more such tables' sources?"
)
run = client.beta.threads.runs.create(
  thread_id=thread.id,
  assistant_id=assistant.id
)
messages2 = client.beta.threads.messages.list(thread_id=thread.id)
print(messages2.data[0])
print(run.status)

ThreadMessage(id='msg_ETMBoZY1YWYCtEWyga4d97yx', assistant_id=None, content=[MessageContentText(text=Text(annotations=[], value="can you extract more such tables' sources?"), type='text')], created_at=1699453033, file_ids=[], metadata={}, object='thread.message', role='user', run_id=None, thread_id='thread_qY6sVsajSLo5kXOXmAd9cdIU')
queued
