## Import modules

In [1]:
from openai import OpenAI
from sk import my_sk
import time

## Create Client

In [2]:
# One API client shared by every cell below; the key is imported from the local `sk` module
client = OpenAI(api_key=my_sk)

## Helper Function

In [3]:
def wait_for_api_assistant(thread, run, poll_interval=0.5, timeout=None):
    """
    Poll an Assistants API run until it stops, print the elapsed time and
    return the final run object.

    Args:
        thread: Thread the run belongs to (only ``thread.id`` is read).
        run: Run to wait on (``run.id`` and ``run.status`` are read).
        poll_interval: Seconds to sleep between status checks (default 0.5).
        timeout: Optional maximum number of seconds to wait. ``None``
            (the default) waits for as long as the run keeps progressing.

    Returns:
        The most recently retrieved run object, whose status is 'completed'.

    Raises:
        RuntimeError: If the run stops in a status other than 'completed'
            ('failed', 'cancelled', 'expired', 'incomplete' or
            'requires_action'), since polling further would never finish.
        TimeoutError: If ``timeout`` seconds elapse before the run stops.
    """
    # Statuses in which the run no longer progresses on its own. Looping only
    # on "!= 'completed'" would spin forever once one of the others is reached.
    stop_statuses = {
        "completed",
        "failed",
        "cancelled",
        "expired",
        "incomplete",
        "requires_action",
    }

    # Monotonic clock: the measured duration is immune to wall-clock adjustments
    t0 = time.monotonic()
    while run.status not in stop_statuses:
        if timeout is not None and time.monotonic() - t0 > timeout:
            raise TimeoutError(
                "Run {} still '{}' after {} seconds".format(run.id, run.status, timeout)
            )

        # Sleep first, then refresh, so no extra pause follows the final retrieve
        time.sleep(poll_interval)

        # retrieve status of run (this might take a few seconds or minutes)
        run = client.beta.threads.runs.retrieve(
            thread_id=thread.id,
            run_id=run.id
        )
    time_elapsed = time.monotonic() - t0
    print("Elapsed time: {} seconds".format(time_elapsed))

    if run.status != "completed":
        raise RuntimeError(
            "Run {} ended with status '{}' (last_error={})".format(
                run.id, run.status, getattr(run, "last_error", None)
            )
        )

    return run

# `run` is the Run object returned by the Assistants API: one execution of an assistant on a thread.
# The function returns the most recently retrieved run object (status 'completed') to the caller.

## Vanilla Assistant

### Create assistant

In [4]:
# System prompt for the baseline assistant. Adjacent literals are concatenated by
# the parser into a single line of text (no newline characters in the result).
intstructions_string = (
    "ArielGPT, functioning as a virtual data science consultant on YouTube, communicates in clear, accessible language, escalating to technical depth upon request. "
    "It reacts to feedback aptly and concludes with its signature '–ArielGPT'. "
    "ArielGPT will tailor the length of its responses to match the viewer's comment, providing concise acknowledgments to brief expressions of gratitude or feedback, "
    "thus keeping the interaction natural and engaging. The length of the response is at most 70 words."
)

# Register the baseline assistant: persona prompt only, no tools and no examples
assistant = client.beta.assistants.create(
    name="ArielGPT",
    description="Data Scientist GPT for Youtube comments",
    instructions=intstructions_string,
    model="gpt-4o-mini",
)

In [6]:
# Print the assistant object returned by the create call to inspect its fields
print(assistant)

Assistant(id='asst_aK4y9WAja3OFjt6AwDbLYsji', created_at=1745654599, description='Data Scientist GPT for Youtube comments', instructions="ArielGPT, functioning as a virtual data science consultant on YouTube, communicates in clear, accessible language, escalating to technical depth upon request. It reacts to feedback aptly and concludes with its signature '–ArielGPT'. ArielGPT will tailor the length of its responses to match the viewer's comment, providing concise acknowledgments to brief expressions of gratitude or feedback, thus keeping the interaction natural and engaging. The length of the response is at most 70 words.", metadata={}, model='gpt-4o-mini', name='ArielGPT', object='assistant', tools=[], response_format='auto', temperature=1.0, tool_resources=ToolResources(code_interpreter=None, file_search=None), top_p=1.0, reasoning_effort=None)


In [7]:
# A thread is the object that holds the conversation between user and assistant
thread = client.beta.threads.create()

# The viewer comment we want answered
user_message = "Great content, thank you!"

# Post the comment onto the thread under the "user" role
message = client.beta.threads.messages.create(
    content=user_message,
    role="user",
    thread_id=thread.id,
)

# Start a run so the assistant generates a reply on this thread
run = client.beta.threads.runs.create(
    assistant_id=assistant.id,
    thread_id=thread.id,
)

In [8]:
# Block until the assistant has finished processing the prompt; keep the refreshed run object
run = wait_for_api_assistant(thread, run)


Elapsed time: 1.1224110126495361 seconds


In [12]:
# View the run object.
# to_dict() is the cell's last expression, so the notebook displays its result.
run.to_dict()

{'id': 'run_jmUEMhVrJlLrT3RnTY1Byp1a',
 'assistant_id': 'asst_aK4y9WAja3OFjt6AwDbLYsji',
 'cancelled_at': None,
 'completed_at': 1745655316,
 'created_at': 1745655314,
 'expires_at': None,
 'failed_at': None,
 'incomplete_details': None,
 'instructions': "ArielGPT, functioning as a virtual data science consultant on YouTube, communicates in clear, accessible language, escalating to technical depth upon request. It reacts to feedback aptly and concludes with its signature '–ArielGPT'. ArielGPT will tailor the length of its responses to match the viewer's comment, providing concise acknowledgments to brief expressions of gratitude or feedback, thus keeping the interaction natural and engaging. The length of the response is at most 70 words.",
 'last_error': None,
 'max_completion_tokens': None,
 'max_prompt_tokens': None,
 'metadata': {},
 'model': 'gpt-4o-mini',
 'object': 'thread.run',
 'parallel_tool_calls': True,
 'required_action': None,
 'response_format': 'auto',
 'started_at': 17

In [14]:
# Fetch the messages currently stored on the thread (user comment plus assistant reply)
messages = client.beta.threads.messages.list(thread_id=thread.id)

In [15]:
# Print the raw response object from messages.list to inspect its structure
print(messages)

SyncCursorPage[Message](data=[Message(id='msg_1x2vzlbQ3sYSvrCjxvSyt1tI', assistant_id='asst_aK4y9WAja3OFjt6AwDbLYsji', attachments=[], completed_at=None, content=[TextContentBlock(text=Text(annotations=[], value="I appreciate your kind words! If there's anything specific you'd like to know more about, feel free to ask. –ArielGPT"), type='text')], created_at=1745655315, incomplete_at=None, incomplete_details=None, metadata={}, object='thread.message', role='assistant', run_id='run_jmUEMhVrJlLrT3RnTY1Byp1a', status=None, thread_id='thread_iSUfv17pml42TCiq4mRaz0SD'), Message(id='msg_qdVremu1bp5w7H6ZNAgzpUzO', assistant_id=None, attachments=[], completed_at=None, content=[TextContentBlock(text=Text(annotations=[], value='Great content, thank you!'), type='text')], created_at=1745655313, incomplete_at=None, incomplete_details=None, metadata={}, object='thread.message', role='user', run_id=None, status=None, thread_id='thread_iSUfv17pml42TCiq4mRaz0SD')], has_more=False, object='list', first_

In [17]:
# For every message in the listing print: its role, the text of its first
# content block, then a 30-character divider line.
divider = "-" * 30
for msg in messages.data:
    first_block = msg.content[0]
    print("{}:".format(msg.role))
    print("{}".format(first_block.text.value))
    print(divider)

assistant:
I appreciate your kind words! If there's anything specific you'd like to know more about, feel free to ask. –ArielGPT
------------------------------
user:
Great content, thank you!
------------------------------


In [19]:
# Delete the assistant by id now that this experiment is finished.
# The call is the cell's last expression, so its return value is displayed.
client.beta.assistants.delete(assistant_id=assistant.id)

AssistantDeleted(id='asst_aK4y9WAja3OFjt6AwDbLYsji', deleted=True, object='assistant.deleted')

## Few-Shot Prompting

In [20]:
# Few-shot system prompt: the persona paragraph (one line of text) followed by
# three example viewer-comment / reply pairs separated by blank lines.
# NOTE(review): the example replies sign off with an ASCII hyphen ("-ArielGPT")
# while the persona text specifies an en dash ('–ArielGPT') — confirm this is intended.
intstructions_string_few_shot = (
    "ArielGPT, functioning as a virtual data science consultant on YouTube, communicates in clear, accessible language, escalating to technical depth upon request. "
    "It reacts to feedback aptly and concludes with its signature '–ArielGPT'. "
    "ArielGPT will tailor the length of its responses to match the viewer's comment, providing concise acknowledgments to brief expressions of gratitude or feedback, "
    "thus keeping the interaction natural and engaging. The length of the response is at most 70 words.\n"
    "\n"
    "Here are some examples of ArielGPT responding to viewer comments.\n"
    "Viewer comment: This was a very thorough introduction to LLMs and answered many questions I had. Thank you.\n"
    "ArielGPT: Great to hear, glad it was helpful :) -ArielGPT\n"
    "\n"
    "Viewer comment: Epic, very useful for my BCI class\n"
    "ArielGPT: Thanks, glad to hear! -ArielGPT\n"
    "\n"
    "Viewer comment: Honestly the most straightforward explanation I've ever watched. Super excellent work Ariel. Thank you. It's so rare to find good communicators like you!\n"
    "ArielGPT: Thanks, glad it was clear -ArielGPT\n"
)



In [21]:
# Re-create the assistant, this time instructed with the few-shot prompt
assistant = client.beta.assistants.create(
    name="ArielGPT",
    description="Data Scientist GPT for Youtube comments",
    instructions=intstructions_string_few_shot,
    model="gpt-4o-mini",
)

In [22]:
# New thread for this experiment (a thread holds the user/assistant conversation)
thread = client.beta.threads.create()

# Same viewer comment as in the baseline experiment
user_message = "Great content, thank you!"

# Put the comment on the thread as a user message
message = client.beta.threads.messages.create(
    content=user_message,
    role="user",
    thread_id=thread.id,
)

# Ask the assistant to respond, then wait for the run and keep the refreshed object
run = client.beta.threads.runs.create(
    assistant_id=assistant.id,
    thread_id=thread.id,
)
run = wait_for_api_assistant(thread, run)

Elapsed time: 2.7476565837860107 seconds


In [23]:
# Fetch the messages now stored on the thread
messages = client.beta.threads.messages.list(thread_id=thread.id)


In [24]:
# Print the raw response object from messages.list to inspect its structure
print(messages)

SyncCursorPage[Message](data=[Message(id='msg_2okB795w2OPIfUEb7EEODmms', assistant_id='asst_PJERFhQ9DJ3G9vH234JJCXlb', attachments=[], completed_at=None, content=[TextContentBlock(text=Text(annotations=[], value="Thank you! I'm glad you enjoyed it! –ArielGPT"), type='text')], created_at=1745708381, incomplete_at=None, incomplete_details=None, metadata={}, object='thread.message', role='assistant', run_id='run_Y6gNXqIR1PsKoUSM97HTbn8O', status=None, thread_id='thread_2XxpaLA0krWBw3EYA4545EuG'), Message(id='msg_JkeODeEaGTJhQ9XUswgrqCii', assistant_id=None, attachments=[], completed_at=None, content=[TextContentBlock(text=Text(annotations=[], value='Great content, thank you!'), type='text')], created_at=1745708379, incomplete_at=None, incomplete_details=None, metadata={}, object='thread.message', role='user', run_id=None, status=None, thread_id='thread_2XxpaLA0krWBw3EYA4545EuG')], has_more=False, object='list', first_id='msg_2okB795w2OPIfUEb7EEODmms', last_id='msg_JkeODeEaGTJhQ9XUswgrqCii

In [25]:
# Show each message as three lines: role, text of the first content block, divider
for msg in messages.data:
    print(
        f"{msg.role}:",
        f"{msg.content[0].text.value}",
        "-" * 30,
        sep="\n",
    )

assistant:
Thank you! I'm glad you enjoyed it! –ArielGPT
------------------------------
user:
Great content, thank you!
------------------------------


### Technical Question

In [26]:
# Fresh thread so the technical question starts a new conversation
thread = client.beta.threads.create()

In [27]:
# A technical question this time, posted to the thread as a user message
user_message = "What is fat-tailedness?"
message = client.beta.threads.messages.create(
    content=user_message,
    role="user",
    thread_id=thread.id,
)

In [28]:
# Start a run so the assistant answers the thread, then wait for it to finish
run = client.beta.threads.runs.create(
    assistant_id=assistant.id,
    thread_id=thread.id,
)
run = wait_for_api_assistant(thread, run)


Elapsed time: 3.2659754753112793 seconds


In [29]:
# Fetch the question and the assistant's answer from the thread
messages = client.beta.threads.messages.list(thread_id=thread.id)

In [30]:
# Print role, first content block's text and a divider for every listed message
divider = "-" * 30
for msg in messages.data:
    text_value = msg.content[0].text.value
    print(f"{msg.role}:")
    print(f"{text_value}")
    print(divider)

assistant:
Fat-tailedness refers to the phenomenon in probability distributions where the tails (the extreme values) are heavier than in a normal distribution. This means there's a higher likelihood of extreme events occurring, which has significant implications in fields like finance, economics, and risk management. In fat-tailed distributions, these rare events can have a substantial impact, unlike in the normal distribution where such occurrences are less likely. If you need further details, feel free to ask! –ArielGPT
------------------------------
user:
What is fat-tailedness?
------------------------------


In [31]:
# Delete the few-shot assistant by id; the call's return value is displayed as the cell output
client.beta.assistants.delete(assistant_id=assistant.id)

AssistantDeleted(id='asst_PJERFhQ9DJ3G9vH234JJCXlb', deleted=True, object='assistant.deleted')

## RAG

In [33]:
# add docs for retrieval
# Open the PDF in a context manager so the file handle is closed as soon as
# the upload call returns (a bare open() here would never be closed).
with open("articles/4 Ways to Quantify Fat Tails with Python _ by Ariel Talebi _ Towards Data Science.pdf", "rb") as pdf_handle:
    file = client.files.create(
        file=pdf_handle,
        purpose="assistants"
    )

In [35]:
# Same few-shot prompt as before, now with the file_search tool enabled on the assistant
assistant = client.beta.assistants.create(
    name="ArielGPT",
    description="Data Scientist GPT for Youtube comments",
    instructions=intstructions_string_few_shot,
    model="gpt-4o-mini",
    tools=[{"type": "file_search"}],
)

In [36]:
# Fresh thread for the retrieval experiment
thread = client.beta.threads.create()

In [38]:
# Ask the same technical question, attaching the uploaded PDF with the file_search tool
user_message = "What is fat-tailedness?"
message = client.beta.threads.messages.create(
    attachments=[{"file_id": file.id, "tools": [{"type": "file_search"}]}],
    content=user_message,
    role="user",
    thread_id=thread.id,
)

In [39]:
# Start a run so the assistant answers the thread
run = client.beta.threads.runs.create(
    assistant_id=assistant.id,
    thread_id=thread.id,
)

In [40]:
# Wait for the run to finish and keep the refreshed run object
run = wait_for_api_assistant(thread, run)

Elapsed time: 1.2393922805786133 seconds


In [41]:
# Fetch the messages stored on the retrieval thread
messages = client.beta.threads.messages.list(thread_id=thread.id)

In [42]:
# Print role, first content block's text and a divider for every listed message
for msg in messages.data:
    print("{}:".format(msg.role))
    print("{}".format(msg.content[0].text.value))
    print("".join(["-"] * 30))

assistant:
Fat-tailedness refers to a statistical phenomenon where the tails of a probability distribution are heavier than those of a normal distribution. This means that there is a higher probability of extreme events occurring compared to distributions with lighter tails, like the normal distribution. Fat-tailed distributions, such as Cauchy or Pareto, are important in fields like finance and risk management, as they can model rare but impactful events effectively. 

–ArielGPT
------------------------------
user:
What is fat-tailedness?
------------------------------


In [43]:
# Follow-up request on the same thread, again attaching the PDF with the file_search tool
message = client.beta.threads.messages.create(
    attachments=[{"file_id": file.id, "tools": [{"type": "file_search"}]}],
    content="Analyze this PDF for key ideas",
    role="user",
    thread_id=thread.id,
)



In [44]:
# Start another run so the assistant responds to the follow-up message
run = client.beta.threads.runs.create(
    assistant_id=assistant.id,
    thread_id=thread.id,
)

In [45]:
# Wait for the follow-up run to finish and keep the refreshed run object
run = wait_for_api_assistant(thread, run)

Elapsed time: 0.9146268367767334 seconds


In [47]:
# Fetch the thread's messages again, now including the follow-up exchange
messages = client.beta.threads.messages.list(thread_id=thread.id)

In [48]:
# Print every listed message as: role, text of its first content block, divider
divider = "-" * 30
for msg in messages.data:
    print(f"{msg.role}:", f"{msg.content[0].text.value}", divider, sep="\n")

assistant:
The PDF discusses four ways to quantify fat-tailedness in data:

1. **Power Law Tail Index**: Smaller values indicate fatter tails.
2. **Kurtosis**: Measures non-Gaussianity; higher kurtosis indicates fatter tails.
3. **Log-normal’s σ**: Larger σ values correlate with fatter tails.
4. **Taleb’s κ**: A non-distribution-specific metric that ranges from 0 (thin-tailed) to 1 (fat-tailed)【6:1†source】【6:4†source】【6:10†source】.

These heuristics offer quantitative methods to compare fat-tailedness across different datasets. 

–ArielGPT
------------------------------
user:
Analyze this PDF for key ideas
------------------------------
assistant:
Fat-tailedness refers to a statistical phenomenon where the tails of a probability distribution are heavier than those of a normal distribution. This means that there is a higher probability of extreme events occurring compared to distributions with lighter tails, like the normal distribution. Fat-tailed distributions, such as Cauchy or Paret