In [1]:
!pip3 install openai --upgrade

You should consider upgrading via the '/Library/Frameworks/Python.framework/Versions/3.9/bin/python3.9 -m pip install --upgrade pip' command.[0m


In [1]:
import os
import time

import pandas as pd
from openai import OpenAI

# Read the key from the environment so a real secret never ends up saved in the notebook.
API_KEY = os.environ.get("OPENAI_API_KEY")
if not API_KEY:
    raise RuntimeError(
        "Set the OPENAI_API_KEY environment variable before running this notebook."
    )

# Single shared client used by every cell below.
client = OpenAI(api_key=API_KEY)

Step 1: Create an Assistant

In [3]:
# Register the "Math Tutor" assistant with the code_interpreter tool turned on.
assistant = client.beta.assistants.create(
    model="gpt-4o-mini",
    name="Math Tutor",
    tools=[dict(type="code_interpreter")],
    instructions="You are a personal math tutor. Write and run code to answer math questions.",
)

Step 2: Create a Thread

In [4]:
# Start an empty conversation thread; the user message is attached to it in the next step.
thread = client.beta.threads.create()

Step 3: Add a Message to the Thread

In [5]:
# Post the user's question onto the thread.
message = client.beta.threads.messages.create(
    role="user",
    content="I need to solve the equation `3x + 11 = 14`. Can you help me?",
    thread_id=thread.id,
)

Step 4: Create a Run

In [7]:
# Run the assistant against the thread using the SDK's create-and-poll helper.
run = client.beta.threads.runs.create_and_poll(assistant_id=assistant.id, thread_id=thread.id)

Step 5: Display the Assistant's response

In [8]:
# Re-fetch the run object for this thread by its id.
run = client.beta.threads.runs.retrieve(run_id=run.id, thread_id=thread.id)

In [10]:
# List the messages stored on the thread.
messages = client.beta.threads.messages.list(thread_id=thread.id)

In [11]:
# Iterate in reverse so the conversation prints oldest message first.
# A message may carry several content parts and not all of them are text
# (e.g. image_file parts), so print every text part rather than assuming
# content[0] is text. The loop variable is `msg` so the `message` object
# created earlier is not overwritten.
for msg in reversed(messages.data):
    for part in msg.content:
        if part.type == "text":
            print(f"{msg.role}: {part.text.value}")

user: I need to solve the equation `3x + 11 = 14`. Can you help me?
assistant: The solution to the equation \( 3x + 11 = 14 \) is \( x = 1 \).
assistant: If you have any more equations or math problems you'd like help with, feel free to ask!


AI assistant with files for Data Viz

In [3]:
# Upload the CSV with purpose 'assistants'. The context manager closes the local
# file handle as soon as the upload call returns instead of leaking it.
with open("restaurant_data.csv", "rb") as csv_stream:
    file = client.files.create(file=csv_stream, purpose='assistants')

Create the Assistant with the code_interpreter tool enabled and provide the file as a resource to the tool.

In [4]:
# Build the "Data visualizer" assistant and hand the uploaded file to its code interpreter.
# NOTE(review): the prompt text is passed as `description`, not `instructions` — confirm this is intended.
assistant = client.beta.assistants.create(
    model="gpt-4o",
    name="Data visualizer",
    description="You are great at creating beautiful data visualizations. You analyze data present in .csv files, understand trends, and come up with data visualizations relevant to those trends. You also share a brief text summary of the trends observed.",
    tools=[dict(type="code_interpreter")],
    tool_resources=dict(code_interpreter=dict(file_ids=[file.id])),
)

Create a Thread

In [5]:
# Open a thread seeded with a single user request; the uploaded file is attached
# to that message for the code_interpreter tool.
thread = client.beta.threads.create(
    messages=[
        dict(
            role="user",
            content="Create 3 data visualizations based on the trends in this file.",
            attachments=[
                dict(file_id=file.id, tools=[dict(type="code_interpreter")]),
            ],
        ),
    ]
)

In [6]:
# Execute the data-visualizer assistant on the thread via the create-and-poll helper.
run = client.beta.threads.runs.create_and_poll(assistant_id=assistant.id, thread_id=thread.id)

In [7]:
# Re-fetch the run object by id.
run = client.beta.threads.runs.retrieve(run_id=run.id, thread_id=thread.id)

In [8]:
# Collect the thread's messages (text and generated images) for display below.
messages = client.beta.threads.messages.list(thread_id=thread.id)

In [17]:
# Walk every content part of every message: print text parts, and download and
# save image parts to the working directory.
for msg in messages.data:
    role = msg.role
    for content_item in msg.content:
        if content_item.type == "text":
            text_content = content_item.text.value
            print(f"{role.capitalize()}: {text_content}")
        elif content_item.type == "image_file":
            image_file_id = content_item.image_file.file_id
            print(f"Image File ID: {image_file_id}")

            # Retrieve the image content using the client's method
            image_data = client.files.content(image_file_id)
            image_data_bytes = image_data.read()

            # Save the image data to a file. This must stay inside the image_file
            # branch: at loop-body level it would also run for text parts, where
            # image_file_id / image_data_bytes are stale or undefined. The handle
            # is named `image_out` so it does not overwrite the uploaded `file`
            # object that other cells read via `file.id`.
            with open(f"./image_{image_file_id}.png", "wb") as image_out:
                image_out.write(image_data_bytes)
            print(f"Image {image_file_id} saved as image_{image_file_id}.png")

Image File ID: file-S0R1X8Nd50lIb4ILnRC84abP
Image file-S0R1X8Nd50lIb4ILnRC84abP saved as image_file-S0R1X8Nd50lIb4ILnRC84abP.png
Image File ID: file-KwdjewSnDEzPytWntakYpIK8
Image file-KwdjewSnDEzPytWntakYpIK8 saved as image_file-KwdjewSnDEzPytWntakYpIK8.png
Image File ID: file-m7HU9KKrCjup2GrANox7Z6em
Image file-m7HU9KKrCjup2GrANox7Z6em saved as image_file-m7HU9KKrCjup2GrANox7Z6em.png
Assistant: Here are the three visualizations based on the trends in the dataset:

1. **Cuisine vs. Average Meal Price**: 
    - The box plot illustrates the distribution of average meal prices for different cuisines. This can help identify which cuisines tend to be more expensive or affordable.

2. **Location vs. Revenue**:
    - The box plot compares the revenue between restaurants located in rural areas versus downtown. This can highlight the differences in revenue generation based on the restaurant's location.

3. **Rating vs. Ambience Score and Service Quality**:
    - The scatter plot shows how the

Creating a RAG pipeline using an Assistant

In [20]:
# Upload the PDF with purpose "assistants"; the context manager closes the local
# file handle once the upload call returns instead of leaking it.
with open("ISLP_book.pdf", "rb") as pdf_stream:
    file = client.files.create(file=pdf_stream, purpose="assistants")

Step 1: Create a new Assistant with File Search Enabled

In [26]:
# Create the assistant with the file_search tool enabled. The instructions string is
# the prompt sent to the model, so its grammar is fixed here ("Use you" -> "Use your",
# duplicated "Questions" removed).
assistant = client.beta.assistants.create(
  name="Data Science Assistant",
  instructions="You are an expert Data Scientist. Use your knowledge base to answer questions about Data Science.",
  model="gpt-4o-mini",
  tools=[{"type": "file_search"}],
)

Step 2: Upload files and add them to a Vector Store

In [27]:
# Newer openai SDK releases expose vector stores at `client.vector_stores`; older
# ones only have `client.beta.vector_stores`. Resolve whichever this install
# provides so the cell works on both.
vector_stores_api = getattr(client, "vector_stores", None) or client.beta.vector_stores

# Create a vector store called "ISLP"
vector_store = vector_stores_api.create(name="ISLP")

# Ready the files for upload to OpenAI
file_paths = ["ISLP_book.pdf"]
file_streams = [open(path, "rb") for path in file_paths]

# Use the upload and poll SDK helper to upload the files, add them to the vector store,
# and poll the status of the file batch for completion. The streams are closed
# afterwards whether or not the upload succeeds, so no file handles are leaked.
try:
    file_batch = vector_stores_api.file_batches.upload_and_poll(
        vector_store_id=vector_store.id, files=file_streams
    )
finally:
    for stream in file_streams:
        stream.close()

# You can print the status and the file counts of the batch to see the result of this operation.
print(file_batch.status)
print(file_batch.file_counts)

completed
FileCounts(cancelled=0, completed=1, failed=0, in_progress=0, total=1)


Step 3: Update the assistant to use the new Vector Store

In [28]:
# Attach the vector store to the assistant's file_search tool resources.
assistant = client.beta.assistants.update(
    tool_resources=dict(file_search=dict(vector_store_ids=[vector_store.id])),
    assistant_id=assistant.id,
)

Step 4: Create a thread

In [30]:
# Upload the user provided file to OpenAI. The context manager closes the local
# file handle once the upload call returns instead of leaking it.
with open("ISLP_book.pdf", "rb") as pdf_stream:
    message_file = client.files.create(file=pdf_stream, purpose="assistants")

# Create a thread and attach the file to the message
thread = client.beta.threads.create(
  messages=[
    {
      "role": "user",
      "content": "What are the assumptions of linear regression",
      # Attach the new file to the message.
      "attachments": [
        { "file_id": message_file.id, "tools": [{"type": "file_search"}] }
      ],
    }
  ]
)

# The thread now has a vector store with that file in its tool resources.
print(thread.tool_resources.file_search)

ToolResourcesFileSearch(vector_store_ids=['vs_iZx4HHDh0QN1rppwGU4S4nXL'])


Step 5: Create a run and check the output

In [31]:
# Use the create and poll SDK helper to create a run and poll the status of
# the run until it's in a terminal state.
run = client.beta.threads.runs.create_and_poll(
    thread_id=thread.id, assistant_id=assistant.id
)

# A terminal state is not necessarily success: fail with the run's own status and
# error here instead of an opaque IndexError on messages[0] further down.
if run.status != "completed":
    raise RuntimeError(
        f"Run {run.id} ended with status {run.status!r}: {getattr(run, 'last_error', None)}"
    )

messages = list(client.beta.threads.messages.list(thread_id=thread.id, run_id=run.id))
if not messages:
    raise RuntimeError(f"Run {run.id} completed but produced no messages.")

# Replace each annotation marker in the answer with a numbered reference and
# collect the matching source filenames as a citation list.
message_content = messages[0].content[0].text
annotations = message_content.annotations
citations = []
for index, annotation in enumerate(annotations):
    message_content.value = message_content.value.replace(annotation.text, f"[{index}]")
    # Only annotations that carry a file_citation get a citation line.
    if file_citation := getattr(annotation, "file_citation", None):
        cited_file = client.files.retrieve(file_citation.file_id)
        citations.append(f"[{index}] {cited_file.filename}")

print(message_content.value)
print("\n".join(citations))

The assumptions of linear regression are crucial for ensuring that the model produces valid and interpretable results. The main assumptions include:

1. **Linearity**: There is a linear relationship between the independent variables (predictors) and the dependent variable (response).

2. **Independence**: The residuals (errors) are independent. This means that the value of the dependent variable for one observation should not influence the value for another observation.

3. **Homoscedasticity**: The residuals have constant variance at all levels of income. In other words, the spread of the residuals should be approximately the same across all values of the independent variables.

4. **Normality of Residuals**: The residuals should be approximately normally distributed. This is especially important for hypothesis testing regarding coefficients.

5. **No or Little Multicollinearity**: Multicollinearity occurs when independent variables are highly correlated with each other, which can mak