# Working with async

`openaivec.aio` is an experimental module that provides an async interface to OpenAI's API. It is designed to be used with Python's `asyncio` library, allowing you to make non-blocking requests to the OpenAI API.

In [None]:
import os
from openai import AsyncOpenAI
from openaivec.aio import pandas_ext
import pandas as pd
from pydantic import BaseModel, Field
from enum import Enum

# Create a single async OpenAI client and hand it to the pandas extension.
async_client = AsyncOpenAI()
pandas_ext.use(async_client)

# Model names passed to the extension for responses and embeddings calls.
pandas_ext.responses_model("gpt-4.1-mini")
pandas_ext.embeddings_model("text-embedding-3-small")

The model name 'gpt-4.1-mini' is not supported by tiktoken. Instead, using the 'o200k_base' encoding.


In [2]:
files_dict = {"path": []}
for root, dirs, files in os.walk('../../src/openaivec'):
    for file in files:
        path = os.path.join(root, file)
        if "__" not in path:
            files_dict["path"].append(os.path.join(root, file))

implements_df = pd.DataFrame(files_dict).assign(
    module=lambda df: df["path"].str.split("/")
        .map(lambda x: x[3:])
        .map(lambda x: ".".join(x))
        .map(lambda x: x.replace(".py", "")),
)

In [3]:
# Show the collected file paths next to their derived module names.
implements_df

Unnamed: 0,path,module
0,../../src/openaivec/serialize.py,openaivec.serialize
1,../../src/openaivec/responses.py,openaivec.responses
2,../../src/openaivec/log.py,openaivec.log
3,../../src/openaivec/util.py,openaivec.util
4,../../src/openaivec/embeddings.py,openaivec.embeddings
5,../../src/openaivec/pandas_ext.py,openaivec.pandas_ext
6,../../src/openaivec/spark.py,openaivec.spark
7,../../src/openaivec/prompt.py,openaivec.prompt
8,../../src/openaivec/beta/batch.py,openaivec.beta.batch
9,../../src/openaivec/beta/schema.py,openaivec.beta.schema


In [None]:
# These models are documented with `#` comments rather than docstrings:
# pydantic includes a model's docstring in its generated JSON schema, so a
# docstring would change the schema passed as `response_format`. Guidance
# intended for the model lives in the `Field` descriptions.


# Kind of code object that a documentation section describes.
class ObjectType(str, Enum):
    FUNCTION = "function"
    CLASS = "class"
    VARIABLE = "variable"


# Backward-compatible alias for the original, misspelled class name.
OpjectType = ObjectType


# One question/answer pair about a documented code section.
class Question(BaseModel):
    question: str = Field(description="The specific question related to the code section.")
    answer: str = Field(description="The corresponding answer explaining the code aspect.")


# Documentation for a single code object: its name, kind, summary and Q&A.
class Section(BaseModel):
    name: str = Field(description="The name of the function or class being documented.")
    # The description lists every ObjectType member so it agrees with the enum.
    type: ObjectType = Field(description="The type of the code section: a function, a class, or a variable.")
    description: str = Field(description="A concise summary of the function or class's purpose and functionality.")
    questions: list[Question] = Field(description="A list of Q&A pairs clarifying aspects of this code section.")


# Top-level response schema: all documented sections of one source file.
class Document(BaseModel):
    sections: list[Section] = Field(description="A list of sections, each documenting a specific function or class.")

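Note that `await` cannot be used inside a lambda function, so the async `.ai` methods must be awaited directly (for example, as the argument to `assign`) rather than from within a lambda.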

In [5]:
def _read_source(path: str) -> str:
    """Return the full text of the file at ``path``, decoded as UTF-8.

    Args:
        path: Location of the file to read.

    Returns:
        The file's contents as a single string.
    """
    # The context manager closes the handle as soon as the read finishes,
    # and the explicit encoding avoids depending on the platform default.
    with open(path, encoding="utf-8") as source_file:
        return source_file.read()


# Attach each file's source text as a new `code` column.
codes_df = implements_df.assign(
    code=lambda df: df["path"].apply(_read_source),
)

docs_df = codes_df.assign(
    doc=await codes_df["code"].ai.responses(
        instructions="Generate a detailed documentation for the code, including sections for each function or class. Each section should include a name, description, and a list of questions and answers that clarify the code's functionality.",
        response_format=Document,
        batch_size=1,
    )
).pipe(
    lambda df: df.drop(columns=["code", "path"], errors="ignore")
)


In [6]:
# Show the generated documentation object for each module.
docs_df

Unnamed: 0,module,doc
0,openaivec.serialize,"sections=[Section(name='serialize_base_model',..."
1,openaivec.responses,sections=[Section(name='_vectorize_system_mess...
2,openaivec.log,"sections=[Section(name='observe', description=..."
3,openaivec.util,"sections=[Section(name='split_to_minibatch', d..."
4,openaivec.embeddings,"sections=[Section(name='VectorizedEmbeddings',..."
5,openaivec.pandas_ext,"sections=[Section(name='use', description='Reg..."
6,openaivec.spark,"sections=[Section(name='UDFBuilder', descripti..."
7,openaivec.prompt,"sections=[Section(name='Example', description=..."
8,openaivec.beta.batch,"sections=[Section(name='JsonlUDFBuilder', desc..."
9,openaivec.beta.schema,"sections=[Section(name='Entity', description='..."


In [7]:
# Flatten the nested documentation one level at a time: extract the fields of
# `doc`, explode the list of sections into rows, extract each section's
# fields, explode its list of questions, then extract the question fields.
sections_df = docs_df.ai.extract("doc").explode("doc_sections")
section_questions_df = (
    sections_df
    .ai.extract("doc_sections")
    .explode("doc_sections_questions")
)
questions_df = section_questions_df.ai.extract("doc_sections_questions")

In [9]:
questions_df.assign(
    embedding=await questions_df["doc_sections_questions_question"].ai.embeddings()
)

Unnamed: 0,module,doc_sections_name,doc_sections_description,doc_sections_questions_question,doc_sections_questions_answer,embedding
0,openaivec.serialize,serialize_base_model,Serializes a Pydantic BaseModel class into its...,What does serialize_base_model do?,It converts a Pydantic BaseModel class into it...,"[0.0070360457, 0.024257848, 0.008134634, -0.01..."
0,openaivec.serialize,serialize_base_model,Serializes a Pydantic BaseModel class into its...,What is the input type for serialize_base_model?,The input is a Pydantic BaseModel class type (...,"[0.011876273, 0.044357162, -0.006140844, -0.02..."
0,openaivec.serialize,serialize_base_model,Serializes a Pydantic BaseModel class into its...,What is the output of serialize_base_model?,A dictionary representing the JSON schema of t...,"[0.024041146, 0.03602427, 0.030482074, -0.0335..."
0,openaivec.serialize,dereference_json_schema,Processes a JSON schema dictionary to replace ...,What is the purpose of dereference_json_schema?,To replace all JSON schema $ref references wit...,"[0.044597678, 0.022845676, 0.02155757, -0.0035..."
0,openaivec.serialize,dereference_json_schema,Processes a JSON schema dictionary to replace ...,How does dereference_json_schema handle nested...,It recursively traverses dictionaries and list...,"[0.030821564, -0.00038608705, 0.051987603, -0...."
...,...,...,...,...,...,...
13,openaivec.aio.pandas_ext,OpenAIVecSeriesAccessor,A pandas Series accessor registered as `.ai` t...,What is the purpose of `.ai.count_tokens`?,It counts the number of tokens in each Series ...,"[0.019880824, 0.011035094, 0.032934606, -0.024..."
13,openaivec.aio.pandas_ext,OpenAIVecSeriesAccessor,A pandas Series accessor registered as `.ai` t...,What does `.ai.extract` do?,It expands a Series of Pydantic models or dict...,"[-0.027163144, 0.013949604, 0.0043303133, -0.0..."
13,openaivec.aio.pandas_ext,OpenAIVecDataFrameAccessor,A pandas DataFrame accessor registered as `.ai...,What does `.ai.extract(column)` do?,It flattens a specified column containing Pyda...,"[-0.0042420933, -3.9657563e-05, 0.0112626655, ..."
13,openaivec.aio.pandas_ext,OpenAIVecDataFrameAccessor,A pandas DataFrame accessor registered as `.ai...,How does `.ai.responses` work on a DataFrame?,"It serializes each row to JSON, then asynchron...","[-0.024903353, 0.007675921, 0.047522407, 0.003..."
