In [1]:
import mlflow.deployments

## Actions to take
1. Create secret scope for OpenAI API key
2. Create an external model endpoint that points at an OpenAI model
3. Create tables and schema to store chunked PDF document text and metadata

## Create secret scope for OpenAI API key

**1. Create scope**


`databricks secrets create-scope openai`

**2. Create the secret (the CLI will prompt for the secret value)**


`databricks secrets put-secret openai apikey`


**3. Confirm scope was created**


`databricks secrets list-scopes`

## Create an external model endpoint pointing at an OpenAI model

In [0]:
# Deployment client for the Databricks target.
client = mlflow.deployments.get_deploy_client("databricks")

ENDPOINT_NAME = "openai-completion-endpoint"

# External-model configuration: a single served entity that forwards chat
# requests to OpenAI's "o3-mini" model. The API key is never written into the
# notebook; it is referenced from the `openai` secret scope, key `apikey`.
endpoint_config = {
    "served_entities": [
        {
            "name": "completions",
            "external_model": {
                "name": "o3-mini",
                "provider": "openai",
                "task": "llm/v1/chat",
                "openai_config": {
                    "openai_api_key": "{{secrets/openai/apikey}}",
                },
            },
        }
    ],
}

# Creating an endpoint whose name is already taken is rejected by the serving
# API, which would break this cell on every re-run (Restart & Run All).
# Look the name up first and reuse the endpoint when it is already there.
# NOTE: an existing endpoint is returned as-is; its config is not updated.
existing_endpoint_names = {ep.get("name") for ep in client.list_endpoints()}

if ENDPOINT_NAME in existing_endpoint_names:
    endpoint = client.get_endpoint(endpoint=ENDPOINT_NAME)
else:
    endpoint = client.create_endpoint(name=ENDPOINT_NAME, config=endpoint_config)

## Create tables to store document text and metadata

In [0]:
%sql
-- Both tables live in the existing `financial_rag.rag` schema and have the
-- Delta change data feed enabled via TBLPROPERTIES.

-- Document-level metadata, keyed by fileNumber.
-- Primary-key columns must be declared NOT NULL, otherwise the PRIMARY KEY
-- constraint is rejected when the table is created.
CREATE TABLE IF NOT EXISTS financial_rag.rag.edgar_pdf_metadata (
    fileNumber BIGINT NOT NULL PRIMARY KEY,
    companyName STRING,
    tradingSymbol STRING,
    fiscalYearEndDate STRING,
    documentHash STRING
) TBLPROPERTIES (delta.enableChangeDataFeed = true);

-- Extracted document text with its page number. `id` is an auto-generated
-- identity column; each row references its parent document through
-- edgar_pdf_metadata_fileNumber.
CREATE TABLE IF NOT EXISTS financial_rag.rag.edgar_pdf_text (
    id BIGINT GENERATED ALWAYS AS IDENTITY,
    edgar_pdf_metadata_fileNumber BIGINT,
    text STRING,
    pageNumber INT,
    FOREIGN KEY (edgar_pdf_metadata_fileNumber) REFERENCES financial_rag.rag.edgar_pdf_metadata(fileNumber)
) TBLPROPERTIES (delta.enableChangeDataFeed = true);