In [None]:
import json
import os
from azure.cosmos import CosmosClient, exceptions, PartitionKey

# Cosmos DB connection settings.
# The endpoint and account key are read from the COSMOS_ENDPOINT / COSMOS_KEY
# environment variables so that no credential has to be stored in the notebook.
# When a variable is unset, the previously hard-coded value is used as default
# (for the key that default is an empty string, which must be supplied before
# the client can authenticate).
endpoint = os.environ.get(
    "COSMOS_ENDPOINT",
    "https://anildwa-ncus-hybridsearch.documents.azure.com:443/",
)
key = os.environ.get("COSMOS_KEY", "")

client = CosmosClient(endpoint, key)
database_name = 'vectordb'
container_name = 'docusign_performance_test2'

# Create the database if it does not exist. The call returns the database
# client, so a separate get_database_client() lookup is not needed.
database = client.create_database_if_not_exists(id=database_name)

# Vector embedding policy with a single entry: float32 embeddings stored at
# /content_vector, 384 dimensions, compared using cosine distance.
# (384 is intended to match the length of each document's content_vector.)
content_vector_embedding = {
    "path": "/content_vector",
    "dataType": "float32",
    "distanceFunction": "cosine",
    "dimensions": 384,
}
vector_embedding_policy = {"vectorEmbeddings": [content_vector_embedding]}

# Full-text policy: /content and /title are both declared as en-US full-text
# paths, and en-US is also the default language.
full_text_paths_policy = {
    "defaultLanguage": "en-US",
    "fullTextPaths": [
        {"path": text_path, "language": "en-US"}
        for text_path in ("/content", "/title")
    ],
}

# Indexing policy:
#   - consistent, automatic indexing of every path ("/*"),
#   - except /_etag and the raw embedding values under /content_vector,
#   - full-text indexes on /content and /title,
#   - a diskANN vector index on /content_vector.
indexing_policy_diskANN = dict(
    indexingMode="consistent",
    automatic=True,
    includedPaths=[{"path": "/*"}],
    excludedPaths=[{"path": "/_etag/?"}, {"path": "/content_vector/*"}],
    fullTextIndexes=[{"path": "/content"}, {"path": "/title"}],
    vectorIndexes=[{"path": "/content_vector", "type": "diskANN"}],
)

# Create the container if it does not already exist, partitioned on /id and
# configured with the vector-embedding, indexing and full-text policies plus a
# provisioned throughput of 10000.
container_settings = {
    "id": container_name,
    "partition_key": PartitionKey(path="/id"),
    "vector_embedding_policy": vector_embedding_policy,
    "indexing_policy": indexing_policy_diskANN,
    "full_text_policy": full_text_paths_policy,
    "offer_throughput": 10000,
}
container = database.create_container_if_not_exists(**container_settings)



In [None]:
# Load data.jsonl (one JSON document per line) and upsert each document into
# the container. Documents are written exactly as parsed: no fields are renamed
# or transformed, and each document is sent as its own upsert request (this is
# a sequential loop, not a bulk operation).
count = 0
with open("data.jsonl", "r", encoding="utf-8") as f:
    for line in f:
        # Skip blank / whitespace-only lines (e.g. a stray empty line at the
        # end of the file); json.loads would raise JSONDecodeError on them.
        if not line.strip():
            continue
        doc = json.loads(line)
        container.upsert_item(doc)
        count += 1
print(f"Inserted {count} documents into Cosmos DB.")