#### Populating Azure AI Search Index using Embedded JSON Files

In [1]:
from azure.core.credentials import AzureKeyCredential
from azure.search.documents.indexes import SearchIndexClient

import os
from dotenv import load_dotenv

In [2]:
load_dotenv(override=True)

# Azure Search service details
service_name = os.getenv("AZURE_SEARCH_SERVICE_NAME")
admin_key = os.getenv("AZURE_SEARCH_SERVICE_KEY")
index_name = os.getenv("AZURE_SEARCH_INDEX_NAME")

# Fail fast with a readable message: os.getenv returns None for an unset
# variable, which would otherwise be formatted into the endpoint below as
# "https://None.search.windows.net/" and only fail later, far from the cause.
missing_settings = [
    setting_name
    for setting_name, setting_value in (
        ("AZURE_SEARCH_SERVICE_NAME", service_name),
        ("AZURE_SEARCH_SERVICE_KEY", admin_key),
        ("AZURE_SEARCH_INDEX_NAME", index_name),
    )
    if not setting_value
]
if missing_settings:
    raise RuntimeError(
        "Missing required environment variable(s): " + ", ".join(missing_settings)
    )

# Initialize the search index client
endpoint = f"https://{service_name}.search.windows.net/"
credential = AzureKeyCredential(admin_key)
index_client = SearchIndexClient(endpoint=endpoint, credential=credential)

In [3]:
import os
import json
from azure.search.documents import SearchClient

# Build the client bound to the target index; the upload loop further down
# sends documents through it.
search_client = SearchClient(
    endpoint=endpoint,
    index_name=index_name,
    credential=credential,
)


def process_json_file(file_path):
    """Load one JSON document from disk and normalise its embedding.

    Args:
        file_path: Path to a JSON file whose top-level object contains an
            ``embedding`` entry holding an iterable of numeric values (or
            values ``float()`` can convert).

    Returns:
        The parsed JSON object, with ``data['embedding']`` replaced by a list
        of ``float``. All other entries are returned as parsed.

    Raises:
        KeyError: If the document has no ``embedding`` entry.
        ValueError, TypeError: If an embedding value cannot be converted
            to ``float``.
        json.JSONDecodeError: If the file is not valid JSON.
    """
    # Decode explicitly as UTF-8: without `encoding`, open() uses the locale's
    # preferred codec (e.g. cp1252 on Windows), which mis-decodes or rejects
    # non-ASCII text stored in UTF-8 JSON files.
    with open(file_path, 'r', encoding='utf-8') as file:
        data = json.load(file)

    # Ensure the embedding is a list of floats
    data['embedding'] = [float(x) for x in data['embedding']]

    return data

In [4]:
from ipywidgets import IntProgress
from IPython.display import display

In [5]:
# Directory containing the JSON files
directory = '../data/embedded'

# Kept so that any later cell referencing `documents` still finds the name;
# this cell uploads each document as soon as it is read and never appends here.
documents = []

# Select the JSON files up front so the progress bar maximum and the reported
# total match the number of uploads actually attempted (other directory
# entries would otherwise leave the bar short of 100%). Sorting makes the
# processing order deterministic; os.listdir gives no ordering guarantee.
files = sorted(name for name in os.listdir(directory) if name.endswith('.json'))
max_count = len(files)
progress_bar = IntProgress(min=0, max=max_count, description="Progress:")

display(progress_bar)

print(f"Total files to process: {len(files)}")

# (file_path, error_message) for every document the service did not accept
failed_uploads = []

for filename in files:
    print(f"Processing {filename}")

    file_path = os.path.join(directory, filename)
    document = process_json_file(file_path)
    result = search_client.upload_documents([document])

    # Per the Azure SDK reference, upload_documents returns one IndexingResult
    # per submitted document; a rejected document is reported through
    # `succeeded == False` rather than an exception, so it must be checked.
    rejected = [item for item in result if not item.succeeded]
    if rejected:
        for item in rejected:
            failed_uploads.append((file_path, item.error_message))
            print(f"FAILED to upload {file_path}: {item.error_message}")
    else:
        print(f"Uploaded {file_path} ...")

    progress_bar.value += 1

if failed_uploads:
    print(f"Data upload completed with {len(failed_uploads)} failure(s).")
else:
    print("Data upload completed.")

IntProgress(value=0, description='Progress:', max=999)

Total files to process: 999
Processing review_1.json
Uploaded ../data/embedded\review_1.json ...
Processing review_10.json
Uploaded ../data/embedded\review_10.json ...
Processing review_100.json
Uploaded ../data/embedded\review_100.json ...
Processing review_101.json
Uploaded ../data/embedded\review_101.json ...
Processing review_102.json
Uploaded ../data/embedded\review_102.json ...
Processing review_103.json
Uploaded ../data/embedded\review_103.json ...
Processing review_104.json
Uploaded ../data/embedded\review_104.json ...
Processing review_105.json
Uploaded ../data/embedded\review_105.json ...
Processing review_106.json
Uploaded ../data/embedded\review_106.json ...
Processing review_107.json
Uploaded ../data/embedded\review_107.json ...
Processing review_108.json
Uploaded ../data/embedded\review_108.json ...
Processing review_109.json
Uploaded ../data/embedded\review_109.json ...
Processing review_11.json
Uploaded ../data/embedded\review_11.json ...
Processing review_110.json
Uplo