# Azure AI Search backup and restore sample

This notebook demonstrates how to back up and restore a search index and migrate it to another instance.

The only prerequisite is that your search index has a `key` field that is `filterable` and `sortable`. If you don't have one, you can create a new field and assign unique values to it in your search index.

It is important to note that only fields marked as `retrievable` can be successfully backed up and restored. It's crucial to consider whether or not you want your vector fields to be marked as `retrievable` in your search index. Marking vector fields as `retrievable` will allow you to back up and restore them and use them for any purpose, whereas NOT marking them as `retrievable` will save you storage costs, but the tradeoff is that you will not be able to back up and restore those fields.

Please review this sample and follow the instructions provided in this Jupyter Python notebook to back up and restore your Azure AI Search indexes.

In [None]:
! pip install azure-search-documents --pre
! pip install tqdm

This script demonstrates backing up and restoring an Azure AI Search index between two services. The `backup_and_restore_index` function retrieves the source index definition, creates a new target index, backs up all documents, and restores them to the target index.

In [None]:
from azure.core.credentials import AzureKeyCredential  
from azure.search.documents import SearchClient  
from azure.search.documents.indexes import SearchIndexClient
import tqdm  
  
def create_clients(endpoint, key, index_name):
    """Build the document-level and index-level clients for one search service.

    Args:
        endpoint: Endpoint of the search service.
        key: API key; wrapped in a separate ``AzureKeyCredential`` for each client.
        index_name: Name of the index the returned ``SearchClient`` is bound to.

    Returns:
        A ``(search_client, index_client)`` tuple.
    """
    document_client = SearchClient(
        endpoint=endpoint,
        index_name=index_name,
        credential=AzureKeyCredential(key),
    )
    management_client = SearchIndexClient(
        endpoint=endpoint,
        credential=AzureKeyCredential(key),
    )
    return document_client, management_client

def total_count(search_client):
    """Return the total count reported for a match-all query.

    Runs a ``*`` search with ``top=0`` and ``include_total_count=True`` and
    returns whatever the response's ``get_count()`` reports.

    Args:
        search_client: Client exposing ``search(...)``.

    Returns:
        The value returned by ``get_count()`` on the search response.
    """
    match_all = search_client.search(search_text="*", top=0, include_total_count=True)
    document_total = match_all.get_count()
    return document_total
  
def search_results_with_filter(search_client, key_field_name):
    """Yield every page of documents, ordered by key, using key-range filters.

    Each query asks for up to 100,000 results ordered by ``key_field_name``.
    Once a query's pages are exhausted, a new query is issued that filters on
    keys greater than the last key seen, so iteration can continue past what a
    single query returns.

    Args:
        search_client: Client exposing ``search(...)`` whose result has ``by_page()``.
        key_field_name: Name of the key field; it is used in ``order_by`` and in
            the ``gt`` filter, so it must be sortable and filterable.

    Yields:
        Non-empty lists of documents, one list per page.
    """
    response = search_client.search(search_text="*", top=100000, order_by=key_field_name).by_page()
    while True:
        # Reset per query: if a follow-up query produces no pages at all, a stale
        # value from the previous query would re-issue the same filter forever.
        last_item = None
        for page in response:
            page = list(page)
            if page:
                last_item = page[-1]
                yield page
            else:
                # An empty page means there is nothing left after the last key.
                last_item = None

        if not last_item:
            break

        # Resume strictly after the last key that was yielded.
        response = search_client.search(
            search_text="*",
            top=100000,
            order_by=key_field_name,
            filter=f"{key_field_name} gt '{last_item[key_field_name]}'",
        ).by_page()

def search_results_without_filter(search_client):
    """Yield each page of a single match-all query as a list.

    Only one query is issued (``top=100000``); there is no follow-up query.

    Args:
        search_client: Client exposing ``search(...)`` whose result has ``by_page()``.

    Yields:
        One list of documents per page, including empty pages.
    """
    paged_results = search_client.search(search_text="*", top=100000).by_page()
    yield from (list(result_page) for result_page in paged_results)

def backup_and_restore_index(source_endpoint, source_key, source_index_name, target_endpoint, target_key, target_index_name):
    """Copy an index definition and its documents from a source to a target service.

    The source index definition is fetched, renamed to ``target_index_name`` and
    created (or updated) on the target service. Documents are then read from the
    source page by page and uploaded to the target, with a progress bar.

    Args:
        source_endpoint: Endpoint of the source search service.
        source_key: API key for the source service.
        source_index_name: Name of the index to read from.
        target_endpoint: Endpoint of the target search service.
        target_key: API key for the target service.
        target_index_name: Name of the index to create or update on the target.

    Returns:
        A ``(source_search_client, target_search_client, all_documents)`` tuple.
        ``all_documents`` is the page generator that was iterated during the
        copy, so it is already consumed when this function returns.

    Raises:
        Exception: If the source index definition has no key field.
    """
    # Create search and index clients
    source_search_client, source_index_client = create_clients(source_endpoint, source_key, source_index_name)
    target_search_client, target_index_client = create_clients(target_endpoint, target_key, target_index_name)

    # Get the source index definition
    source_index = source_index_client.get_index(name=source_index_name)
    non_retrievable_fields = []
    # Start from None so a missing key field reaches the explicit check below
    # instead of failing with an unbound-variable error.
    key_field = None
    for field in source_index.fields:
        if field.hidden:
            non_retrievable_fields.append(field)
        if field.key:
            key_field = field

    if not key_field:
        raise Exception("Key Field Not Found")

    if non_retrievable_fields:
        print(f"WARNING: The following fields are not marked as retrievable and cannot be backed up and restored: {', '.join(f.name for f in non_retrievable_fields)}")

    # Create target index with the same definition
    source_index.name = target_index_name
    target_index_client.create_or_update_index(source_index)

    document_count = total_count(source_search_client)
    can_use_filter = key_field.sortable and key_field.filterable
    if not can_use_filter:
        print("WARNING: The key field is not filterable or not sortable. A maximum of 100,000 records can be backed up and restored.")
    # Backup and restore documents
    all_documents = search_results_with_filter(source_search_client, key_field.name) if can_use_filter else search_results_without_filter(source_search_client)

    print("Backing up and restoring documents:")
    failed_documents = 0
    failed_keys = []
    with tqdm.tqdm(total=document_count) as progress_bar:
        for page in all_documents:
            if not page:
                # Nothing to upload for an empty page.
                continue

            result = target_search_client.upload_documents(documents=page)
            progress_bar.update(len(result))

            for item in result:
                if item.succeeded is not True:
                    failed_documents += 1
                    # Each indexing result carries the document key and error
                    # text itself, so no lookup back into the page is needed.
                    failed_keys.append(item.key)
                    print(f"Document upload error: {item.error_message}")

    if failed_documents > 0:
        print(f"Failed documents: {failed_documents}")
        print(f"Failed document keys: {failed_keys}")
    else:
        print("All documents uploaded successfully.")

    print(f"Successfully backed up '{source_index_name}' and restored to '{target_index_name}'")
    return source_search_client, target_search_client, all_documents
  
# Fill in the endpoint, admin key, and index name for both the source and the
# target search service before running this cell.
source_endpoint = "YOUR_SEARCH_SERVICE_SOURCE_ENDPOINT"
source_key = "YOUR_SEARCH_SERVICE_SOURCE_ADMIN_KEY"
source_index_name = "YOUR_SEARCH_SERVICE_SOURCE_INDEX_NAME"
target_endpoint = "YOUR_SEARCH_SERVICE_TARGET_ENDPOINT"
target_key = "YOUR_SEARCH_SERVICE_TARGET_ADMIN_KEY"
target_index_name = "YOUR_SEARCH_SERVICE_TARGET_INDEX_NAME"

# Run the copy; the returned clients are reused by later cells.
source_search_client, target_search_client, all_documents = backup_and_restore_index(
    source_endpoint=source_endpoint,
    source_key=source_key,
    source_index_name=source_index_name,
    target_endpoint=target_endpoint,
    target_key=target_key,
    target_index_name=target_index_name,
)


The `verify_counts` function compares the document counts of the source and target indexes after the backup and restore, and prints a message indicating whether the counts match.

In [None]:
def verify_counts(source_search_client, target_search_client):
    """Print both indexes' document counts and whether they are equal.

    Args:
        source_search_client: Client for the source index; must expose ``get_document_count()``.
        target_search_client: Client for the target index; must expose ``get_document_count()``.

    Returns:
        None. The counts and the comparison result are printed.
    """
    source_total = source_search_client.get_document_count()
    target_total = target_search_client.get_document_count()

    print(f"Source document count: {source_total}")
    print(f"Target document count: {target_total}")

    verdict = "Document counts match." if source_total == target_total else "Document counts do not match."
    print(verdict)
  
# Compare source and target document counts using the search clients that
# backup_and_restore_index returned in the earlier cell.
verify_counts(source_search_client, target_search_client)  
