## Challenge 7: Deploy your App

### Create the final index (it will take around 25 minutes)

In [7]:
import sys, os, dotenv
# Load settings through python-dotenv before any os.getenv lookups below
dotenv.load_dotenv()

# Put the shared lib folder (resolved from the current working directory) at the
# front of the import path -- presumably where the its_a_rag package lives
sys.path.insert(
    0,
    os.path.abspath(os.path.join(os.getcwd(), '../../lib')),
)

# Configuration
# Every value except the search index name is read from the process
# environment; a variable that is not set yields None.

# --- Azure OpenAI ---
AZURE_OPENAI_API_KEY = os.environ.get("AZURE_OPENAI_API_KEY")
AZURE_OPENAI_ENDPOINT = os.environ.get("AZURE_OPENAI_ENDPOINT")
AZURE_OPENAI_API_VERSION = os.environ.get("AZURE_OPENAI_API_VERSION")
AZURE_OPENAI_MODEL = os.environ.get("AZURE_OPENAI_MODEL")
AZURE_OPENAI_DEPLOYMENT_NAME = os.environ.get("AZURE_OPENAI_DEPLOYMENT_NAME")
AZURE_OPENAI_EMBEDDING = os.environ.get("AZURE_OPENAI_EMBEDDING")

# --- Azure AI Search ---
AZURE_SEARCH_ENDPOINT = os.environ.get("AZURE_SEARCH_ENDPOINT")
AZURE_SEARCH_API_KEY = os.environ.get("AZURE_SEARCH_API_KEY")
# Index name is fixed in the notebook rather than taken from the environment
AZURE_SEARCH_INDEX = "itsarag-fullindex"

# --- Azure AI Document Intelligence ---
AZURE_DOCUMENT_INTELLIGENCE_ENDPOINT = os.environ.get("AZURE_DOCUMENT_INTELLIGENCE_ENDPOINT")
AZURE_DOCUMENT_INTELLIGENCE_API_KEY = os.environ.get("AZURE_DOCUMENT_INTELLIGENCE_API_KEY")

# --- Azure Blob Storage ---
AZURE_STORAGE_CONNECTION_STRING = os.environ.get("AZURE_STORAGE_CONNECTION_STRING")
AZURE_STORAGE_CONTAINER = os.environ.get("AZURE_STORAGE_CONTAINER")
AZURE_STORAGE_FOLDER = os.environ.get("AZURE_STORAGE_FOLDER")

# Import Libraries
import os
from azure.ai.documentintelligence.models import DocumentAnalysisFeature

# Custom Libraries
from its_a_rag.doc_intelligence import AzureAIDocumentIntelligenceLoader
from its_a_rag import ingestion

In [8]:
# Create the Azure Search index/vector store together with its embedding object.
# The helper returns a pair, unpacked here into the store and the embeddings;
# arguments are positional, so their order must be kept.
vector_store_multimodal, aoai_embeddings = ingestion.create_multimodal_vector_store(
    AZURE_SEARCH_INDEX,
    AZURE_OPENAI_API_KEY,
    AZURE_OPENAI_ENDPOINT,
    AZURE_OPENAI_API_VERSION,
    AZURE_OPENAI_EMBEDDING,
    AZURE_SEARCH_ENDPOINT,
    AZURE_SEARCH_API_KEY,
)

In [None]:
# Index
# For every file in the local data folder: analyse it with Azure AI Document
# Intelligence, split the result, and add the pieces to the vector store.

# Index: Load files

# Folder with the source documents, resolved from the current working directory
folder = os.path.join(os.getcwd(), '../../../its-a-rag/data/fsi/pdf') # TODO: FIX ME
# Full paths of the regular files in that folder (sub-directories are skipped).
# Sorted because os.listdir returns entries in arbitrary order; sorting keeps
# the processing order (and the log output) the same on every run.
files = sorted(
    os.path.join(folder, entry)
    for entry in os.listdir(folder)
    if os.path.isfile(os.path.join(folder, entry))
)

# For each file
for file in files:
    # Get the file name. os.path.basename honours the platform's path
    # separator; splitting on "\\" only produced the bare name on Windows and
    # returned the whole path everywhere else.
    pdf_file_name = os.path.basename(file)
    # Index : Load the file and create a document
    print("Processing: ", file)
    loader = AzureAIDocumentIntelligenceLoader(file_path=file, 
                                           api_key = AZURE_DOCUMENT_INTELLIGENCE_API_KEY, 
                                           api_endpoint = AZURE_DOCUMENT_INTELLIGENCE_ENDPOINT,
                                           api_model="prebuilt-layout",
                                           api_version="2024-02-29-preview",
                                           analysis_features = [DocumentAnalysisFeature.OCR_HIGH_RESOLUTION])
    docs = loader.load()
    # Index : Split (the file name is handed to the splitter alongside the docs)
    docs = ingestion.advanced_text_splitter(docs,pdf_file_name)
    # Index : Store
    vector_store_multimodal.add_documents(documents=docs)
    print ("Indexed: ", pdf_file_name)