In [None]:
! pip install llama-index==11.6
! pip install python-dotenv==1.0.1
! pip install llama-index-vector-stores-azureaisearch==0.2.1
! pip install azure-search-documents==11.5.1
! pip install llama-index-embeddings-azure-openai==0.2.5
! pip install llama-index-llms-azure-openai==0.2.1
! pip install nest_asyncio==1.6.0

## Step 1: Initial Setup
First, we set up the environment, load the necessary credentials, and initialize the Azure OpenAI and Azure AI Search services.

In [15]:

# Initial Setup: Load environment variables and initialize services
import os
from dotenv import load_dotenv
from azure.core.credentials import AzureKeyCredential
from azure.search.documents import SearchClient
from azure.search.documents.indexes import SearchIndexClient
from llama_index.core import StorageContext, VectorStoreIndex
from llama_index.embeddings.azure_openai import AzureOpenAIEmbedding
from llama_index.llms.azure_openai import AzureOpenAI
from llama_index.vector_stores.azureaisearch import AzureAISearchVectorStore, IndexManagement
from llama_index.core.settings import Settings


# Load environment variables
load_dotenv()

# Environment Variables
AZURE_OPENAI_ENDPOINT = os.getenv("AZURE_OPENAI_ENDPOINT")
AZURE_OPENAI_API_KEY = os.getenv("AZURE_OPENAI_API_KEY")
AZURE_OPENAI_CHAT_COMPLETION_DEPLOYED_MODEL_NAME = os.getenv("AZURE_OPENAI_CHAT_COMPLETION_DEPLOYED_MODEL_NAME")
AZURE_OPENAI_EMBEDDING_DEPLOYED_MODEL_NAME = os.getenv("AZURE_OPENAI_EMBEDDING_DEPLOYED_MODEL_NAME")
SEARCH_SERVICE_ENDPOINT = os.getenv("AZURE_SEARCH_SERVICE_ENDPOINT")
SEARCH_SERVICE_API_KEY = os.getenv("AZURE_SEARCH_ADMIN_KEY")
INDEX_NAME = "llamaindex-property-graph"

# Initialize Azure OpenAI models
llm = AzureOpenAI(
    model=AZURE_OPENAI_CHAT_COMPLETION_DEPLOYED_MODEL_NAME,
    deployment_name=AZURE_OPENAI_CHAT_COMPLETION_DEPLOYED_MODEL_NAME,
    api_key=AZURE_OPENAI_API_KEY,
    azure_endpoint=AZURE_OPENAI_ENDPOINT,
    api_version="2024-02-01"
)

embed_model = AzureOpenAIEmbedding(
    model=AZURE_OPENAI_EMBEDDING_DEPLOYED_MODEL_NAME,
    deployment_name=AZURE_OPENAI_EMBEDDING_DEPLOYED_MODEL_NAME,
    api_key=AZURE_OPENAI_API_KEY,
    azure_endpoint=AZURE_OPENAI_ENDPOINT,
    api_version="2024-02-01"
)

Settings.llm = llm
Settings.embed_model = embed_model

# Initialize search clients
credential = AzureKeyCredential(SEARCH_SERVICE_API_KEY)
index_client = SearchIndexClient(endpoint=SEARCH_SERVICE_ENDPOINT, credential=credential)
search_client = SearchClient(endpoint=SEARCH_SERVICE_ENDPOINT, index_name=INDEX_NAME, credential=credential)


## Step 2: Load Documents
Next, load the documents (in this case, the "state_of_the_union.txt" file) that we will use to generate the property graph.

In [8]:
# Load Documents
from llama_index.core import SimpleDirectoryReader

# Load documents from the text file
documents = SimpleDirectoryReader(
    input_files=["data/txt/state_of_the_union.txt"],
).load_data()


## Step 3: Property Graph Construction (Default Mode)
We now initialize the `AzureAISearchVectorStore` to persist vectors in Azure AI Search and build the property graph using the default implicit method for extracting entities and relationships.

In [17]:
# Property Graph Construction: Implicit Extraction Method
from llama_index.core.indices.property_graph import ImplicitPathExtractor
import nest_asyncio
from llama_index.core import PropertyGraphIndex

# Apply nest_asyncio to avoid runtime errors in async environments
nest_asyncio.apply()

# Initialize Azure AI Search vector store
vector_store = AzureAISearchVectorStore(
    search_or_index_client=index_client,
    index_name=INDEX_NAME,
    index_management=IndexManagement.CREATE_IF_NOT_EXISTS,
    id_field_key="id",
    chunk_field_key="text",
    embedding_field_key="embedding",
    embedding_dimensionality=1536,  # Adjust to match embedding model output (like ada-002)
    metadata_string_field_key="metadata",
    doc_id_field_key="doc_id",
    language_analyzer="en.lucene",
    vector_algorithm_type="exhaustiveKnn",
    compression_type="binary"
)

# # Construct the property graph index with implicit path extraction
# index = PropertyGraphIndex.from_documents(
#     documents,
#     llm=llm,
#     embed_model=embed_model,
#     vector_store=vector_store,
#     show_progress=True,
# )


Subtype value aml has no mapping, use base class VectorSearchVectorizer.
Subtype value aml has no mapping, use base class VectorSearchVectorizer.
Subtype value aml has no mapping, use base class VectorSearchVectorizer.
Parsing nodes: 100%|██████████| 1/1 [00:00<00:00, 16.11it/s]
Extracting paths from text: 100%|██████████| 11/11 [00:08<00:00,  1.37it/s]
Extracting implicit paths: 100%|██████████| 11/11 [00:00<?, ?it/s]
Generating embeddings: 100%|██████████| 2/2 [00:00<00:00,  3.48it/s]
Generating embeddings: 100%|██████████| 29/29 [00:01<00:00, 24.81it/s]


## Step 4: Schema-Guided Extraction
In this step, we define a schema to guide the knowledge graph extraction, specifying allowed entity types and relationships. This provides structure to the graph extraction process.

In [18]:
# Schema-Guided Extraction Method
from typing import Literal
from llama_index.core.indices.property_graph import SchemaLLMPathExtractor

# Define the schema for entity types and relationships
entities = Literal["PERSON", "PLACE", "THING"]
relations = Literal["PART_OF", "HAS", "IS_A"]
schema = {
    "PERSON": ["HAS", "IS_A"],
    "PLACE": ["PART_OF", "HAS"],
    "THING": ["IS_A"],
}

# Initialize the schema-based extractor
kg_extractor = SchemaLLMPathExtractor(
    llm=llm,
    possible_entities=entities,
    possible_relations=relations,
    kg_validation_schema=schema,
    strict=True,  # Disallow extractions outside the schema
)

# Construct the property graph index using the schema-guided extractor
index_schema = PropertyGraphIndex.from_documents(
    documents,
    kg_extractors=[kg_extractor],
    vector_store=vector_store,
    show_progress=True,
)

Parsing nodes:   0%|          | 0/1 [00:00<?, ?it/s]

Parsing nodes: 100%|██████████| 1/1 [00:00<00:00, 27.42it/s]
Extracting paths from text with schema: 100%|██████████| 11/11 [00:43<00:00,  3.93s/it]
Generating embeddings: 100%|██████████| 2/2 [00:00<00:00,  3.27it/s]
Generating embeddings: 100%|██████████| 10/10 [00:00<00:00, 20.26it/s]


## Step 5: Free-Form Extraction
Here, we allow the LLM to freely infer the entities and relationships without a pre-defined schema. This method relies on the LLM's contextual understanding of the text to build the graph.

In [19]:
# Free-Form Extraction Method
from llama_index.core.indices.property_graph import SimpleLLMPathExtractor

# Initialize the free-form extractor
kg_extractor_free = SimpleLLMPathExtractor()

# Construct the property graph index using free-form extraction
index_free_form = PropertyGraphIndex.from_documents(
    documents,
    kg_extractors=[kg_extractor_free],
    vector_store=vector_store,
    show_progress=True,
)


Parsing nodes: 100%|██████████| 1/1 [00:00<00:00, 35.21it/s]
Extracting paths from text: 100%|██████████| 11/11 [00:04<00:00,  2.48it/s]
Generating embeddings: 100%|██████████| 2/2 [00:00<00:00,  3.46it/s]
Generating embeddings: 100%|██████████| 22/22 [00:00<00:00, 31.38it/s]


In [22]:
# Querying the Property Graph: Keyword and Vector Similarity
from llama_index.core.indices.property_graph import VectorContextRetriever

# Initialize a vector context retriever
vector_retriever = VectorContextRetriever(
    index.property_graph_store,
    vector_store=vector_store,
    embed_model=embed_model,
    similarity_top_k=3  # Retrieve top 3 similar nodes
)

# Retrieve nodes based on a query
retriever = PropertyGraphIndex.as_retriever(sub_retrievers=[vector_retriever])
results = retriever.retrieve("Democracy in the United States")

# Display results
for result in results:
    print(result)


TypeError: PropertyGraphIndex.as_retriever() missing 1 required positional argument: 'self'

## Step 5: Free-Form Extraction
Here, we allow the LLM to freely infer the entities and relationships without a pre-defined schema. This method relies on the LLM's contextual understanding of the text to build the graph.

In [14]:
# Free-Form Extraction Method
from llama_index.core.indices.property_graph import SimpleLLMPathExtractor

# Initialize the free-form extractor
kg_extractor_free = SimpleLLMPathExtractor(llm=llm)

# Construct the property graph index using free-form extraction
index_free_form = PropertyGraphIndex.from_documents(
    documents,
    kg_extractors=[kg_extractor_free],
    vector_store=vector_store,
    show_progress=True,
)



Parsing nodes: 100%|██████████| 1/1 [00:00<00:00, 20.54it/s]

[A
[A
[A
[A
[A
[A
[A
Extracting paths from text: 100%|██████████| 11/11 [00:04<00:00,  2.68it/s]

[A

AuthenticationError: Error code: 401 - {'error': {'message': 'Incorrect API key provided: c39162c5********************5418. You can find your API key at https://platform.openai.com/account/api-keys.', 'type': 'invalid_request_error', 'param': None, 'code': 'invalid_api_key'}}

## Step 6: Query the Property Graph
We can now query the property graph using both keyword-based and vector similarity-based methods. Here, we demonstrate how to search using vector embeddings stored in Azure AI Search.

In [24]:
# Querying the Property Graph: Keyword and Vector Similarity
from llama_index.core.indices.property_graph import VectorContextRetriever

# Initialize a vector context retriever
vector_retriever = VectorContextRetriever(
    index.property_graph_store,
    vector_store=vector_store,
    embed_model=embed_model,
    similarity_top_k=3  # Retrieve top 3 similar nodes
)

# Call `as_retriever()` on the `index` instance, not the class
retriever = index.as_retriever(sub_retrievers=[vector_retriever])

# Retrieve nodes based on a query
results = retriever.retrieve("Democracy in the United States")

# Display results
for result in results:
    print(result)


AttributeError: 'NoneType' object has no attribute 'search'

In [25]:
print(f"Search Client: {search_client}")


Search Client: <SearchClient [endpoint='https://fsunavala-ai-search.search.windows.net', index='llamaindex-property-graph']>


## Property Graph Construction

In [37]:
index.property_graph_store.save_networkx_graph(name="./kg.html")