# Multi-Tenancy

## Setup

If you haven't already, install the toolkit and dependencies using the [Setup](./00-Setup.ipynb) notebook.

### Create a tenant-specific graph

In this example, we'll create an `aoss` tenant-specific graph for information about Amazon OpenSearch Serverless.

In [None]:
%reload_ext dotenv
%dotenv

import os

from graphrag_toolkit.lexical_graph import LexicalGraphIndex, GraphRAGConfig, set_logging_config
from graphrag_toolkit.lexical_graph.storage import GraphStoreFactory
from graphrag_toolkit.lexical_graph.storage import VectorStoreFactory

from llama_index.readers.web import SimpleWebPageReader

# Show INFO-level log messages while the cell runs.
set_logging_config('INFO')

# Pages to ingest into the tenant-specific graph.
doc_urls = [
    'https://docs.aws.amazon.com/opensearch-service/latest/developerguide/serverless-overview.html',
    'https://docs.aws.amazon.com/opensearch-service/latest/developerguide/serverless-comparison.html',
]

# Both stores are built from values held in the GRAPH_STORE and VECTOR_STORE
# environment variables (a missing variable raises KeyError).
graph_store = GraphStoreFactory.for_graph_store(os.environ['GRAPH_STORE'])
vector_store = VectorStoreFactory.for_vector_store(os.environ['VECTOR_STORE'])

# tenant_id='aoss' makes this index write to the `aoss` tenant-specific graph.
graph_index = LexicalGraphIndex(graph_store, vector_store, tenant_id='aoss')

# metadata_fn returns {'url': <url>} for the URL it is given, so each loaded
# document carries its source URL as metadata.
reader = SimpleWebPageReader(html_to_text=True, metadata_fn=lambda url: {'url': url})
docs = reader.load_data(doc_urls)

graph_index.extract_and_build(docs, show_progress=True)

print('Complete')

### Query a tenant-specific graph

In [None]:
%reload_ext dotenv
%dotenv

import os

from graphrag_toolkit.lexical_graph import set_logging_config
from graphrag_toolkit.lexical_graph import LexicalGraphQueryEngine
from graphrag_toolkit.lexical_graph.storage import GraphStoreFactory
from graphrag_toolkit.lexical_graph.storage import VectorStoreFactory

# Show INFO-level log messages while the cell runs.
set_logging_config('INFO')

# Both stores are built from values held in the GRAPH_STORE and VECTOR_STORE
# environment variables (a missing variable raises KeyError).
graph_store = GraphStoreFactory.for_graph_store(os.environ['GRAPH_STORE'])
vector_store = VectorStoreFactory.for_vector_store(os.environ['VECTOR_STORE'])

# tenant_id='aoss' points the query engine at the `aoss` tenant-specific graph.
query_engine = LexicalGraphQueryEngine.for_traversal_based_search(graph_store, vector_store, tenant_id='aoss')

question = "What are the differences between OpenSearch Service and OpenSearch Serverless?"
response = query_engine.query(question)

# Print the answer followed by the timing values from the response metadata,
# each truncated to a whole number.
print(f"""{response.response}

retrieve_ms: {int(response.metadata['retrieve_ms'])}
answer_ms  : {int(response.metadata['answer_ms'])}
total_ms   : {int(response.metadata['total_ms'])}
""")

#### Check the default graph can't answer the same question

If you have previously populated the default graph with details about Neptune Database and Neptune Analytics, you should get a response that says: "The provided information does not contain any specific details about OpenSearch Service or OpenSearch Serverless. The search results primarily focus on Amazon Neptune, Neptune Analytics, and Neptune Database."

In [None]:
%reload_ext dotenv
%dotenv

import os

from graphrag_toolkit.lexical_graph import set_logging_config
from graphrag_toolkit.lexical_graph import LexicalGraphQueryEngine
from graphrag_toolkit.lexical_graph.storage import GraphStoreFactory
from graphrag_toolkit.lexical_graph.storage import VectorStoreFactory

# Show INFO-level log messages while the cell runs.
set_logging_config('INFO')

# Both stores are built from values held in the GRAPH_STORE and VECTOR_STORE
# environment variables (a missing variable raises KeyError).
graph_store = GraphStoreFactory.for_graph_store(os.environ['GRAPH_STORE'])
vector_store = VectorStoreFactory.for_vector_store(os.environ['VECTOR_STORE'])

# No tenant_id is passed here, so the query runs against the default graph.
query_engine = LexicalGraphQueryEngine.for_traversal_based_search(graph_store, vector_store)

question = "What are the differences between OpenSearch Service and OpenSearch Serverless?"
response = query_engine.query(question)

# Print the answer followed by the timing values from the response metadata,
# each truncated to a whole number.
print(f"""{response.response}

retrieve_ms: {int(response.metadata['retrieve_ms'])}
answer_ms  : {int(response.metadata['answer_ms'])}
total_ms   : {int(response.metadata['total_ms'])}
""")

#### And check that the `aoss` graph can't answer Neptune questions

Because the `aoss` graph only contains information about Amazon OpenSearch Serverless, you should get a response that says: "The search results do not contain any relevant information about Neptune Database or Neptune Analytics. To properly answer this question, I would need additional information specifically about these two services."

In [None]:
%reload_ext dotenv
%dotenv

import os

from graphrag_toolkit.lexical_graph import set_logging_config
from graphrag_toolkit.lexical_graph import LexicalGraphQueryEngine
from graphrag_toolkit.lexical_graph.storage import GraphStoreFactory
from graphrag_toolkit.lexical_graph.storage import VectorStoreFactory

# Show INFO-level log messages while the cell runs.
set_logging_config('INFO')

# Both stores are built from values held in the GRAPH_STORE and VECTOR_STORE
# environment variables (a missing variable raises KeyError).
graph_store = GraphStoreFactory.for_graph_store(os.environ['GRAPH_STORE'])
vector_store = VectorStoreFactory.for_vector_store(os.environ['VECTOR_STORE'])

# Query the `aoss` tenant with a question about a different subject (Neptune).
query_engine = LexicalGraphQueryEngine.for_traversal_based_search(graph_store, vector_store, tenant_id='aoss')

question = "What are the differences between Neptune Database and Neptune Analytics?"
response = query_engine.query(question)

# Print the answer followed by the timing values from the response metadata,
# each truncated to a whole number.
print(f"""{response.response}

retrieve_ms: {int(response.metadata['retrieve_ms'])}
answer_ms  : {int(response.metadata['answer_ms'])}
total_ms   : {int(response.metadata['total_ms'])}
""")

#### Multi-tenancy and metadata filtering

In [None]:
%reload_ext dotenv
%dotenv

import os

from graphrag_toolkit.lexical_graph import set_logging_config
from graphrag_toolkit.lexical_graph import LexicalGraphQueryEngine
from graphrag_toolkit.lexical_graph.storage import GraphStoreFactory
from graphrag_toolkit.lexical_graph.storage import VectorStoreFactory
from graphrag_toolkit.lexical_graph.metadata import FilterConfig

from llama_index.core.vector_stores.types import FilterOperator, MetadataFilter

# Show INFO-level log messages while the cell runs.
set_logging_config('INFO')

# Both stores are built from values held in the GRAPH_STORE and VECTOR_STORE
# environment variables (a missing variable raises KeyError).
graph_store = GraphStoreFactory.for_graph_store(os.environ['GRAPH_STORE'])
vector_store = VectorStoreFactory.for_vector_store(os.environ['VECTOR_STORE'])

# Equality filter on the 'url' metadata key, matching the comparison page only.
url_filter = MetadataFilter(
    key='url',
    value='https://docs.aws.amazon.com/opensearch-service/latest/developerguide/serverless-comparison.html',
    operator=FilterOperator.EQ,
)

# Combine tenant scoping (`aoss`) with the metadata filter in one query engine.
query_engine = LexicalGraphQueryEngine.for_traversal_based_search(
    graph_store,
    vector_store,
    tenant_id='aoss',
    filter_config=FilterConfig(url_filter),
)

question = "What are the differences between OpenSearch Service and OpenSearch Serverless?"
response = query_engine.query(question)

# Print the answer followed by the timing values from the response metadata,
# each truncated to a whole number.
print(f"""{response.response}

retrieve_ms: {int(response.metadata['retrieve_ms'])}
answer_ms  : {int(response.metadata['answer_ms'])}
total_ms   : {int(response.metadata['total_ms'])}
""")