## Install Dependencies

In [1]:
# Install dependencies
!pip install -e .

Obtaining file:///home/ec2-user/SageMaker/nano-graphrag
  Preparing metadata (setup.py) ... [?25ldone
Installing collected packages: nano-graphrag
  Attempting uninstall: nano-graphrag
    Found existing installation: nano-graphrag 0.0.8.2
    Uninstalling nano-graphrag-0.0.8.2:
      Successfully uninstalled nano-graphrag-0.0.8.2
[33m  DEPRECATION: Legacy editable install of nano-graphrag==0.0.8.2 from file:///home/ec2-user/SageMaker/nano-graphrag (setup.py develop) is deprecated. pip 25.0 will enforce this behaviour change. A possible replacement is to add a pyproject.toml or enable --use-pep517, and use setuptools >= 64. If the resulting installation is not behaving as expected, try using --config-settings editable_mode=compat. Please consult the setuptools documentation for more information. Discussion can be found at https://github.com/pypa/pip/issues/11457[0m[33m
[0m  Running setup.py develop for nano-graphrag
Successfully installed nano-graphrag


## Import Required Libraries

In [14]:
import os
import time
import json
import boto3
import logging
import asyncio
import litellm
from pathlib import Path
from nano_graphrag import GraphRAG, QueryParam
from litellm import completion
from litellm import RateLimitError
from nano_graphrag import GraphRAG, QueryParam

## Setup Logging

In [3]:
# Setup logging
logging.basicConfig(format='[%(asctime)s] p%(process)s {%(filename)s:%(lineno)d} %(levelname)s - %(message)s', level=logging.INFO)
logger = logging.getLogger(__name__)
os.environ["AWS_REGION_NAME"] = "us-east-1"

### Define Environment Variable

In [4]:
os.environ["AWS_REGION_NAME"] = "us-east-1"

## Assume Cross-Account Role

In [5]:
ROLE_TO_ASSUME = Path(os.path.join(os.environ["HOME"],"BedrockCrossAccount.txt")).read_text().strip()
logger.info(f"ROLE_TO_ASSUME={ROLE_TO_ASSUME}")

INFO:__main__:ROLE_TO_ASSUME=arn:aws:iam::605134468121:role/BedrockCrossAccount


In [6]:
# ARN of Role A to assume  
role_to_assume = 'arn:aws:iam::605134468121:role/BedrockCrossAccount'    
  
# Use STS to assume role  
credentials = boto3.client('sts').assume_role(  
    RoleArn=ROLE_TO_ASSUME,  
    RoleSessionName='RoleBSession'  
)['Credentials']  
os.environ["AWS_ACCESS_KEY_ID"] = credentials['AccessKeyId']
os.environ["AWS_SECRET_ACCESS_KEY"] = credentials['SecretAccessKey']
os.environ["AWS_SESSION_TOKEN"] = credentials['SessionToken']

INFO:botocore.credentials:Found credentials from IAM Role: BaseNotebookInstanceEc2InstanceRole


In [20]:
# Confirm cross account access
!aws sts get-caller-identity

{
    "UserId": "AROAYZZGTGQMSXMXAZQY7:RoleBSession",
    "Account": "605134468121",
    "Arn": "arn:aws:sts::605134468121:assumed-role/BedrockCrossAccount/RoleBSession"
}


## Read Data

In [8]:
!curl https://raw.githubusercontent.com/gusye1234/nano-graphrag/main/tests/mock_data.txt > book.txt

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100  184k  100  184k    0     0  2098k      0 --:--:-- --:--:-- --:--:-- 2124k


## Keeping the list of Models handy

In [9]:
model_id_list = ["bedrock/us.meta.llama3-1-70b-instruct-v1:0",
                 "bedrock/us.meta.llama3-2-1b-instruct-v1:0",
                 "bedrock/us.meta.llama3-2-3b-instruct-v1:0",
                 "bedrock/us.meta.llama3-2-11b-instruct-v1:0",
                 "bedrock/us.meta.llama3-2-90b-instruct-v1:0",
                 "bedrock/us.anthropic.claude-3-5-sonnet-20241022-v2:0",
                 "bedrock/us.anthropic.claude-3-5-haiku-20241022-v1:0",
                 "bedrock/amazon.nova-lite-v1:0",
                 "bedrock/amazon.nova-micro-v1:0",
                 "bedrock/amazon.nova-pro-v1:0"]

## Nano GraphRAG

In [10]:
# Use Amazon Bedrock models explicitly
graph_func = GraphRAG(
    working_dir="./dickens",
    using_amazon_bedrock=True,
    best_model_id="us.meta.llama3-1-8b-instruct-v1:0",  # Use a working model
    cheap_model_id="us.meta.llama3-1-8b-instruct-v1:0",  # Use same model to avoid permission issues
)

INFO:nano-graphrag:Switched the default openai funcs to Amazon Bedrock
INFO:nano-graphrag:Load KV full_docs with 0 data
INFO:nano-graphrag:Load KV text_chunks with 0 data
INFO:nano-graphrag:Load KV llm_response_cache with 0 data
INFO:nano-graphrag:Load KV community_reports with 0 data
INFO:nano-graphrag:Loaded graph from ./dickens/graph_chunk_entity_relation.graphml with 0 nodes, 0 edges
INFO:nano-vectordb:Load (0, 1024) data
INFO:nano-vectordb:Init {'embedding_dim': 1024, 'metric': 'cosine', 'storage_file': './dickens/vdb_entities.json'} 0 data


In [11]:
# Read book content
book_path = "./book.txt"
try:
    with open(book_path, "r", encoding="utf-8") as f:
        book_text = f.read()
except FileNotFoundError:
    logger.error("File 'book.txt' not found.")
    raise

In [12]:
# Ensure async insert works properly
async def async_insert_book():
    try:
        logger.info("Inserting book text into GraphRAG...")
        await graph_func.ainsert(book_text)  # Use async insert
        logger.info("Data insertion completed successfully!")
    except Exception as e:
        logger.error(f"Failed to insert book text: {e}")
        raise

In [15]:
# Handle event loop issues in Jupyter
try:
    loop = asyncio.get_running_loop()
    task = loop.create_task(async_insert_book())
except RuntimeError:
    asyncio.run(async_insert_book())

INFO:__main__:Inserting book text into GraphRAG...
INFO:nano-graphrag:[New Docs] inserting 1 docs
INFO:nano-graphrag:[New Chunks] inserting 42 chunks
INFO:nano-graphrag:[Entity Extraction]...
INFO:aiobotocore.credentials:Found credentials in environment variables.


⠹ Processed 42(100%) chunks,  896 entities(duplicated), 264 relations(duplicated)

INFO:nano-graphrag:Inserting 693 vectors to entities





INFO:nano-graphrag:Writing graph with 694 nodes, 250 edges
ERROR:__main__:Failed to insert book text: An error occurred (AccessDeniedException) when calling the InvokeModel operation: You don't have access to the model with the specified model ID.


In [18]:
# Perform a **Global** GraphRAG query
query_global = "What are the top themes in this story?"
async def async_query_global():
    try:
        logger.info(f"Performing global query: {query_global}")
        response_global = await graph_func.aquery(query_global)
        logger.info(f"Global Query Response: {response_global}")
    except Exception as e:
        logger.error(f"Global query failed: {e}")
        raise

# Handle event loop issues in Jupyter
try:
    loop = asyncio.get_running_loop()
    loop.create_task(async_query_global())
except RuntimeError:
    asyncio.run(async_query_global())

INFO:__main__:Performing global query: What are the top themes in this story?
INFO:__main__:Global Query Response: Sorry, I'm not able to provide an answer to that question.


In [19]:
# Perform a **Local** GraphRAG query
query_local = "Summarize the main plot of the book."
async def async_query_local():
    try:
        logger.info(f"Performing local query: {query_local}")
        response_local = await graph_func.aquery(query_local, param=QueryParam(mode="local"))
        logger.info(f"Local Query Response: {response_local}")
    except Exception as e:
        logger.error(f"Local query failed: {e}")
        raise

#Handle event loop issues in Jupyter
try:
    loop = asyncio.get_running_loop()
    loop.create_task(async_query_local())
except RuntimeError:
    asyncio.run(async_query_local())

INFO:__main__:Performing local query: Summarize the main plot of the book.
