In [1]:
import os
import logging
from pathlib import Path
from dotenv import load_dotenv
from pydantic import BaseModel, Field
from colorama import init, Fore
from botocore.config import Config
from langchain_core.tools import tool
from utils import create_bedrock_client
from fmbench_rag_setup import FMBenchRagSetup
from fastapi import FastAPI, HTTPException
from typing import List, Optional
from langchain_aws import ChatBedrockConverse
from fastapi.responses import RedirectResponse
from guardrails import BedrockGuardrailManager
from langgraph.prebuilt import create_react_agent
from langchain_core.messages import HumanMessage, SystemMessage

In [2]:

# Role ARN read from the environment; os.getenv yields None when
# BEDROCK_ROLE_ARN is not set.
# NOTE(review): load_dotenv is imported in the first cell but is never called
# in the visible cells, so a value that lives only in a .env file would not be
# picked up here -- confirm whether that is intended.
bedrock_role_arn = os.getenv("BEDROCK_ROLE_ARN")

# Build the RAG system once and keep it in a notebook-level variable so the
# cells that follow can query it.
_rag_system = FMBenchRagSetup(
    bedrock_role_arn=bedrock_role_arn,
).setup()


2025-04-06 21:07:07.262,INFO,p1547739,fmbench_rag_setup.py,149,Initializing Bedrock client for region: us-east-1
2025-04-06 21:07:07.344,INFO,p1547739,fmbench_rag_setup.py,107,Bedrock client initialized
2025-04-06 21:07:07.346,INFO,p1547739,fmbench_rag_setup.py,188,Loading vector store from indexes/fmbench_index
2025-04-06 21:07:07.537,INFO,p1547739,fmbench_rag_setup.py,190,Successfully loaded vector store from indexes/fmbench_index
2025-04-06 21:07:07.544,INFO,p1547739,fmbench_rag_setup.py,311,RAG setup complete


In [3]:
        
# Ask the RAG system a sample question and keep the reply in `result`,
# which the next cell prints.
# NOTE(review): the recorded output of this cell suggests query() also logs
# the query and dumps the retrieved context documents -- confirm in
# fmbench_rag_setup.py.
question = "can we benchmark on nvidia gpu on g6e instances"
result = _rag_system.query(question)

2025-04-06 21:07:07.563,INFO,p1547739,fmbench_rag_setup.py,408,Processing query: can we benchmark on nvidia gpu on g6e instances


('\n'
 '\n'
 "result={'input': 'can we benchmark on nvidia gpu on g6e instances', "
 "'context': [Document(id='d583ff09-66a7-4578-9d59-6b0e572d6e41', "
 "metadata={'url': "
 "'https://aws-samples.github.io/foundation-model-benchmarking-tool/benchmarking_on_ec2.html', "
 "'title': 'EC2 - Foundation Model Benchmarking Tool (FMBench)', 'favicon': "
 "{}, 'language': 'en', 'scrapeId': '5c876876-7753-46a4-a81a-53ce6e9e2438', "
 "'viewport': 'width=device-width,initial-scale=1', 'generator': "
 "'mkdocs-1.6.1, mkdocs-material-9.6.11', 'sourceURL': "
 "'https://aws-samples.github.io/foundation-model-benchmarking-tool/benchmarking_on_ec2.html', "
 "'statusCode': 200, 'theme-color': '#00000000', 'color-scheme': 'normal'}, "
 "page_content='[Skip to "
 'content](https://aws-samples.github.io/foundation-model-benchmarking-tool/benchmarking_on_ec2.html#benchmark-models-on-ec2)\\n\\n# '
 'Benchmark models on EC2 '
 '[¶](https://aws-samples.github.io/foundation-model-benchmarking-tool/benchmarking_o

In [4]:
# Print the answer so its line breaks are rendered; judging by the recorded
# output, `result` appears to be a multi-line Markdown string, which a bare
# `result` expression would show as an escaped repr instead.
print(result)

Yes, you can benchmark on an instance type with an NVIDIA GPU on g6e instances using the Foundation Model Benchmarking Tool (FMBench). Here are the steps to follow:

1. **Connect to your EC2 instance**: Use SSH or EC2 Connect to access your instance.

2. **Install `uv`**: Run the following command to install `uv` on your instance, which is used to create a new virtual environment for `FMBench`.

    ```sh
    curl -LsSf https://astral.sh/uv/install.sh | sh
    export PATH="$HOME/.local/bin:$PATH"
    ```

3. **Install `docker-compose`**:

    ```sh
    DOCKER_CONFIG=${DOCKER_CONFIG:-$HOME/.docker}
    mkdir -p $DOCKER_CONFIG/cli-plugins
    sudo curl -L https://github.com/docker/compose/releases/latest/download/docker-compose-$(uname -s)-$(uname -m) -o $DOCKER_CONFIG/cli-plugins/docker-compose
    sudo chmod +x $DOCKER_CONFIG/cli-plugins/docker-compose
    docker compose version
    ```

4. **Build the `vllm` container**:

    ```sh
    # Clone the vLLM project repository from GitHub
 