Retrieval-Augmented Generation (RAG) Setup

In [1]:
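# Activate the virtual Python environment (do this in the shell before starting Jupyter).
# NOTE: a `!` command runs in a temporary subshell, so `source` there does not change the kernel's environment.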
#! source ../venv/bin/activate

In [2]:
#! uv pip install -r requirements.txt

Environment setup

In [None]:
import os
from dotenv import load_dotenv

from langchain_elasticsearch import ElasticsearchStore
from langchain_aws import BedrockEmbeddings
from langchain_aws import BedrockLLM
from langchain.chains import RetrievalQA
from urllib.request import urlopen

import boto3
import json


# Load all environment variables from the .env file into the process environment.
load_dotenv()

# Elasticsearch connection settings.
ELASTIC_CLOUD_ENDPOINT = os.getenv('ELASTIC_CLOUD_ENDPOINT')
ELASTIC_API_KEY = os.getenv('ELASTIC_API_KEY')
ELASTIC_CLOUD_ID = os.getenv('ELASTIC_CLOUD_ID')

# AWS settings. The .env file stores the credentials under AWS_ACCESS_KEY /
# AWS_SECRET_KEY, which are not names boto3 looks up by itself, so they must be
# handed to the client explicitly (see below).
AWS_ACCESS_KEY_ID = os.getenv('AWS_ACCESS_KEY')
AWS_SECRET_ACCESS_KEY = os.getenv('AWS_SECRET_KEY')
AWS_REGION = os.getenv('AWS_REGION')

# Bedrock runtime client used for the embedding calls.
# Passing the credentials explicitly makes the values from .env take effect;
# when both are None, boto3 falls back to its default credential chain.
bedrock_client = boto3.client(
    service_name="bedrock-runtime",
    region_name=AWS_REGION,
    aws_access_key_id=AWS_ACCESS_KEY_ID,
    aws_secret_access_key=AWS_SECRET_ACCESS_KEY,
)

# NOTE: Use an embedding-capable model id here.
# If you use a text-generation model (e.g. amazon.titan-text-express-v1),
# the embedding call may return None which causes errors downstream
# (TypeError: object of type 'NoneType' has no len()).
bedrock_embedding = BedrockEmbeddings(
    client=bedrock_client,
    model_id="amazon.titan-embed-text-v2:0",  # embedding model
)

Create the Elasticsearch vector store

In [4]:

# Connection, index and embedding settings for the Elasticsearch-backed vector store.
es_store_options = {
    "es_url": ELASTIC_CLOUD_ENDPOINT,
    "es_api_key": ELASTIC_API_KEY,
    "index_name": "marketplace_index",
    "embedding": bedrock_embedding,
}

vector_store = ElasticsearchStore(**es_store_options)

In [5]:
import tiktoken
# Sample documents (JSON) published in the elasticsearch-labs repository.
url = "https://raw.githubusercontent.com/elastic/elasticsearch-labs/main/example-apps/chatbot-rag-app/data/data.json"

# Open the response as a context manager so the HTTP connection is closed
# as soon as the payload has been read instead of being left open.
with urlopen(url) as response:
    workplace_docs = json.load(response)

from langchain.text_splitter import RecursiveCharacterTextSplitter

# Fields copied from each source document into its metadata entry.
_METADATA_FIELDS = ("name", "summary", "rolePermissions")

# Text bodies to be split and embedded, in source order.
content = [entry["content"] for entry in workplace_docs]

# One metadata dict per document, aligned index-for-index with `content`.
metadata = [
    {field: entry[field] for field in _METADATA_FIELDS}
    for entry in workplace_docs
]

In [6]:
# Chunking parameters passed to the tiktoken-based splitter.
CHUNK_SIZE = 512
CHUNK_OVERLAP = 256

text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
    chunk_size=CHUNK_SIZE,
    chunk_overlap=CHUNK_OVERLAP,
)

# Split each document body into chunks; `metadata` is passed alongside `content`
# so the two lists stay paired by position.
docs = text_splitter.create_documents(content, metadatas=metadata)
# vector_store.add_documents(docs)

In [7]:
# Embed the chunks with the Bedrock embedding model and index them into
# "marketplace_index". The factory is called on the class itself rather than
# through the existing `vector_store` instance.
documents = ElasticsearchStore.from_documents(
    docs,
    embedding=bedrock_embedding,
    index_name="marketplace_index",
    es_url=ELASTIC_CLOUD_ENDPOINT,
    es_api_key=ELASTIC_API_KEY,
)

TypeError: object of type 'NoneType' has no len()