In [None]:
# Aim of this notebook: build the most basic RAG (retrieval-augmented generation) pipeline to demonstrate the concept.

import openai
import boto3
import pprint
from typing import Tuple

# Local development authenticates through a named AWS profile. On Conveyor that
# profile does not exist and is not needed, so only activate it when it is
# actually configured instead of requiring this line to be removed by hand.
LOCAL_AWS_PROFILE = 'conveyor-demo-profile'
if LOCAL_AWS_PROFILE in boto3.Session().available_profiles:
    boto3.setup_default_session(profile_name=LOCAL_AWS_PROFILE)

# Bedrock knowledge-base retrieval client. Note the region: it differs from the
# region used for SSM below.
BEDROCK_CLIENT = boto3.client('bedrock-agent-runtime', region_name='us-east-1')
KNOWLEDGE_BASE_ID = 'ECZYEUIJ59'  # id of the decisions knowledge base in English

# The OpenAI API key is read from SSM Parameter Store rather than hardcoded.
# WithDecryption=True returns the plaintext value when the parameter is stored
# as a SecureString; the flag is ignored for plain String parameters.
ssm_client = boto3.client('ssm', region_name='eu-west-1')
open_ai_key = ssm_client.get_parameter(
    Name='llm-hackathon-openai-key',
    WithDecryption=True,
)['Parameter']['Value']
OPENAI_CLIENT = openai.OpenAI(api_key=open_ai_key)

# Chat model name handed to RAG() in the demo cell.
GPT_4 = 'gpt-4-turbo'

In [None]:
# wrapper around the bedrock client retrieval method
def retrieve(search_string, items=10):
    """Run a vector search against the Bedrock knowledge base.

    Args:
        search_string: Text sent as the retrieval query.
        items: Number of results requested from the vector search
            (defaults to 10).

    Returns:
        The ``'retrievalResults'`` entry of the Bedrock response, unmodified.
    """
    vector_search = {'numberOfResults': items}
    response = BEDROCK_CLIENT.retrieve(
        knowledgeBaseId=KNOWLEDGE_BASE_ID,
        retrievalQuery={'text': search_string},
        retrievalConfiguration={'vectorSearchConfiguration': vector_search},
    )
    return response['retrievalResults']


# wrapper around the openai api
def generate(prompt: str, model) -> str:
    """Send ``prompt`` to an OpenAI chat model and return the reply text.

    The conversation consists of a fixed system message followed by a single
    user message carrying ``prompt``.

    Args:
        prompt: Content of the user message.
        model: Model name forwarded to the chat completions endpoint.

    Returns:
        The message content of the first choice in the API response.
    """
    system_message = {"role": "system", "content": "You are a helpful assistant."}
    user_message = {"role": "user", "content": prompt}
    completion = OPENAI_CLIENT.chat.completions.create(
        messages=[system_message, user_message],
        model=model,
    )
    first_choice = completion.choices[0]
    return first_choice.message.content

In [None]:
# RAG function
def RAG(query: str, model: str, num_retrivals: int) -> Tuple[str, str]:
    """Answer ``query`` with a chat model grounded on retrieved passages.

    Passages are fetched with ``retrieve``, numbered from 0 in the order they
    come back, and embedded in a prompt that tells the model to answer only
    from those sources and to cite them by index in square brackets.

    Args:
        query: The question; used both as retrieval query and in the prompt.
        model: Model name forwarded to ``generate``.
        num_retrivals: Number of passages requested from ``retrieve``.

    Returns:
        A ``(answer, sources)`` tuple: the value returned by ``generate`` and
        the newline-joined ``"<index> : <text>"`` listing of the passages that
        was placed in the prompt.
    """
    passages = retrieve(query, num_retrivals)
    # Prefix each passage with its position so the model can cite it as [n].
    sources_text = "\n".join(
        f"{position} : {passage['content']['text']}"
        for position, passage in enumerate(passages)
    )

    prompt = f"""
                Answer the given question using only the information in the sources section below.
                Respond in a conversational manner, not with a list.
                Enter the index number of the source used between single square brackets, such as [1] or [2][3][4].
                do not combine references together in one bracket

                question:{query}
                sources: {sources_text}
                your answer:
    """
    answer = generate(prompt, model)
    return answer, sources_text

In [None]:
# Demo run: answer one question, grounding the model on 5 retrieved passages.
question = "what are we doing to respond to climate change in belgium"
# RAG returns (model answer, numbered source text); keep both for inspection.
response, retrieved_items = RAG(question, GPT_4, 5)
pprint.pprint(response)

In [None]:
# Show the numbered source passages that were placed in the prompt, so the
# bracketed references in the answer above can be checked against them.
pprint.pprint(retrieved_items)