In [None]:
import openai
import boto3
import pprint
from typing import Tuple, List, Callable, Optional

# Bedrock agent-runtime client used by retrieve() to query the knowledge base.
# Note the region: this client lives in us-east-1 while the SSM client below is in eu-west-1.
BEDROCK_CLIENT = boto3.client('bedrock-agent-runtime', region_name='us-east-1') # note the region
KNOWLEDGE_BASE_ID ='ECZYEUIJ59' # this is the id of the decisions knowledge base in english


# The OpenAI API key is fetched from SSM Parameter Store instead of being hardcoded here.
# NOTE(review): get_parameter is called without WithDecryption; if this parameter is stored
# as a SecureString the returned value would still be encrypted - confirm the parameter type.
ssm_client = boto3.client('ssm', region_name='eu-west-1')
open_ai_key = ssm_client.get_parameter(Name='llm-hackathon-openai-key')['Parameter']['Value']
OPENAI_CLIENT = openai.OpenAI(
    api_key=open_ai_key
)


# OpenAI model identifiers passed as the `model` argument of generate().
GPT_4 = 'gpt-4-turbo'
GPT_3 = 'gpt-3.5-turbo-0125'

In [None]:
# wrapper around the bedrock client retrieval method
def retrieve(search_string, items=10):
    """Run a vector search against the configured Bedrock knowledge base.

    Args:
        search_string: text sent as the retrieval query.
        items: value passed as ``numberOfResults`` (defaults to 10).

    Returns:
        The ``retrievalResults`` entry of the Bedrock response.
    """
    vector_search = {'numberOfResults': items}
    response = BEDROCK_CLIENT.retrieve(
        knowledgeBaseId=KNOWLEDGE_BASE_ID,
        retrievalQuery={'text': search_string},
        retrievalConfiguration={'vectorSearchConfiguration': vector_search},
    )
    return response['retrievalResults']


# wrapper around the openai api
def generate(prompt:str, model)->str:
    """Send ``prompt`` as a single user message to the OpenAI chat completions API.

    A fixed system message ("You are a helpful assistant.") precedes the user
    message. ``model`` is forwarded unchanged as the model identifier.

    Returns:
        The message content of the first choice in the API response.
    """
    conversation = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": prompt},
    ]
    completion = OPENAI_CLIENT.chat.completions.create(messages=conversation, model=model)
    first_choice = completion.choices[0]
    return first_choice.message.content

# Retrieval Improvements

In [None]:
# one method of improving RAG performance is by trying to improve our retrieval
# here we can get creative

def make_verbose(query: str) -> str:
    """Ask the GPT_3 model for a wordier restatement of ``query``.

    The prompt asks the model to expand on the concept of the question,
    especially in relation to Flemish politics.

    Returns:
        The text produced by ``generate`` for the rephrasing prompt.
    """
    rephrase_prompt = f"""
    Take the query given below and rephrase it in a slightly more verbose way - expanding on the concept of the question asked
    specially in relation to flemish politics
    query: {query}
    """
    return generate(rephrase_prompt, GPT_3)

def attempt_answer(query: str) -> str:
    """Ask the GPT_3 model for a hypothetical answer to ``query``.

    The prompt requests a response that relates to Flemish politics; the
    generated text can then be used as a retrieval query in place of the
    original question.

    Returns:
        The text produced by ``generate`` for the hypothetical-answer prompt.
    """
    hypothetical_prompt = f"""
    Provide a hypothetical response to the query given below, the response should relate to flemish politics
    query: {query}
    """
    return generate(hypothetical_prompt, GPT_3)
    

def break_into_facets(query: str, num_facets: int = 3) -> List[str]:
    """Generate several statements covering different aspects of ``query``.

    The GPT_3 model is asked for ``num_facets`` statements, formatted as a
    list whose elements each start with ``*``. The response is split on that
    marker and cleaned up.

    Args:
        query: the user question to break down.
        num_facets: how many statements to request from the model (default 3).

    Returns:
        The generated statements with surrounding whitespace removed. Blank
        fragments are dropped, and any text before the first ``*`` is
        discarded because it is preamble rather than a list element. If the
        response contains no ``*`` at all, the whole stripped response is
        returned as a single facet (or an empty list if it is blank).
    """
    model = GPT_3
    prompt = f"""
    Take the query given below and generate {num_facets} statements which could be related to it in the context of 
    flemish politics, the generated statements should ideally focus on different aspects of the query.
    provide the statements as a list with each element starting with an *
    query: {query}
    """
    # Guard against a missing completion so the split below cannot fail on None.
    response = generate(prompt, model) or ""
    segments = response.split('*')
    if len(segments) > 1:
        # Everything before the first '*' is introduction text, not a facet.
        segments = segments[1:]
    # Strip first, then filter: whitespace-only fragments must not become retrieval queries.
    stripped = [segment.strip() for segment in segments]
    return [facet for facet in stripped if facet]

In [None]:
# Demo: expand the one-word query "farmers" into a more verbose query and pretty-print it.
pprint.pprint(make_verbose("farmers"))

In [None]:
# Demo: generate a hypothetical answer to the question and pretty-print it.
pprint.pprint(attempt_answer("what are we doing about the borders"))

In [None]:
# Demo: split the question into several related statements and pretty-print the list.
pprint.pprint(break_into_facets("are we doing things for the climate"))

In [None]:
def RAG_retrieval_augmentation(query:str,model: str, num_retrivals:int, augmentation_method: Optional[Callable]=None)-> Tuple[str, str]:
    """Answer ``query`` with RAG, optionally rewriting the retrieval query first.

    Args:
        query: the user question; always used verbatim in the final prompt.
        model: model identifier forwarded to ``generate``.
        num_retrivals: number of results requested from ``retrieve`` for
            each retrieval query.
        augmentation_method: optional callable taking ``query`` and returning
            either one retrieval query (a string) or an iterable of them.
            When omitted, ``query`` itself is used for retrieval.

    Returns:
        A tuple ``(answer, sources)`` where ``answer`` is the generated text
        and ``sources`` is the newline-joined, index-prefixed context that
        was placed in the prompt.
    """
    retrieval_query = augmentation_method(query) if augmentation_method else query

    # isinstance (rather than an exact type comparison) so that str subclasses
    # are treated as one query instead of being iterated character by character.
    if isinstance(retrieval_query, str):
        retrieval_queries = [retrieval_query]
    else:
        retrieval_queries = list(retrieval_query)

    # Skip blank queries; if augmentation produced nothing usable, fall back to
    # the original query so the prompt is never built from an empty context.
    retrieval_queries = [
        item for item in retrieval_queries if item and str(item).strip()
    ] or [query]

    # One retrieval per query, flattened in query order.
    context_items = [
        hit
        for item in retrieval_queries
        for hit in retrieve(item, num_retrivals)
    ]

    # The index prefix is what the model is asked to cite in square brackets.
    flat_items = "\n".join(
        f"{index} : {item['content']['text']}" for index, item in enumerate(context_items)
    )

    prompt = f"""
                Answer the given question using only the information in the sources section below.
                Respond in a conversational manner, not with a list.
                Enter the index number of the source used between single square brackets, such as [1] or [2][3][4].
                do not combine references together in one bracket

                question:{query}
                sources: {flat_items}
                your answer:
    """
    return generate(prompt, model), flat_items

In [None]:
# Baseline: no augmentation, so the raw query "healthcare" is used for retrieval (5 results).
pprint.pprint(RAG_retrieval_augmentation("healthcare",GPT_4, 5, augmentation_method=None))

In [None]:
# Same question, but retrieval uses the verbose rewrite produced by make_verbose.
pprint.pprint(RAG_retrieval_augmentation("healthcare",GPT_4, 5, augmentation_method=make_verbose))

In [None]:
# Same question, but retrieval uses the hypothetical answer produced by attempt_answer.
pprint.pprint(RAG_retrieval_augmentation("healthcare",GPT_4, 5, augmentation_method=attempt_answer))

In [None]:
# Same question, but break_into_facets yields several queries; each one is retrieved with 5 results.
pprint.pprint(RAG_retrieval_augmentation("healthcare",GPT_4, 5, augmentation_method=break_into_facets))

# Reranking Approaches

In [None]:
# Inspect the similarity scores returned for each generated facet, to see how
# they compare across separate retrieval calls before reranking on them.
query = "what are we doing for children"
facets = break_into_facets(query)
pprint.pprint(facets)

for facet in facets:
    # Request 5 results per facet and print only their 'score' fields.
    retrievals = retrieve(facet, 5)
    scores = [item['score'] for item in retrievals]
    print(scores)

In [None]:
# The crux: we want the most relevant pieces of context to sit closest to the edge (here, the end) of the context window.

def rerank_on_similarity_score(retrievals: List[dict])-> List[dict]:
    """Order retrieved items by ascending similarity score.

    A simple reranker that only looks at each item's ``'score'`` value, so
    the highest-scoring item ends up last. Intended for use together with
    retrieval augmentation methods that query the vector db several times.

    Returns:
        A new list; the input is left untouched and ties keep their
        original relative order.
    """
    def similarity(entry):
        return entry['score']

    ordered = list(retrievals)
    ordered.sort(key=similarity)
    return ordered


In [None]:
def RAG_retrieval_augmentation_and_reranking(query:str,model: str, num_retrivals:int,
    augmentation_method: Optional[Callable] = None, reranking_method: Optional[Callable]= None)-> Tuple[str, str]:
    """Answer ``query`` with RAG, with optional query augmentation and reranking.

    Args:
        query: the user question; always used verbatim in the final prompt.
        model: model identifier forwarded to ``generate``.
        num_retrivals: number of results requested from ``retrieve`` for
            each retrieval query.
        augmentation_method: optional callable taking ``query`` and returning
            either one retrieval query (a string) or an iterable of them.
            When omitted, ``query`` itself is used for retrieval.
        reranking_method: optional callable taking the combined list of
            retrieved items and returning them in the order in which they
            should appear in the prompt.

    Returns:
        A tuple ``(answer, sources)`` where ``answer`` is the generated text
        and ``sources`` is the newline-joined, index-prefixed context that
        was placed in the prompt.
    """
    retrieval_query = augmentation_method(query) if augmentation_method else query

    # isinstance (rather than an exact type comparison) so that str subclasses
    # are treated as one query instead of being iterated character by character.
    if isinstance(retrieval_query, str):
        retrieval_queries = [retrieval_query]
    else:
        retrieval_queries = list(retrieval_query)

    # Skip blank queries; if augmentation produced nothing usable, fall back to
    # the original query so the prompt is never built from an empty context.
    retrieval_queries = [
        item for item in retrieval_queries if item and str(item).strip()
    ] or [query]

    # One retrieval per query, flattened in query order.
    context_items = [
        hit
        for item in retrieval_queries
        for hit in retrieve(item, num_retrivals)
    ]

    # Rerank before numbering so the cited indices match the final prompt order.
    if reranking_method:
        context_items = reranking_method(context_items)

    flat_items = "\n".join(
        f"{index} : {item['content']['text']}" for index, item in enumerate(context_items)
    )

    prompt = f"""
                Answer the given question using only the information in the sources section below.
                Respond in a conversational manner, not with a list.
                Enter the index number of the source used between single square brackets, such as [1] or [2][3][4].
                do not combine references together in one bracket

                question:{query}
                sources: {flat_items}
                your answer:
    """
    return generate(prompt, model), flat_items

In [None]:
# Full pipeline demo: facet-based query augmentation (5 results per facet),
# with the combined results reranked by similarity score before prompting.
pprint.pprint(RAG_retrieval_augmentation_and_reranking("what are we doing for children",GPT_4, 5,
                augmentation_method=break_into_facets,
                reranking_method=rerank_on_similarity_score))