In [2]:
import os
import json
import time
import requests
import random
import base64
from collections import OrderedDict
import urllib.request
# from tqdm import tqdm
import openai
from langchain.embeddings.openai import OpenAIEmbeddings
from dotenv import load_dotenv
from typing import List

# Pull settings from a local .env file (if present) into the process environment
# before any os.getenv() lookups below.
load_dotenv()

# Azure OpenAI client settings: endpoint and key come from the environment,
# the API flavour and version are pinned here.
openai.api_type, openai.api_version = "azure", "2023-03-15-preview"
openai.api_base, openai.api_key = os.getenv("OPENAI_API_BASE"), os.getenv("OPENAI_API_KEY")

# Deployment-level settings; each is None when the variable is not set.
DEPLOYMENT_NAME, MODEL_NAME, ENVIRONMENT = (
    os.getenv("DEPLOYMENT_NAME"),
    os.getenv("MODEL_NAME"),
    os.getenv("ENVIRONMENT"),
)

# Embedding client used to vectorize questions for vector search.
embedder = OpenAIEmbeddings(deployment="text-embedding-ada-002")


In [3]:
def get_search_results(query: str, indexes: list,
                       k: int = 5,
                       reranker_threshold: int = 1,
                       sas_token: str = "",
                       vector_search: bool = False,
                       similarity_k: int = 3,
                       query_vector: list = []) -> OrderedDict:
    """Query one or more search indexes and return the best-scoring hits.

    Every index in ``indexes`` receives the same semantic search request
    (optionally combined with a vector query). Hits whose reranker score is
    strictly greater than ``reranker_threshold`` are merged across indexes,
    sorted by score in descending order and truncated.

    Args:
        query: Free-text search query.
        indexes: Names of the search indexes to query.
        k: Number of results requested from each index (``top``); also used as
            the ``k`` of the vector query when ``vector_search`` is True.
        reranker_threshold: Minimum (exclusive) ``@search.rerankerScore`` a hit
            needs in order to be kept.
        sas_token: Suffix appended to every non-empty ``location`` value.
        vector_search: When True, a vector query on the ``contentVector`` field
            is added and kept hits are ranked by reranker score; otherwise
            they are ranked by ``@search.score``.
        similarity_k: Maximum number of hits returned when ``vector_search`` is
            True. Without vector search the cap is ``k * len(indexes)``.
        query_vector: Embedding of ``query``; only sent when ``vector_search``
            is True. The default list is read, never mutated.

    Returns:
        An ``OrderedDict`` keyed by document ``id``, best hit first. Each value
        is a dict with the keys ``title``, ``name``, ``location``, ``caption``,
        ``index``, ``content`` and ``score``. A hit with the same ``id`` in
        several indexes is reported once (the last index queried wins).

    Raises:
        KeyError: If ``AZURE_SEARCH_KEY``, ``AZURE_SEARCH_API_VERSION`` or
            ``AZURE_SEARCH_ENDPOINT`` is missing from the environment.
        requests.HTTPError: If the search service answers with an error status.
    """
    headers = {'Content-Type': 'application/json', 'api-key': os.environ["AZURE_SEARCH_KEY"]}
    params = {'api-version': os.environ['AZURE_SEARCH_API_VERSION']}
    endpoint = os.environ['AZURE_SEARCH_ENDPOINT']
    # Without a timeout a stalled connection would block the notebook forever.
    request_timeout_s = 30

    agg_search_results = dict()

    for index in indexes:
        search_payload = {
            "search": query,
            "queryType": "semantic",
            "semanticConfiguration": "my-semantic-config",
            "count": "true",
            "speller": "lexicon",
            "queryLanguage": "en-us",
            "captions": "extractive",
            "answers": "extractive",
            "top": k,
            "select": "id, title, content, name, location",
        }
        if vector_search:
            search_payload["vectors"] = [{"value": query_vector, "fields": "contentVector", "k": k}]

        resp = requests.post(endpoint + "/indexes/" + index + "/docs/search",
                             data=json.dumps(search_payload), headers=headers, params=params,
                             timeout=request_timeout_s)
        # Fail loudly on 4xx/5xx instead of hitting a KeyError on 'value' later.
        resp.raise_for_status()
        agg_search_results[index] = resp.json()

    content = dict()

    for index, search_results in agg_search_results.items():
        for result in search_results['value']:
            # Keep only hits above the threshold (max possible reranker score is 4).
            if not result['@search.rerankerScore'] > reranker_threshold:
                continue

            # A hit may come back without any caption; fall back to an empty string.
            captions = result.get('@search.captions') or []
            caption = captions[0].get('text', "") if captions else ""

            if vector_search:
                score = result['@search.rerankerScore']  # Uses the reranker score
            else:
                score = result['@search.score']  # Uses the Hybrid RRF score

            content[result['id']] = {
                "title": result['title'],
                "name": result['name'],
                "location": result['location'] + sas_token if result['location'] else "",
                "caption": caption,
                "index": index,
                "content": result['content'],
                "score": score,
            }

    # After filtering, keep only the best `topk` hits, highest score first.
    topk = similarity_k if vector_search else k * len(indexes)
    ranked_ids = sorted(content, key=lambda doc_id: content[doc_id]["score"], reverse=True)

    # Slicing (rather than counting inside the loop) makes topk <= 0 yield no hits.
    return OrderedDict((doc_id, content[doc_id]) for doc_id in ranked_ids[:max(topk, 0)])

In [4]:
# Sample questions for the search cells below; exactly one is active at a time.
# QUESTION = "Low vision devices coverage?"
# QUESTION = "Can my benefit cover the device cost for Therapeutic?"
QUESTION = (
    "Formulary preferred generic drugs for prescription drugs "
    "at retail pharmacy coverage in-network providers?"
)


In [5]:
# Semantic (non-vector) query against a single index, requesting 3 hits.
ordered_results = get_search_results(query=QUESTION, indexes=["demo_index_0"], k=3)

# Bare last expression so the notebook renders the result.
ordered_results

OrderedDict([('UHJlZmVycmVkX0dvbGRfRVBPXzE1MDBfQmVuZWZpdF8yMDIyX2luX1dhc2hpbmd0b25fMzY=',
              {'title': '49831WA194  (01-2022) 31 Preferred Gold   We credit the difference to premium rates for the next benefit year  ',
               'name': 'Preferred_Gold_EPO_1500_Benefit_2022_in_Washington_36',
               'location': 'https://openaiembedding.blob.core.windows.net/document-chunks/Preferred_Gold_EPO_1500_Benefit_2022_in_Washington_36.txt',
               'caption': "Your prescription drug benefit uses a drug list . (This is sometimes referred to as a formulary. ) We review \r medical studies, scientific literature and other pharmaceutical information to choose safe and effective drugs \r for the p rescription drug formulary . This plan doesn 't cover certain categories of drugs .",
               'index': 'demo_index_0',
               'content': "49831WA194  (01-2022) 31 Preferred Gold   We credit the difference to premium rates for the next benefit year  \r\nIf your be

In [6]:
# Same question with vector_search=True: the question's embedding is sent
# along as the vector query and hits are ranked by reranker score.
# NOTE(review): similarity_k=5 is larger than k=3; with a single index the
# service is presumably asked for only 3 hits, so the cap never binds - confirm.
ordered_results = get_search_results(
    query=QUESTION,
    indexes=["demo_index_vector"],
    k=3,
    reranker_threshold=1,
    vector_search=True,
    similarity_k=5,
    query_vector=embedder.embed_query(QUESTION),
)
ordered_results

OrderedDict([('UHJlZmVycmVkX0dvbGRfRVBPXzE1MDBfQmVuZWZpdF8yMDIzX2luX1dhc2hpbmd0b25fMzI=',
              {'title': '49831WA194  (01-2023) 27 Preferred Gold  services such as x -rays, lab work, therapeutic injections, facility fees, office surgeries and medical equipment and ',
               'name': 'Preferred_Gold_EPO_1500_Benefit_2023_in_Washington_32',
               'location': 'https://openaiembedding.blob.core.windows.net/document-chunks/Preferred_Gold_EPO_1500_Benefit_2023_in_Washington_32.txt',
               'caption': 'Prescription Drug Formulary  \r This benefit uses a speci fic list of covered prescription drugs, sometimes referred to as a formulary. Our \r Pharmacy and Therapeutics Committee, which includes medical practitioners and pharmacists from the \r community, frequently reviews current medical studies and pharmaceutical information .',
               'index': 'demo_index_vector',
               'content': "49831WA194  (01-2023) 27 Preferred Gold  services such as x 