### Install Python Packages

In [None]:
!pip install azure-functions
!pip install azure-core
!pip install azure-cosmos
!pip install openai
!pip install numpy
!pip install requests
!pip install pandas
!pip install azure-storage-blob 
!pip install azure-identity
!pip install smart_open
!pip install tenacity
!pip install pinecone-client
!pip install redis
!pip install tiktoken
!pip install azure-storage-file-share
!pip install python-dotenv
!pip install azure-search-documents
!pip install azure-ai-formrecognizer

### Import Packages

In [3]:
import os
from dotenv import load_dotenv
load_dotenv(override=True)

import shutil
import sys
sys.path.append('./utils')

### Uncomment below imports as needed -- make sure that all relevant values and keys in the .env file are properly populated
# from utils import redis_helpers
# from utils import helpers
# from utils import language
# from utils import openai_helpers
# from utils import storage
# from utils import bot_helpers

%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


### Activate Cognitive Search Ingestion

In [2]:
#### Ingest all knowledge base documents
# WARNING: this is destructive. The recorded output of this cell shows the run
# deleting and re-creating the search index, skillset, data source and indexer
# before starting the indexer -- do not run it against an index you want to keep.
from utils import cogsearch_helpers

cogsearch_helpers.ingest_kb()

Index km-openai Deleted
Index km-openai created
Deleted Skillset - km-openai-skills
Created new Skillset - km-openai-skills
Deleted Indexer - km-openai-indexer
Deleted Data Source - km-openai-skills
Created new Data Source Connection - km-openai-docs
Created new Indexer - km-openai-indexer
Running Indexer km-openai-indexer


### Activate Form Recognizer Ingestion

In [8]:
#### Ingest all form documents
# Runs the form-processing helper over the input container and writes its
# results to the output container. Both container names come from the
# environment (populated from .env by the import cell).
from utils import storage
from utils import fr_helpers

FR_CONTAINER = os.environ['FR_CONTAINER']                    # container holding the forms to process
OUTPUT_BLOB_CONTAINER = os.environ['OUTPUT_BLOB_CONTAINER']  # container receiving the results

fr_helpers.process_forms(
    in_container=FR_CONTAINER,
    out_container=OUTPUT_BLOB_CONTAINER,
)



Processing now 'service manual-pages-8-33_1-1.pdf' with SAS URL https://storagekmopai.blob.core.windows.net/kmoaiforms/service%20manual-pages-8-33_1-1.pdf?se=2028-03-18T16%3A20%3A44Z&sp=r&sv=2021-08-06&sr=b&sig=<REDACTED>
https://storagekmopai.blob.core.windows.net/kmoaiforms/service%20manual-pages-8-33_1-1.pdf
Processing now 'service manual-pages-8-33_10-10.pdf' with SAS URL https://storagekmopai.blob.core.windows.net/kmoaiforms/service%20manual-pages-8-33_10-10.pdf?se=2028-03-18T16%3A20%3A56Z&sp=r&sv=2021-08-06&sr=b&sig=<REDACTED>
https://storagekmopai.blob.core.windows.net/kmoaiforms/service%20manual-pages-8-33_10-10.pdf
Processing now 'service manual-pages-8-33_11-11.pdf' with SAS URL https://storagekmopai.blob.core.windows.net/kmoaiforms/service%20manual-pages-8-33_11-11.pdf?se=2028-03-18T16%3A21%3A02Z&sp=r&sv=2021-08-06&sr=b&sig=<REDACTED>
https://storagekmopai.blob.core.

In [9]:
import os

import requests

# Send one test question to the deployed BotQnAHTTPFunc HTTP function.
#
# The endpoint and its access key are read from the environment instead of being
# hard-coded, so the secret never ends up in the saved notebook. Add to .env:
#   BOT_QNA_FUNC_URL=https://<function-app>.azurewebsites.net/api/BotQnAHTTPFunc
#   BOT_QNA_FUNC_KEY=<function key>
# NOTE(review): a function key was previously committed in this cell -- it should
# be treated as leaked and rotated.
BOT_QNA_FUNC_URL = os.environ["BOT_QNA_FUNC_URL"]
BOT_QNA_FUNC_KEY = os.environ["BOT_QNA_FUNC_KEY"]

response = requests.post(
    BOT_QNA_FUNC_URL,
    params={"code": BOT_QNA_FUNC_KEY},  # same `code` query parameter the URL carried before
    json={"query": "ما هي رسوم تجديد البطاقة الصحية في قطر؟"},
    timeout=240,  # generous, but a hung request can no longer block the kernel forever
)

In [10]:
# Show the HTTP status code of the `response` object created by the request cell.
response.status_code

200

In [11]:
import json

# Decode the JSON body once; the cell reads its "answer" and "context" fields.
payload = json.loads(response.text)
print("start: ", payload["answer"])

# Persist the retrieved context and the generated answer next to the project
# folder (UTF-8, because the text is not ASCII).
for field, target in (
    ("context", "../test_context_2.txt"),
    ("answer", "../test_answer_2.txt"),
):
    with open(target, "w", encoding="utf-8") as f:
        f.write(payload[field])

start:  البطاقة الصحية الصادرة في قطر مجانية وصالحة لمدة خمس سنوات. بعد فترة الخمس سنوات ، يجب تجديد البطاقة. رسوم التجديد للقطريين هي 100 ريال قطري و 50 ريال قطري لغير القطريين. قد يتم تطبيق رسوم إضافية على الفحوصات الطبية واختبارات الرؤية.


### Interrogate the APIs with the sample Knowledge Base

In [3]:
### Use this cell to query Redis with the below queries
# Each query is sent through bot_helpers.openai_interrogate_text; the helper's
# return value is parsed as JSON and its 'answer' field is printed.
import json
from utils import bot_helpers

DAVINCI_003_COMPLETIONS_MODEL = os.environ['DAVINCI_003_COMPLETIONS_MODEL']
ADA_002_EMBEDDING_MODEL = os.environ['ADA_002_EMBEDDING_MODEL']

queries = [
    "ما هي مهارات تكنولوجيا المعلومات والاتصالات اللازمة لرؤية حكومة قطر الإلكترونية؟",
]

for q in queries:
    raw_output = bot_helpers.openai_interrogate_text(
        q,
        DAVINCI_003_COMPLETIONS_MODEL,
        ADA_002_EMBEDDING_MODEL,
        5,
        False,
    )
    output = json.loads(raw_output)
    print("\n\n", output['answer'], '\n\n\n###############################')
    


ServiceRequestError: Invalid URL '': No scheme supplied. Perhaps you meant https://?

## Experimentation Code Below - NO NEED TO RUN 
### For your reference only

In [1]:
#### Reset Index in Redis
# Set `reset_index` to False to make this cell a no-op.
from utils import redis_helpers

reset_index = True

if reset_index:
    redis_conn = redis_helpers.get_new_conn()
    redis_helpers.redis_reset_index(redis_conn)

Connected to redis


In [None]:
### Use this cell to load embeddings directly into Redis from this notebook
# For every JSON file in ./dump, embeddings are generated at up to three chunk
# sizes (suffixes 'S', 'M', 'L') and the combined list is loaded into Redis.
# The small size is always generated; medium and large are skipped when their
# token count is configured as 0.

import json
import os

# Imported here (like the other cells import their own helper module) so the
# cell does not depend on the commented-out import in the setup cell.
from utils import helpers


CHOSEN_EMB_MODEL = os.environ['CHOSEN_EMB_MODEL']
SMALL_EMB_TOKEN_NUM = int(os.environ['SMALL_EMB_TOKEN_NUM'])
MEDIUM_EMB_TOKEN_NUM = int(os.environ['MEDIUM_EMB_TOKEN_NUM'])
LARGE_EMB_TOKEN_NUM = int(os.environ['LARGE_EMB_TOKEN_NUM'])


emb_documents = []


for item in os.listdir("dump"):
    path = os.path.join("dump", item)

    # Decode explicitly as UTF-8: relying on the platform default encoding breaks
    # on non-ASCII document text wherever that default is not UTF-8.
    with open(path, 'r', encoding='utf-8') as openfile:
        data = json.load(openfile)

    emb_documents += helpers.generate_embeddings(data, CHOSEN_EMB_MODEL, SMALL_EMB_TOKEN_NUM, text_suffix='S')

    if MEDIUM_EMB_TOKEN_NUM != 0:
        emb_documents += helpers.generate_embeddings(data, CHOSEN_EMB_MODEL, MEDIUM_EMB_TOKEN_NUM, text_suffix='M')

    if LARGE_EMB_TOKEN_NUM != 0:
        emb_documents += helpers.generate_embeddings(data, CHOSEN_EMB_MODEL, LARGE_EMB_TOKEN_NUM, text_suffix='L')


helpers.load_embedding_docs_in_redis(emb_documents)

In [None]:
# Generate embeddings for the JSON documents in ./dump and save them to test.pkl.

# Imported here so the cell does not depend on the commented-out import in the
# setup cell.
from utils import helpers

# NOTE(review): ADA_002_EMBEDDING_MODEL is assigned in the "Interrogate the APIs"
# cell, and ADA_002_MODEL_MAX_TOKENS is not assigned in any cell visible in this
# notebook -- both must be defined before this cell is run. TODO confirm where
# ADA_002_MODEL_MAX_TOKENS is supposed to come from.
emb_documents = []

emb_documents += helpers.generate_embeddings_from_json_docs('dump', ADA_002_EMBEDDING_MODEL, ADA_002_MODEL_MAX_TOKENS, text_suffix='XL', limit=-1)

print(f"Generated {len(emb_documents)} embeddings.")
helpers.save_embdding_docs_to_pkl(emb_documents, "test.pkl")

In [24]:
# Read previously saved embedding documents from test.pkl and load them into Redis.

# Imported here so the cell does not depend on the commented-out import in the
# setup cell.
from utils import helpers

# NOTE(review): presumably pickle-based, going by the helper's name -- only load
# .pkl files you created yourself, since unpickling untrusted data can execute
# arbitrary code.
emb_documents = helpers.load_embedding_docs_from_pkl("test.pkl")
helpers.load_embedding_docs_in_redis(emb_documents)

Loading 141 embeddings into Redis


In [None]:
# Sample questions for the knowledge base. Only the first one is actually sent;
# the rest are kept for reference.
queries = [
    "in which classes did the Danish sailors qualify?",
    "what are the reviews of the Lost City hotel?",
]

for q in queries[:1]:
    output = bot_helpers.openai_interrogate_text(
        q,
        DAVINCI_003_COMPLETIONS_MODEL,
        ADA_002_EMBEDDING_MODEL,
        5,
        False,
    )
    print("\n\n", output, '\n\n\n###############################')

