# Step by Step Guide to a Dungeons and Dragons (DnD) Retrieval Augmented Generation (RAG) Copilot

## Set-up

### Import necessary Python Libraries

In [1]:
# Import the Necessary Python Libraries

# Import standard and non-Azure libraries
import os
import json
from dotenv import load_dotenv

# Import the Azure SDK libraries
from azure.storage.blob import BlobServiceClient, BlobClient
from azure.ai.formrecognizer import DocumentAnalysisClient
from azure.core.credentials import AzureKeyCredential

# Import the custom functions
import functions as fn


### Load environment variables from .env and configuration variables from config.json

In [2]:
# Populate environment variables from the .env file before the secrets are read below
load_dotenv()

# Read the non-secret settings from config.json.
# JSON is UTF-8 by specification, so decode it explicitly instead of relying on the
# platform-default encoding (which is not UTF-8 on every OS).
with open('config.json', 'r', encoding='utf-8') as f:
    config = json.load(f)

# Secrets and endpoints come from environment variables.
# os.environ[...] raises KeyError for a missing variable, so an incomplete setup fails in this cell.
openai_api_key: str = os.environ["OPENAI_API_KEY"]
openai_api_base: str = os.environ["OPENAI_API_BASE"]
vector_store_address: str = os.environ['SEARCH_ENDPOINT']
vector_store_password: str = os.environ['SEARCH_KEY']
document_intelligence_endpoint: str = os.environ["DOCUMENT_INTELLIGENCE_ENDPOINT"]
document_intelligence_key: str = os.environ["DOCUMENT_INTELLIGENCE_KEY"]
storage_account_url: str = os.environ["STORAGE_ACCOUNT_URL"]
blob_raw_sas_token: str = os.environ["BLOB_RAW_SAS_TOKEN"]
blob_processed_sas_token: str = os.environ["BLOB_PROCESSED_SAS_TOKEN"]
blob_final_sas_token: str = os.environ["BLOB_FINAL_SAS_TOKEN"]

# Non-secret settings come from config.json (the trailing comments show example values)
openai_api_type: str = config["AOAI_CONFIGS"]["API_TYPE"] # "azure"
openai_api_version: str = config["AOAI_CONFIGS"]["API_VERSION"] # = "2023-08-01-preview"
doc_intel_model: str = config["DOC_INTEL_CONFIGS"]["ANALYSIS_MODEL"] # = "prebuilt-layout"
raw_container_name: str = config["BLOB_STORAGE_CONFIGS"]["RAW_CONTAINER"] # = "dnd-rag-bot-raw"
processed_container_name: str = config["BLOB_STORAGE_CONFIGS"]["PROCESSED_CONTAINER"] # = "dnd-rag-bot-processed"
final_container_name: str = config["BLOB_STORAGE_CONFIGS"]["FINAL_CONTAINER"] # = "dnd-rag-bot-final"

## Read the documents from Raw, process with Document Intelligence, and write to Processed

### Create the BlobServiceClients for the storage containers

In [3]:
# Authentication: each client is given the storage account URL plus a SAS token.
# A SAS token lets access be time-limited; alternatives are a token scoped to the whole
# account rather than a container, or default credentials
# (see https://pypi.org/project/azure-identity/).

# One BlobServiceClient per pipeline stage:
#   raw       - the source documents
#   processed - the processed documents
#   final     - the chunked data, before it goes into the search index
raw_blob_service_client = BlobServiceClient(
    account_url=storage_account_url,
    credential=blob_raw_sas_token,
)
processed_blob_service_client = BlobServiceClient(
    account_url=storage_account_url,
    credential=blob_processed_sas_token,
)
final_blob_service_client = BlobServiceClient(
    account_url=storage_account_url,
    credential=blob_final_sas_token,
)

#### Optional: Write to / Read from Local Storage

In [None]:
# This cell is intentionally disabled: every line is commented out. Un-comment either the
# "write" pair or the "read" pair below to cache the Document Intelligence results locally.
# NOTE(review): in the cells shown in this notebook, dnd_pdf_doc_intel_result is never assigned and
# dnd_pdf_doc_intel_dict is only assigned in a later cell - confirm both exist before enabling the writes.

# # Use the local_file_write function to write the results to a local file
# # We are writing the results to a text file and the dictionary to a json file
# fn.local_file_write(dnd_pdf_doc_intel_result, 'text', '../data/results/raw_results', 'dnd_pdf_doc_intel_result.txt')
# fn.local_file_write(dnd_pdf_doc_intel_dict, 'json', '../data/results/dictionaries', 'dnd_pdf_doc_intel_dict.json')

# # Or read in the results from local to save time / doc intel costs
# dnd_pdf_doc_intel_result = fn.local_file_read('../data/results/raw_results/dnd_pdf_doc_intel_result.txt', 'text')
# dnd_pdf_doc_intel_dict = fn.local_file_read('../data/results/dictionaries/dnd_pdf_doc_intel_dict.json', 'json')

### Read in from Processed Blob Storage
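Reading the Document Intelligence results back from the Processed container, instead of reusing in-memory variables, decouples this step from the previous one so that each can live in its own Function App.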

In [4]:
# Index the blobs available in the processed container.
# Judging by this cell's saved output, the result is nested as
# container name -> file extension -> blob name -> {'file_name', 'blob_url'}.
# NOTE(review): the helper prints every blob URL including its SAS query string;
# clear this cell's output before sharing or committing the notebook.
dictionary_of_processed_blobs = fn.blob_name_and_url_dict(
    processed_blob_service_client,
    processed_container_name,
)
# print(dictionary_of_processed_blobs)

The container dnd-rag-bot-processed is being accessed.
The file dnd-rag-bot-processed/dictionaries/dnd_pdf_doc_intel_dict.json located at https://iancogsearchstorage.blob.core.windows.net/dnd-rag-bot-processed/dnd-rag-bot-processed/dictionaries/dnd_pdf_doc_intel_dict.json?si=dnd-rag-bot&spr=https&sv=2022-11-02&sr=c&sig=R6J8EuDEVTvG41nmQ1QlSDexuRT2%2BqDNv0yBiJc0kvc%3D is being added to the blob list.
The file dnd-rag-bot-processed/raw_results/dnd_pdf_doc_intel_result.txt located at https://iancogsearchstorage.blob.core.windows.net/dnd-rag-bot-processed/dnd-rag-bot-processed/raw_results/dnd_pdf_doc_intel_result.txt?si=dnd-rag-bot&spr=https&sv=2022-11-02&sr=c&sig=R6J8EuDEVTvG41nmQ1QlSDexuRT2%2BqDNv0yBiJc0kvc%3D is being added to the blob list.
{'dnd-rag-bot-processed': {'.json': {'dnd-rag-bot-processed/dictionaries/dnd_pdf_doc_intel_dict.json': {'file_name': 'dnd-rag-bot-processed/dictionaries/dnd_pdf_doc_intel_dict.json', 'blob_url': 'https://iancogsearchstorage.blob.core.windows.net/dnd

In [35]:
# We work primarily with the JSON (dictionary) representation of the Document Intelligence
# results, so load that from the dictionary of processed blobs.
# The container name comes from config (processed_container_name) rather than a hard-coded
# literal, so it stays in sync with the container that was indexed above.
dnd_pdf_doc_intel_dict = fn.load_blob(
    dictionary_of_processed_blobs,
    container_name=processed_container_name,
    file_type='.json',
    file_name='dnd-rag-bot-processed/dictionaries/dnd_pdf_doc_intel_dict.json',
)

# The raw text result can be loaded the same way if it is needed:
# dnd_pdf_doc_intel_result = fn.load_blob(dictionary_of_processed_blobs, container_name=processed_container_name, file_type='.txt', file_name='dnd-rag-bot-processed/raw_results/dnd_pdf_doc_intel_result.txt')

blob_details:{'file_name': 'dnd-rag-bot-processed/raw_results/dnd_pdf_doc_intel_result.txt', 'blob_url': 'https://iancogsearchstorage.blob.core.windows.net/dnd-rag-bot-processed/dnd-rag-bot-processed/raw_results/dnd_pdf_doc_intel_result.txt?si=dnd-rag-bot&spr=https&sv=2022-11-02&sr=c&sig=R6J8EuDEVTvG41nmQ1QlSDexuRT2%2BqDNv0yBiJc0kvc%3D'}
blob_details:{'file_name': 'dnd-rag-bot-processed/dictionaries/dnd_pdf_doc_intel_dict.json', 'blob_url': 'https://iancogsearchstorage.blob.core.windows.net/dnd-rag-bot-processed/dnd-rag-bot-processed/dictionaries/dnd_pdf_doc_intel_dict.json?si=dnd-rag-bot&spr=https&sv=2022-11-02&sr=c&sig=R6J8EuDEVTvG41nmQ1QlSDexuRT2%2BqDNv0yBiJc0kvc%3D'}


In [51]:
# Preview only the first few paragraphs. Each entry carries its role, content, bounding
# regions and spans, so a 900-item slice floods the notebook output and bloats the saved file.
# Leaving the slice as the last expression lets the notebook render it instead of print().
dnd_pdf_doc_intel_dict['paragraphs'][:5]

[{'role': 'title', 'content': "PLAYER'S HANDBOOK", 'bounding_regions': [{'page_number': 1, 'polygon': [{'x': 1.5854, 'y': 1.0265}, {'x': 7.2441, 'y': 1.0313}, {'x': 7.2435, 'y': 1.7474}, {'x': 1.5848, 'y': 1.7426}]}], 'spans': [{'offset': 0, 'length': 17}]}, {'role': None, 'content': 'DUNGEONS DRAGONS®', 'bounding_regions': [{'page_number': 1, 'polygon': [{'x': 0.6829, 'y': 9.0282}, {'x': 3.3045, 'y': 9.0282}, {'x': 3.3045, 'y': 9.2813}, {'x': 0.6829, 'y': 9.2813}]}], 'spans': [{'offset': 18, 'length': 17}]}, {'role': None, 'content': "Everything a player needs to create heroic characters for the world's greatest roleplaying game", 'bounding_regions': [{'page_number': 1, 'polygon': [{'x': 1.9483, 'y': 10.1645}, {'x': 6.9003, 'y': 10.1645}, {'x': 6.9003, 'y': 10.6324}, {'x': 1.9483, 'y': 10.6324}]}], 'spans': [{'offset': 36, 'length': 95}]}, {'role': 'title', 'content': 'CONTENTS', 'bounding_regions': [{'page_number': 2, 'polygon': [{'x': 3.8393, 'y': 0.6063}, {'x': 5.2624, 'y': 0.6254}

In [52]:
# Inspect the raw structure of the second page (list index 1) of the analysis result
print(dnd_pdf_doc_intel_dict["pages"][1])

{'page_number': 2, 'angle': 0.2677353024482727, 'width': 8.5, 'height': 11.0, 'unit': 'inch', 'lines': [{'content': 'CONTENTS', 'polygon': [{'x': 3.8393, 'y': 0.6063}, {'x': 5.2624, 'y': 0.6254}, {'x': 5.2576, 'y': 0.8355}, {'x': 3.8393, 'y': 0.8212}], 'spans': [{'offset': 132, 'length': 8}]}, {'content': 'PREFACE', 'polygon': [{'x': 1.2368, 'y': 1.0217}, {'x': 1.977, 'y': 1.0313}, {'x': 1.9722, 'y': 1.1984}, {'x': 1.232, 'y': 1.1888}], 'spans': [{'offset': 141, 'length': 7}]}, {'content': '4', 'polygon': [{'x': 4.3025, 'y': 1.0694}, {'x': 4.3598, 'y': 1.0694}, {'x': 4.3598, 'y': 1.1745}, {'x': 4.3025, 'y': 1.1697}], 'spans': [{'offset': 149, 'length': 1}]}, {'content': 'INTRODUCTION', 'polygon': [{'x': 1.2225, 'y': 1.3798}, {'x': 2.5166, 'y': 1.3941}, {'x': 2.5166, 'y': 1.5373}, {'x': 1.2225, 'y': 1.5278}], 'spans': [{'offset': 151, 'length': 12}]}, {'content': '5', 'polygon': [{'x': 4.3025, 'y': 1.4132}, {'x': 4.3598, 'y': 1.4132}, {'x': 4.3598, 'y': 1.5135}, {'x': 4.3025, 'y': 1.508

In [45]:
# Build a lookup of page_number -> plain text for every analysed page.
#
# The text of a page is taken from its 'lines' entries only. The 'words' entries carry the
# same text split into individual words, so appending them as well (as this cell previously
# did) stored every page's text twice.
pages_content = {}

for page in dnd_pdf_doc_intel_dict['pages']:
    # Prefer line-level text; fall back to word-level text only if a page has no lines
    text_items = page.get('lines') or page.get('words') or []

    # Join with single spaces (no trailing space); a page without text maps to ""
    pages_content[page['page_number']] = " ".join(item['content'] for item in text_items)

In [55]:
# Display the extracted text for page 2 (pages_content is keyed by page_number, not by list index)
pages_content[2]

'CONTENTS PREFACE 4 INTRODUCTION 5 Worlds of Adventure. 5 Using This Book 6 How to Play. 6 Adventures 7 PART 1 9 CHAPTER 1: STEP-BY-STEP CHARACTERS 11 Beyond 1st Level 15 CHAPTER 2: RACES 17 Choosing a Race. 17 Dwarf. 18 Elf 21 Halfling 26 Human 29 Dragonborn 32 Gnome. 35 Half-Elf 38 Half-Orc 40 Tiefling 42 CHAPTER 3: CLASSES 45 Barbarian. 46 Bard 51 Cleric. 56 Druid. 64 Fighter 70 Monk 76 Paladin 82 Ranger 89 Rogue .. 94 Sorcerer 99 Warlock 105 Wizard 112 CHAPTER 4: PERSONALITY AND BACKGROUND 121 Character Details 121 Inspiration 125 Backgrounds 125 CHAPTER 5: EQUIPMENT 143 Starting Equipment. 143 Wealth 143 Armor and Shields 144 Weapons. 146 Adventuring Gear 148 Tools ... 154 Mounts and Vehicles 155 Trade Goods 157 Expenses. 157 Trinkets 159 CHAPTER 6: CUSTOMIZATION OPTIONS 163 Multiclassing. 163 Feats 165 PART 2 171 CHAPTER 7: USING ABILITY SCORES 173 Ability Scores and Modifiers. 173 Advantage and Disadvantage .. 173 Proficiency Bonus. 173 Ability Checks. 174 Using Each Ability 175