In [3]:
import os
from langchain_community.document_loaders import WebBaseLoader
page_url = "https://learn.microsoft.com/en-us/azure/storage/common/storage-account-overview"
loader = WebBaseLoader(web_paths=[page_url])
content  = loader.load()
MODEL_NAME = "gpt-4o"
API_VERSION="2024-12-01-preview"

os.environ["AZURE_OPENAI_API_KEY"] = ""
os.environ["AZURE_OPENAI_ENDPOINT"] = ""
os.environ["OPENAI_API_KEY"] = ""


USER_AGENT environment variable not set, consider setting it to identify your requests.


In [4]:
from langchain_text_splitters import RecursiveCharacterTextSplitter
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=80)
texts = text_splitter.split_documents(content)

In [5]:
from langchain_openai import AzureOpenAIEmbeddings

from langchain_community.vectorstores import FAISS
embedding_db = AzureOpenAIEmbeddings(model="text-embedding-ada-002")
fdb = FAISS.from_documents(documents=texts , embedding=embedding_db)


In [6]:
fdb.similarity_search_with_score(query="Tiers")

[(Document(id='2ce6c3c1-4038-405c-b249-c7fd552db8e3', metadata={'source': 'https://learn.microsoft.com/en-us/azure/storage/common/storage-account-overview', 'title': 'Storage account overview - Azure Storage | Microsoft Learn', 'description': 'Learn about the different types of storage accounts in Azure Storage. Review account naming, performance tiers, access tiers, redundancy, encryption, endpoints, and more.', 'language': 'en-us'}, page_content="Region refers to the geographical region in which your account is based.\nAccount type refers to the type of storage account you're using.\nAccess tier refers to the data usage pattern you’ve specified for your general-purpose v2 or Blob Storage account.\nCapacity refers to how much of your storage account allotment you're using to store data.\nRedundancy determines how many copies of your data are maintained at one time, and in what locations.\nTransactions refer to all read and write operations to Azure Storage.\nData egress refers to any 

## With retrieval chain (Get response from llm)

In [7]:
from langchain_openai import AzureChatOpenAI

llm_model = AzureChatOpenAI(model=MODEL_NAME, api_version=API_VERSION)

In [8]:
from langchain_core.prompts import ChatPromptTemplate
from langchain.chains.combine_documents import create_stuff_documents_chain
prompt_template = ChatPromptTemplate.from_template(
    """
        Answer the following question based only on the provided context:
            <context>
            {context}
            </context>

    """
)

document_chain = create_stuff_documents_chain(llm_model, prompt_template)
document_chain

RunnableBinding(bound=RunnableBinding(bound=RunnableAssign(mapper={
  context: RunnableLambda(format_docs)
}), kwargs={}, config={'run_name': 'format_inputs'}, config_factories=[])
| ChatPromptTemplate(input_variables=['context'], input_types={}, partial_variables={}, messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context'], input_types={}, partial_variables={}, template='\n        Answer the following question based only on the provided context:\n            <context>\n            {context}\n            </context>\n\n    '), additional_kwargs={})])
| AzureChatOpenAI(client=<openai.resources.chat.completions.completions.Completions object at 0x000002888039AE30>, async_client=<openai.resources.chat.completions.completions.AsyncCompletions object at 0x000002888039A860>, root_client=<openai.lib.azure.AzureOpenAI object at 0x0000028833A75750>, root_async_client=<openai.lib.azure.AsyncAzureOpenAI object at 0x000002888039A7D0>, model_name='gpt-4o', model_kwargs=

In [9]:
from langchain.chains import create_retrieval_chain
retrieval_chain = create_retrieval_chain(fdb.as_retriever(), document_chain)
retrieval_chain

RunnableBinding(bound=RunnableAssign(mapper={
  context: RunnableBinding(bound=RunnableLambda(lambda x: x['input'])
           | VectorStoreRetriever(tags=['FAISS', 'AzureOpenAIEmbeddings'], vectorstore=<langchain_community.vectorstores.faiss.FAISS object at 0x000002888039A1D0>, search_kwargs={}), kwargs={}, config={'run_name': 'retrieve_documents'}, config_factories=[])
})
| RunnableAssign(mapper={
    answer: RunnableBinding(bound=RunnableBinding(bound=RunnableAssign(mapper={
              context: RunnableLambda(format_docs)
            }), kwargs={}, config={'run_name': 'format_inputs'}, config_factories=[])
            | ChatPromptTemplate(input_variables=['context'], input_types={}, partial_variables={}, messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context'], input_types={}, partial_variables={}, template='\n        Answer the following question based only on the provided context:\n            <context>\n            {context}\n            </context

In [12]:
resp = retrieval_chain.invoke({"input":"How to Transfer data into a storage account"})
resp['answer']

'**Question:** What factors are involved in calculating storage costs for an Azure storage account?\n\n**Answer:** Storage costs for an Azure storage account are calculated based on the following factors:  \n\n1. **Account type:** Different storage account types have varying pricing models.  \n2. **Storage capacity:** The amount of data stored in the account.  \n3. **Replication:** The redundancy options used for data replication (e.g., ZRS, GZRS).  \n4. **Transactions:** The number of operations performed on the storage.  \n\nAdditionally, detailed pricing information can be found on the Azure Storage pricing page and the Data Transfers pricing details page.'

In [11]:
resp

{'input': 'How is storage account billed??',
 'context': [Document(id='7ef59ba9-42bf-4347-8d60-7d88374b3fbd', metadata={'source': 'https://learn.microsoft.com/en-us/azure/storage/common/storage-account-overview', 'title': 'Storage account overview - Azure Storage | Microsoft Learn', 'description': 'Learn about the different types of storage accounts in Azure Storage. Review account naming, performance tiers, access tiers, redundancy, encryption, endpoints, and more.', 'language': 'en-us'}, page_content="Transfer data into a storage account\nMicrosoft provides services and utilities for importing your data from on-premises storage devices or third-party cloud storage providers. Which solution you use depends on the quantity of data you're transferring. For more information, see Azure Storage migration overview.\nStorage account encryption\nAll data in your storage account is automatically encrypted on the service side. For more information about encryption and key management, see Azure 