In [None]:
import os
import json
import requests
from datetime import datetime
from dotenv import load_dotenv
from langchain_core.tools import tool
from langchain_openai import AzureChatOpenAI
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.messages import HumanMessage, ToolMessage

# Load environment variables from a local .env file into os.environ
load_dotenv()

# Initialize Azure OpenAI client via LangChain.
#
# Required settings are read with os.environ[...] so that a missing variable
# fails right here with a KeyError naming it, instead of handing None to the
# client and failing later with a less obvious error.
#
# The API version can be overridden through OPENAI_API_VERSION; when that
# variable is unset the previously hard-coded version is used unchanged.
# NOTE(review): this cell imports `tool` / `ToolMessage`; confirm that the
# default preview version supports the tool-calling features used later.
#
# The API key is intentionally not passed here (line kept commented out) --
# presumably the client picks it up from the environment; confirm.
model = AzureChatOpenAI(
    azure_endpoint=os.environ["AZURE_OPENAI_ENDPOINT"],
    #api_key=os.getenv("AZURE_OPENAI_API_KEY"),
    api_version=os.getenv("OPENAI_API_VERSION", "2023-07-01-preview"),
    model=os.environ["AZURE_OPENAI_MODEL_NAME"],
    temperature=0
)

In [None]:
import os
from langchain_community.document_loaders import DirectoryLoader, PyPDFLoader
from langchain_text_splitters import CharacterTextSplitter
from langchain_openai import AzureOpenAIEmbeddings, AzureChatOpenAI
from langchain_community.vectorstores import AzureSearch
from langchain_community.retrievers import AzureAISearchRetriever
from langchain.chains.combine_documents.stuff import create_stuff_documents_chain
from langchain.chains import create_retrieval_chain

# env vars (set these with your values)
#os.environ["AZURE_AI_SEARCH_SERVICE_NAME"] = "<your-search-service-name>"  # name of the Azure AI Search service
#os.environ["AZURE_AI_SEARCH_INDEX_NAME"] = "langchain-vector-demo"
#os.environ["AZURE_AI_SEARCH_API_KEY"] = "<your-search-admin-key>"

#AZURE_OPENAI_ENDPOINT = "https://<your-azure-openai>.openai.azure.com"
#AZURE_OPENAI_KEY = "<your-azure-openai-key>"
#AZURE_EMBED_DEPLOYMENT = "<embedding-deployment-name>"  # e.g., text-embedding-3-large

# 1) Load documents: every PDF found (recursively) under ./docs
loader = DirectoryLoader("./docs", glob="**/*.pdf", loader_cls=PyPDFLoader)
documents = loader.load()

# 2) Chunk
# CharacterTextSplitter only splits on its `separator`, which defaults to a
# blank line ("\n\n"). Text extracted from PDFs usually has single line breaks
# and few blank lines (assumption -- verify on your documents), so with the
# default separator whole pages pass through as one chunk far larger than
# chunk_size. Splitting on single newlines lets chunk_size/chunk_overlap apply.
splitter = CharacterTextSplitter(separator="\n", chunk_size=800, chunk_overlap=100)
docs = splitter.split_documents(documents)

# 3) Embeddings: AzureOpenAIEmbeddings (you must have an embedding deployment)
# The two settings below were previously referenced as Python names that are
# never assigned in this notebook (only commented-out placeholders exist), so
# this cell raised NameError on a fresh kernel. Read them from the environment
# instead; a missing variable fails here with a KeyError naming it.
AZURE_OPENAI_ENDPOINT = os.environ["AZURE_OPENAI_ENDPOINT"]
AZURE_EMBED_DEPLOYMENT = os.environ["AZURE_EMBED_DEPLOYMENT"]  # e.g. a text-embedding-3-large deployment

embeddings = AzureOpenAIEmbeddings(
    azure_deployment=AZURE_EMBED_DEPLOYMENT,
    azure_endpoint=AZURE_OPENAI_ENDPOINT,
    # API key intentionally not passed (was commented out) -- presumably
    # resolved from the environment by the client; confirm.
    #openai_api_key=AZURE_OPENAI_KEY,
)

# 4) Create or connect AzureSearch vector store (this creates index for you)
# The endpoint, key and index name were commented out, so the constructor was
# called without its required arguments. Restore them from the environment.
#
# AzureSearch expects a full endpoint URL, while AZURE_AI_SEARCH_SERVICE_NAME
# may hold just the service name; accept a URL, a host name, or a bare name.
search_service = os.environ["AZURE_AI_SEARCH_SERVICE_NAME"].strip().rstrip("/")
if "://" in search_service:
    search_endpoint = search_service
elif "." in search_service:
    search_endpoint = f"https://{search_service}"
else:
    search_endpoint = f"https://{search_service}.search.windows.net"

vector_store = AzureSearch(
    azure_search_endpoint=search_endpoint,
    # getenv (not environ[...]) so the library decides how to handle a missing key.
    azure_search_key=os.getenv("AZURE_AI_SEARCH_API_KEY"),
    index_name=os.environ["AZURE_AI_SEARCH_INDEX_NAME"],
    embedding_function=embeddings.embed_query,  # function to produce embeddings
)

# 5) Add documents (this will create index schema and upload embeddings)
# NOTE(review): re-running this cell uploads the same chunks again; confirm
# whether duplicates in the index are acceptable.
vector_store.add_documents(documents=docs)

# 6) Create retriever (LangChain wrapper)
# NOTE(review): this retriever queries the search service directly rather than
# going through `vector_store`; confirm it performs the kind of search
# (keyword vs. vector) you intend for the index built above.
retriever = AzureAISearchRetriever(
    content_key="content",
    top_k=4,
    index_name=os.getenv("AZURE_AI_SEARCH_INDEX_NAME"),
)

# 7) LLM for answer generation (AzureChatOpenAI)
# Configured from the same environment variables as the chat client in the
# first cell, replacing the literal "<llm-deployment-name>" placeholder.
# Assumes AZURE_OPENAI_MODEL_NAME names the chat deployment -- confirm.
llm = AzureChatOpenAI(
    azure_endpoint=os.environ["AZURE_OPENAI_ENDPOINT"],
    azure_deployment=os.environ["AZURE_OPENAI_MODEL_NAME"],
    api_version=os.getenv("OPENAI_API_VERSION", "2023-07-01-preview"),
    temperature=0.0,
    max_tokens=1000,
)

# 8) Build retrieval chain and use it
# create_stuff_documents_chain needs a prompt in addition to the LLM; the
# retrieved documents are injected as `context` and the user question as
# `input`. ChatPromptTemplate is imported in the notebook's first cell.
answer_prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            "Answer the question using only the context below. "
            "If the context does not contain the answer, say that you don't know.\n\n"
            "{context}",
        ),
        ("human", "{input}"),
    ]
)
combine_chain = create_stuff_documents_chain(llm, answer_prompt)
retrieval_chain = create_retrieval_chain(retriever=retriever, combine_docs_chain=combine_chain)

resp = retrieval_chain.invoke({"input": "Give me a short summary of the process to onboard a new customer from the docs."})
print(resp["answer"])
