# LangChain: Upload Documents

In [49]:
import os
from dotenv import load_dotenv
# Load settings from a local .env file (python-dotenv) before the cells that read os.environ.
load_dotenv()

True

In [50]:
# Azure AI Search index name, passed to both the vector store and the retriever.
index_name = "scorp-index-langchain"

In [129]:
# --- Azure AI Search (required: a missing variable raises KeyError) ---
search_service_endpoint = os.environ["AZURE_SEARCH_SERVICE_ENDPOINT"]
search_service_key = os.environ["AZURE_SEARCH_API_KEY"]

# --- Azure OpenAI resource ---
azure_openai_endpoint = os.environ["AZURE_OPENAI_ENDPOINT"]  # required
azure_openai_key = os.environ.get("AZURE_OPENAI_KEY", "")  # falls back to an empty string
azure_openai_api_version = os.environ.get("AZURE_OPENAI_API_VERSION", "2024-05-01-preview")

# --- Embedding deployment ---
azure_openai_embedding_deployment = os.environ.get(
    "AZURE_OPENAI_EMBEDDING_DEPLOYMENT", "text-embedding-3-large"
)
# Read from the same environment variable, with the same default, as the deployment name.
embedding_model_name = azure_openai_embedding_deployment
azure_openai_embedding_dimensions = int(
    os.environ.get("AZURE_OPENAI_EMBEDDING_DIMENSIONS", "1024")
)

# --- Completion (non-chat) deployment ---
azure_openai_gpt_deployment = os.environ.get(
    "AZURE_OPENAI_GPT_DEPLOYMENT", "gpt-35-turbo-instruct"
)

# --- OpenAI (non-Azure) API key; empty string when unset ---
openai_api_key = os.environ.get("OPENAI_API_KEY", "")

## LangChain Embeddings

In [52]:
from langchain_openai import AzureOpenAIEmbeddings

# Embedding client for the configured Azure OpenAI embedding deployment;
# `dimensions` is set from AZURE_OPENAI_EMBEDDING_DIMENSIONS (default 1024).
embeddings = AzureOpenAIEmbeddings(
    azure_endpoint=azure_openai_endpoint,
    api_key=azure_openai_key,
    openai_api_version=azure_openai_api_version,
    azure_deployment=azure_openai_embedding_deployment,
    dimensions=azure_openai_embedding_dimensions,
)

## Create Vector Store

In [21]:
from langchain_community.vectorstores.azuresearch import AzureSearch

# Vector store bound to the Azure AI Search index named by `index_name`.
# Text is embedded through `embeddings.embed_query`.
vector_store = AzureSearch(
    index_name=index_name,
    azure_search_endpoint=search_service_endpoint,
    azure_search_key=search_service_key,
    embedding_function=embeddings.embed_query,
    semantic_configuration_name="semantic-config",
)

## Load and upload PDF document

In [None]:
from langchain_community.document_loaders import PyPDFLoader
from langchain_text_splitters import CharacterTextSplitter

# Read the source PDF through PyPDFLoader.
loader = PyPDFLoader("../files/QML-DS.pdf")
documents = loader.load()

# Split the loaded documents into chunks: chunk_size=1000, no overlap between chunks.
text_splitter = CharacterTextSplitter(chunk_overlap=0, chunk_size=1000)
docs = text_splitter.split_documents(documents)

# Upload the chunks to the search index.
# NOTE(review): re-running this cell presumably uploads the same chunks again — confirm before re-executing.
vector_store.add_documents(docs)

## Perform similarity search

In [8]:
# Fetch the 3 best matches for the question with search_type="similarity".
# NOTE: this rebinds `docs`, which previously held the chunks produced by the text splitter.
docs = vector_store.similarity_search(
    "What is quantum machine learning?",
    k=3,
    search_type="similarity",
)

## Azure OpenAI - LLM invocation

In [132]:
from langchain_openai import AzureOpenAI

# LLM client for the Azure OpenAI deployment named in `azure_openai_gpt_deployment`.
llm = AzureOpenAI(
    azure_deployment=azure_openai_gpt_deployment,
    azure_endpoint=azure_openai_endpoint,
    api_key=azure_openai_key,
    api_version=azure_openai_api_version,
    temperature=0.5,
    max_tokens=1000,
)

In [None]:
# Ask the LLM directly; no retriever is involved in this call.
llm.invoke("What is quantum machine learning?")

## Create Azure AI Search Retriever

In [96]:
from langchain.retrievers.azure_ai_search import AzureAISearchRetriever

# Retriever over the same search index as the vector store: top_k=2, with the
# document text taken from the "content" key.
# NOTE(review): `service_name` receives the search endpoint value — confirm the installed
# retriever version accepts an endpoint here rather than a bare service name.
retriever = AzureAISearchRetriever(
    index_name=index_name,
    service_name=search_service_endpoint,
    api_key=search_service_key,
    top_k=2,
    content_key="content",
)

## Azure Chat OpenAI instance

In [118]:
from langchain_openai import AzureChatOpenAI

# Chat model used by the RetrievalQA chain in this notebook.
# The API version comes from the shared `azure_openai_api_version` setting, so this client
# follows AZURE_OPENAI_API_VERSION like the other Azure OpenAI clients here. Its default is
# the same "2024-05-01-preview" that was previously hard-coded in this call.
# The deployment name stays explicit because `azure_openai_gpt_deployment` defaults to an
# instruct deployment, which this notebook uses for the completion client.
# temperature=0.0 is passed unchanged from the original call.
chat_llm = AzureChatOpenAI(
    openai_api_version=azure_openai_api_version,
    api_key=azure_openai_key,
    azure_endpoint=azure_openai_endpoint,
    azure_deployment="gpt-35-turbo-16k",
    temperature=0.0,
)

In [125]:
from langchain import hub

# Pull the "rlm/rag-prompt" prompt from LangChain Hub.
prompt = hub.pull(
    "rlm/rag-prompt",
    api_url="https://api.hub.langchain.com",
)

In [126]:
from langchain.chains import RetrievalQA

# Retrieval QA chain of type "stuff": combines the retriever, the chat model and the
# pulled prompt, and also returns the source documents alongside the answer.
qa = RetrievalQA.from_chain_type(
    llm=chat_llm,
    retriever=retriever,
    chain_type="stuff",
    chain_type_kwargs={"prompt": prompt},
    return_source_documents=True,
)

In [127]:
# Run the chain through `invoke`; calling the chain object directly (`qa({...})`) is
# deprecated in LangChain. The result is still a dict containing "result" (and
# "source_documents", because the chain was built with return_source_documents=True).
response = qa.invoke({"query": "Explain braket notation"})

In [None]:
# Display only the generated answer text from the chain output.
response["result"]

"Braket notation is a mathematical notation used in quantum mechanics to represent quantum states. It uses the Dirac notation, which represents quantum states as vectors in a complex vector space. In braket notation, a quantum state is represented as |ψ⟩, where the vertical bars indicate a quantum state and the angle brackets indicate the state's name or label."