# LangChain: Upload Documents

In [None]:
import os
from dotenv import load_dotenv
# Load settings from a local .env file (if one exists) so the os.environ /
# os.getenv lookups in the configuration cell below can find them.
load_dotenv()

In [6]:
# Name of the Azure AI Search index used by every vector-store cell below.
index_name = "scorp-index-langchain"

In [3]:
# --- Azure AI Search -------------------------------------------------------
# Both settings are mandatory: a missing variable raises KeyError right here.
search_service_endpoint = os.environ["AZURE_SEARCH_SERVICE_ENDPOINT"]
search_service_key = os.environ["AZURE_SEARCH_API_KEY"]

# --- Azure OpenAI ----------------------------------------------------------
# Only the endpoint is mandatory; every other setting falls back to a default.
azure_openai_endpoint = os.environ["AZURE_OPENAI_ENDPOINT"]
azure_openai_key = os.getenv("AZURE_OPENAI_KEY", "")
azure_openai_api_version = os.getenv("AZURE_OPENAI_API_VERSION", "2024-06-01")

# Embedding deployment; the model name is read from the very same variable
# (with the same default), so it is simply an alias of the deployment name.
azure_openai_embedding_deployment = os.getenv(
    "AZURE_OPENAI_EMBEDDING_DEPLOYMENT", "text-embedding-3-large"
)
embedding_model_name = azure_openai_embedding_deployment

# Environment values are strings, hence the int() conversion.
azure_openai_embedding_dimensions = int(
    os.getenv("AZURE_OPENAI_EMBEDDING_DIMENSIONS", 1024)
)

In [4]:
from langchain_openai import AzureOpenAIEmbeddings

# Embedding client built from the Azure OpenAI settings read in the
# configuration cell.
embeddings: AzureOpenAIEmbeddings = AzureOpenAIEmbeddings(
    # where to connect and how to authenticate
    azure_endpoint=azure_openai_endpoint,
    api_key=azure_openai_key,
    openai_api_version=azure_openai_api_version,
    # which embedding deployment to call, and the requested vector size
    azure_deployment=azure_openai_embedding_deployment,
    dimensions=azure_openai_embedding_dimensions,
)

In [9]:
from langchain_community.vectorstores.azuresearch import AzureSearch

# Vector store handle for the target Azure AI Search index; text is embedded
# through the Azure OpenAI client's embed_query method.
vector_store: AzureSearch = AzureSearch(
    index_name=index_name,
    azure_search_endpoint=search_service_endpoint,
    azure_search_key=search_service_key,
    embedding_function=embeddings.embed_query,
)

In [None]:
from langchain_community.document_loaders import PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Load the PDF into LangChain documents.
loader = PyPDFLoader("../files/QML-DS.pdf")
documents = loader.load()

# CharacterTextSplitter splits on a single separator ("\n\n" by default) and
# cannot cut a piece that does not contain it. Text extracted from PDF pages
# seldom has blank lines, so pages could pass through unsplit and far exceed
# chunk_size. The recursive splitter falls back to finer separators, so the
# 1000-character limit is actually honoured.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
docs = text_splitter.split_documents(documents)

# Embed the chunks and upload them to the index held by `vector_store`.
# NOTE(review): every run of this cell uploads the chunks again; this
# presumably creates duplicate index entries - confirm before re-running.
vector_store.add_documents(documents=docs)

In [8]:
from langchain_community.vectorstores.azuresearch import AzureSearch

# Rebuild the vector store handle for the same index, this time handing
# retry settings to the underlying Azure client.
vector_store: AzureSearch = AzureSearch(
    index_name=index_name,
    azure_search_endpoint=search_service_endpoint,
    azure_search_key=search_service_key,
    embedding_function=embeddings.embed_query,
    additional_search_client_options={"retry_total": 4},  # max retries
)

# Query the index with search_type="similarity", asking for 3 results.
# Note that this rebinds `docs`, which previously held the uploaded chunks.
docs = vector_store.similarity_search(
    "What is quantum machine learning?",
    k=3,
    search_type="similarity",
)

In [None]:
# Display the first of the documents returned by the similarity search above.
docs[0]