In [1]:
import os

AZURE_OPENAI_API_KEY = os.getenv("AZURE_OPENAI_API_KEY")
AZURE_OPENAI_ENDPOINT = os.getenv("AZURE_OPENAI_ENDPOINT")
OPENAI_API_VERSION = os.getenv("OPENAI_API_VERSION")

AZURE_AI_SEARCH_ENDPOINT = os.getenv("AZURE_AI_SEARCH_ENDPOINT")
AZURE_AI_SEARCH_API_KEY = os.getenv("AZURE_AI_SEARCH_API_KEY")

O365_CLIENT_ID = os.getenv("O365_CLIENT_ID")
O365_CLIENT_SECRET = os.getenv("O365_CLIENT_SECRET")
DOCUMENT_LIBRARY_ID = os.getenv("DOCUMENT_LIBRARY_ID")


In [None]:
from langchain_openai import AzureOpenAIEmbeddings


EMBEDDINGS_MODEL = AzureOpenAIEmbeddings(
    openai_api_key=AZURE_OPENAI_API_KEY,
    azure_endpoint=AZURE_OPENAI_ENDPOINT,
    azure_deployment="text-embedding-3-small",
    model="text-embedding-3-small",
)

In [3]:
import os

from langchain_community.document_loaders import SharePointLoader

loader = SharePointLoader(
    document_library_id=os.environ.get("DOCUMENT_LIBRARY_ID"),
    auth_with_token=True,
    folder_path="/compliance",
)
docs = loader.load()
docs[:5]



[Document(metadata={'source': 'https://aquachile.sharepoint.com/sites/aqua_pdf/Documentos%20compartidos/compliance/AQ-PO-AUD-001%20C%C3%B3digo%20Conducta%20en%20Los%20Negocios%20AQ.pdf', 'file_path': 'https://aquachile.sharepoint.com/sites/aqua_pdf/Documentos%20compartidos/compliance/AQ-PO-AUD-001%20C%C3%B3digo%20Conducta%20en%20Los%20Negocios%20AQ.pdf', 'page': 0, 'total_pages': 25, 'format': 'PDF 1.6', 'title': 'CdC 2023', 'author': '', 'subject': '', 'keywords': '', 'creator': 'Adobe Illustrator 28.0 (Macintosh)', 'producer': 'iLovePDF', 'creationDate': "D:20231215133914-03'00'", 'modDate': 'D:20231218135108Z', 'trapped': ''}, page_content='2023\n'),
 Document(metadata={'source': 'https://aquachile.sharepoint.com/sites/aqua_pdf/Documentos%20compartidos/compliance/AQ-PO-AUD-001%20C%C3%B3digo%20Conducta%20en%20Los%20Negocios%20AQ.pdf', 'file_path': 'https://aquachile.sharepoint.com/sites/aqua_pdf/Documentos%20compartidos/compliance/AQ-PO-AUD-001%20C%C3%B3digo%20Conducta%20en%20Los%20N

In [4]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
split_docs = splitter.split_documents(docs)
split_docs[:5]

[Document(metadata={'source': 'https://aquachile.sharepoint.com/sites/aqua_pdf/Documentos%20compartidos/compliance/AQ-PO-AUD-001%20C%C3%B3digo%20Conducta%20en%20Los%20Negocios%20AQ.pdf', 'file_path': 'https://aquachile.sharepoint.com/sites/aqua_pdf/Documentos%20compartidos/compliance/AQ-PO-AUD-001%20C%C3%B3digo%20Conducta%20en%20Los%20Negocios%20AQ.pdf', 'page': 0, 'total_pages': 25, 'format': 'PDF 1.6', 'title': 'CdC 2023', 'author': '', 'subject': '', 'keywords': '', 'creator': 'Adobe Illustrator 28.0 (Macintosh)', 'producer': 'iLovePDF', 'creationDate': "D:20231215133914-03'00'", 'modDate': 'D:20231218135108Z', 'trapped': ''}, page_content='2023'),
 Document(metadata={'source': 'https://aquachile.sharepoint.com/sites/aqua_pdf/Documentos%20compartidos/compliance/AQ-PO-AUD-001%20C%C3%B3digo%20Conducta%20en%20Los%20Negocios%20AQ.pdf', 'file_path': 'https://aquachile.sharepoint.com/sites/aqua_pdf/Documentos%20compartidos/compliance/AQ-PO-AUD-001%20C%C3%B3digo%20Conducta%20en%20Los%20Neg

In [5]:
from langchain_community.vectorstores import AzureSearch

index_name = "sharepoint-index"

vectorstore = AzureSearch(
    azure_search_endpoint=AZURE_AI_SEARCH_ENDPOINT,
    azure_search_key=AZURE_AI_SEARCH_API_KEY,
    index_name=index_name,
    embedding_function=EMBEDDINGS_MODEL.embed_query,
    additional_search_client_options={"retry_total": 4},
)

In [6]:
vectorstore.add_documents(split_docs)

['MTE1Mzg2N2MtMzI3Ni00YTZjLTgxNmQtNmEyZmU4NjMwOTI1',
 'OTQyMTE1ODMtODA4OC00ODFjLWEyOTAtMGEyM2IwMzJiYWRi',
 'MDcwMDlmYjQtNDAxMi00MzA3LWIwMzgtNjhmZGYzYzFhMTFk',
 'ZmJkZmM3N2UtYjM2Yy00Y2FmLWEwNzgtOGU5ZDRlOWI3Yjlj',
 'MGJkMjUwOTItN2UzMC00NzVkLTlkMjUtZDc5YTY0ZmNjMmVj',
 'MTRiYWE4ZmQtODM3Mi00MjE1LTgwNjctZTQ2NWQ5NzUxYjEx',
 'MDdhYmZmN2MtMWFkZi00MGVmLThmMGMtYTJlMjQ1MGI3ZTEw',
 'ZmU0YTY5ZTgtNTlhMi00OGM0LThhYWItYmMzZWY5NDczMDhj',
 'MzE4YTI0ZmQtMzBhNi00OTgxLWIzOTAtYTY5ZTI5NDU0OWNm',
 'MTkzYjgzNzctZDc1Ny00YWVkLTkzMGEtMzgzOWZkNDJhZDM2',
 'ZTU3MmFiM2UtYzM0Zi00NjhlLWE4ZjctZjA0ZTdkYjMyNTBi',
 'ZGE4NGRiNmEtZWMxMi00MTE3LThlZjMtYzlkMTUyOWYzYTAw',
 'MWRlMDM0MzItYTQxZC00YWQ0LWI5NDAtYjRiNmQwNmUxZTE4',
 'YWIyOTdmMTQtYjI1Yi00NzJkLTk0MDYtMzNiZTQ0NTM0ODkx',
 'YmFhODEzNDItMDc5MC00NDU5LWE2NmUtYWRiODFiNTY5Yzgx',
 'MGU5MWVlOGQtMDlkMy00Yjc0LThjMWEtYTAyOTUwNjRiYTdj',
 'M2ZmYzQzZmEtZTMyYy00MDljLTk4ZjgtZTBhNGFmZjUyMTRi',
 'ZDVjNWIzODktYjlkZi00NzA3LWE1OWItZGE1NTlkODBjMmE0',
 'MjA0MTUzNTgtMThjNC00NTAzLTgwNTQtMjZjZTk4ZWFi

In [7]:
retrieved_doc = vectorstore.similarity_search(
    query="tratamiento peces",
    k=3,
    search_type="similarity",
)
retrieved_doc[0].page_content

'pmv.sernapesca.cl       Página: 2 / 3\nCod. Unidad de cultivo\nNº de peces para\ntratamiento\nPeso promedio\ndurante el tratamiento\n(g)\nGrupo / Lote\nVolumen a tratar (m3)\n101\n52.845\n3.262\nMA01-PA-PH-S-CAT-\nOTO-21-M\n0\n102\n49.367\n3.390\nMA01-PA-PH-S-CAT-\nOTO-21-M\n0\n103\n47.497\n3.181\nMA01-PA-PH-S-CAT-\nOTO-21-H\n0\n104\n41.054\n3.677\nMA01-PA-PH-S-CAT-\nOTO-21-H\n0\n105\n48.527\n3.378\nMA01-PA-RM-CU-S-\nCAT-OTO-21-H\n0\n106\n44.307\n3.307\nMA01-PA-ME-S-CAT-\nOTO-21-M\n0\n107\n48.872\n3.317\nMA01-PA-PH-S-CAT-\nOTO-21-H\n0\n108\n44.970\n3.157\nMA01-PA-ME-S-CAT-\nOTO-21-M\n0\n109\n48.953\n3.253\nMA01-PA-ME-S-CAT-\nOTO-21-H\n0\n110\n45.566\n2.283\nMA01-PA-ME-S-CAT-\nOTO-21-M\n0\n111\n48.651\n3.070\nMA01-PA-RM-CU-S-\nCAT-OTO-21-H\n0\n112\n51.306\n2.882\nMA01-LS-PH-S-CAT-\nOTO-21-M\n0\n113\n48.518\n3.212\nMA01-PA-ME-S-CAT-\nOTO-21-H\n0\n114\n51.167\n2.784\nMA01-LS-PH-S-CAT-\nOTO-21-M\n0\n115\n48.866\n2.941\nMA01-PA-PH-S-CAT-\nOTO-21-H\n0\n116\n50.946\n2.731\nMA01-LS-CB-S-CAT-\