In [1]:
import os

AZURE_OPENAI_API_KEY = os.getenv("AZURE_OPENAI_API_KEY")
AZURE_OPENAI_ENDPOINT = os.getenv("AZURE_OPENAI_ENDPOINT")
OPENAI_API_VERSION = os.getenv("OPENAI_API_VERSION")

AZURE_AI_SEARCH_ENDPOINT = os.getenv("AZURE_AI_SEARCH_ENDPOINT")
AZURE_AI_SEARCH_API_KEY = os.getenv("AZURE_AI_SEARCH_API_KEY")

O365_CLIENT_ID = os.getenv("O365_CLIENT_ID")
O365_CLIENT_SECRET = os.getenv("O365_CLIENT_SECRET")
DOCUMENT_LIBRARY_ID = os.getenv("DOCUMENT_LIBRARY_ID")


In [2]:
from langchain_openai import AzureOpenAIEmbeddings


EMBEDDINGS_MODEL = AzureOpenAIEmbeddings(
    openai_api_key=AZURE_OPENAI_API_KEY,
    azure_endpoint=AZURE_OPENAI_ENDPOINT,
    azure_deployment="text-embedding-3-small",
    model="text-embedding-3-small",
)

In [4]:
import os

from langchain_community.document_loaders import SharePointLoader

loader = SharePointLoader(
    document_library_id=DOCUMENT_LIBRARY_ID,
    auth_with_token=True,
    folder_path="/compliance/reglamentos",
)
docs = loader.load()
docs[:5]



[Document(metadata={'source': 'https://aquachile.sharepoint.com/sites/aqua_pdf/Documentos%20compartidos/compliance/reglamentos/AQ-PO-AUD-001%20C%C3%B3digo%20Conducta%20en%20Los%20Negocios%20AQ.pdf', 'file_path': 'https://aquachile.sharepoint.com/sites/aqua_pdf/Documentos%20compartidos/compliance/reglamentos/AQ-PO-AUD-001%20C%C3%B3digo%20Conducta%20en%20Los%20Negocios%20AQ.pdf', 'page': 0, 'total_pages': 25, 'format': 'PDF 1.6', 'title': 'CdC 2023', 'author': '', 'subject': '', 'keywords': '', 'creator': 'Adobe Illustrator 28.0 (Macintosh)', 'producer': 'iLovePDF', 'creationDate': "D:20231215133914-03'00'", 'modDate': 'D:20231218135108Z', 'trapped': ''}, page_content='2023\n'),
 Document(metadata={'source': 'https://aquachile.sharepoint.com/sites/aqua_pdf/Documentos%20compartidos/compliance/reglamentos/AQ-PO-AUD-001%20C%C3%B3digo%20Conducta%20en%20Los%20Negocios%20AQ.pdf', 'file_path': 'https://aquachile.sharepoint.com/sites/aqua_pdf/Documentos%20compartidos/compliance/reglamentos/AQ-PO

In [5]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
split_docs = splitter.split_documents(docs)
split_docs[:5]

[Document(metadata={'source': 'https://aquachile.sharepoint.com/sites/aqua_pdf/Documentos%20compartidos/compliance/reglamentos/AQ-PO-AUD-001%20C%C3%B3digo%20Conducta%20en%20Los%20Negocios%20AQ.pdf', 'file_path': 'https://aquachile.sharepoint.com/sites/aqua_pdf/Documentos%20compartidos/compliance/reglamentos/AQ-PO-AUD-001%20C%C3%B3digo%20Conducta%20en%20Los%20Negocios%20AQ.pdf', 'page': 0, 'total_pages': 25, 'format': 'PDF 1.6', 'title': 'CdC 2023', 'author': '', 'subject': '', 'keywords': '', 'creator': 'Adobe Illustrator 28.0 (Macintosh)', 'producer': 'iLovePDF', 'creationDate': "D:20231215133914-03'00'", 'modDate': 'D:20231218135108Z', 'trapped': ''}, page_content='2023'),
 Document(metadata={'source': 'https://aquachile.sharepoint.com/sites/aqua_pdf/Documentos%20compartidos/compliance/reglamentos/AQ-PO-AUD-001%20C%C3%B3digo%20Conducta%20en%20Los%20Negocios%20AQ.pdf', 'file_path': 'https://aquachile.sharepoint.com/sites/aqua_pdf/Documentos%20compartidos/compliance/reglamentos/AQ-PO-A

In [6]:
from langchain_community.vectorstores import AzureSearch

index_name = "sharepoint-index"

vectorstore = AzureSearch(
    azure_search_endpoint=AZURE_AI_SEARCH_ENDPOINT,
    azure_search_key=AZURE_AI_SEARCH_API_KEY,
    index_name=index_name,
    embedding_function=EMBEDDINGS_MODEL.embed_query,
    additional_search_client_options={"retry_total": 4},
)

In [7]:
vectorstore.add_documents(split_docs)

['Y2Y4ZDZhZDgtY2YyNy00MTgzLThiZDgtMDcyZDMyNzNhMTE0',
 'ODk2MGM4ZTAtODY4ZS00NjE4LTg3YjMtZTBiZDEwODY0OGM3',
 'MmQyZDJjMzEtNzcwMy00YzA3LTgzNmUtNWU5ZmQ4ZDIzYzQ4',
 'NmRhYjJkNTctYTg2OC00YjllLWFhZWItNWRkMzE4MDYzNGI3',
 'NDg0Njg5MTItOTU1MS00OTVmLWE1OTItODdhZDcwNzYzZTA4',
 'MmVlYzFlNmUtZjNkOS00NWU4LWJjYzMtZjgyNjRlMGExMjA0',
 'NjQ3MGVhMzMtYmFmYy00NTY0LThkMTYtYzY2ODUxNmVhOGI5',
 'MDY2ODU3NGQtZjRlNy00MGQ0LWI0ZDctZjMyMWZjNjFmMGUz',
 'ODlkOTQ3NzctMTczYy00NjZmLWIxYzAtZjAwMzhiNTkwZDI1',
 'MjNlZjk1NTQtMjVlYS00MTAwLTkzNWEtY2ZkNjYwNTExNWJm',
 'Zjk2NTc0ZjQtNDE3My00MWY1LTk2MjctMjUzMzBkOGJiOTg3',
 'ZTZkYmEwMzQtYTIxNi00OGJmLWExMDItNzAyZGZhZGM4NmEy',
 'YTk4NjFhYjUtMzM4MC00NmE2LWE2N2MtNmU3Y2E2YzBlOGI4',
 'YmEwMjZhNDgtODYxMi00MjVmLWE4MDYtZjM5YWIzM2I3NmE5',
 'NmY0MWUyODAtMDQ2MS00ZWY3LTk2MjQtMTg1YTRiZGEyMGI4',
 'MzgyZGY3NGItZjAzMS00ZTFhLTg5NGUtMTQ1OWNhOTkxZThl',
 'NjA4NTU1NjItMDUwMi00NDI0LWIzZTctZjdlNGI2Yzk5NDU5',
 'Yzk0OTU0ZDAtZjQyNy00YjM5LTlkMzMtZDFiZGZhY2RlMGQ2',
 'ODJkMjcxYjQtN2M3Yi00MTA1LThlZWMtZmMyOTg5MzZj

In [8]:
retrieved_doc = vectorstore.similarity_search(
    query="conducta",
    k=3,
    search_type="similarity",
)
retrieved_doc[0].page_content

'3.2  CÓDIGO DE CONDUCTA EN LOS NEGOCIOS \nDocumento que contiene reglas para que cada empleado, independiente de su rango jerárquico, \npromueva una conducta basada en un comportamiento cuyos sellos distintivos, sea la rectitud u \nhonestidad, en cada una de sus acciones.'