In [None]:
## Load environment variables
import os
from dotenv import load_dotenv

# Let python-dotenv populate the process environment before reading from it.
load_dotenv()

# AWS credentials and region; these are passed to the Bedrock client below.
aws_access_key_id, aws_secret_access_key, region_name = map(
    os.getenv, ("aws_access_key_id", "aws_secret_access_key", "region_name")
)

# Qdrant endpoint and API key; each is None when the variable is not set.
qdrant_url, qdrant_api_key = map(os.getenv, ("qdrant_url", "qdrant_api_key"))

In [None]:
## Set up the embedding model
import boto3
from langchain_aws import BedrockEmbeddings

# Bedrock runtime client built from the credentials read from the environment.
bedrock_client = boto3.client(
    service_name="bedrock-runtime",
    region_name=region_name,
    aws_access_key_id=aws_access_key_id,
    aws_secret_access_key=aws_secret_access_key,
)

# Embedding wrapper that sends its requests through the client above.
embeddings = BedrockEmbeddings(
    client=bedrock_client,
    model_id="amazon.titan-embed-text-v1",
)

In [None]:
## Prepare the documents
import json
from langchain.docstore.document import Document
from langchain.text_splitter import CharacterTextSplitter
from IPython.display import display  # used by the retrieval cell further down

# Read the raw records. The encoding is stated explicitly because the platform
# default is not UTF-8 everywhere (e.g. Windows), which would garble or reject
# the non-ASCII characters in the JSON text.
with open('ingest_rag.json', 'r', encoding='utf-8') as file:
    data = json.load(file)

# One Document per record: "contenido" becomes the text, while "tipo" and
# "seccion" are carried along as metadata.
documentos = [
    Document(
        page_content=d["contenido"],
        metadata={"tipo": d["tipo"], "seccion": d["seccion"]},
    )
    for d in data
]

# Split the documents into chunks (chunk_size=1000, no overlap between chunks).
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
docs = text_splitter.split_documents(documentos)

In [None]:
## Data ingestion
from langchain_qdrant import QdrantVectorStore

# Embed and upload the split chunks (`docs`) rather than the unsplit
# `documentos`; otherwise the text-splitting step has no effect on what is
# stored in the "tdr" collection.
QdrantVectorStore.from_documents(
    docs,
    embeddings,
    url=qdrant_url,
    api_key=qdrant_api_key,
    collection_name="tdr",
)

In [None]:
## Read the data back
from qdrant_client import QdrantClient
from langchain_qdrant import RetrievalMode, QdrantVectorStore

# Client for the Qdrant endpoint taken from the environment variables.
client = QdrantClient(url=qdrant_url, api_key=qdrant_api_key)

# Vector store over the "tdr" collection, configured for dense retrieval
# with the same embeddings object used elsewhere in the notebook.
vector_store = QdrantVectorStore(
    client=client,
    embedding=embeddings,
    collection_name="tdr",
    retrieval_mode=RetrievalMode.DENSE,
)

query = "cual es la empresa que muestra los antecedentes?"

# Ask for the 3 closest matches to the query and show them.
results = vector_store.similarity_search(query, k=3)

display(results)
