In [1]:
import boto3
from dotenv import load_dotenv
import os
import json

In [15]:
# Load environment variables from the .env file in the current working directory.
# load_dotenv takes the path of the dotenv file itself; a bare directory ("./")
# is not a readable file, so nothing would be loaded from it.
load_dotenv("./.env")

# Session built from the `aws_key` / `aws_secret` environment variables.
# os.getenv returns None when a variable is missing.
# NOTE(review): presumably boto3 then falls back to its default credential
# lookup — verify that this is the intended behavior.
sesion_aws = boto3.Session(
    aws_access_key_id=os.getenv("aws_key"),
    aws_secret_access_key=os.getenv("aws_secret"),
    region_name="us-east-1"
)

# Bedrock Runtime (used to generate Amazon Titan Text Embeddings V2 vectors) and
# S3 Vectors (used to store and query them) clients, both created from the
# session above and therefore bound to its region.
bedrock = sesion_aws.client("bedrock-runtime")
s3vectors = sesion_aws.client("s3vectors")

In [None]:
# Movie descriptions that will be embedded, one vector per entry.
texts = [
    "Star Wars: A farm boy joins rebels to fight an evil empire in space", 
    "Jurassic Park: Scientists create dinosaurs in a theme park that goes wrong",
    "Finding Nemo: A father fish searches the ocean to find his lost son"
]

# Call the Titan embedding model once per description and collect the
# resulting vectors in the same order as `texts`.
embeddings = []
for text in texts:
    request_body = json.dumps({"inputText": text})
    response = bedrock.invoke_model(
        modelId="amazon.titan-embed-text-v2:0",
        body=request_body,
    )
    # The response body is a stream containing JSON; the vector is stored
    # under the "embedding" key.
    response_body = json.loads(response["body"].read())
    embeddings.append(response_body["embedding"])


In [6]:
# Number of components in the first embedding vector (its dimensionality).
len(embeddings[0])

1024

## Insert

In [10]:
# Store one vector per movie in the "movies" index. Each record pairs the
# embedding with its source text and a genre tag as metadata; the position in
# the (key, genre) list matches the position in `texts` / `embeddings`.
s3vectors.put_vectors(
    vectorBucketName="media-embeddings",
    indexName="movies",
    vectors=[
        {
            "key": movie_key,
            "data": {"float32": embeddings[position]},
            "metadata": {"source_text": texts[position], "genre": movie_genre},
        }
        for position, (movie_key, movie_genre) in enumerate(
            [
                ("Star Wars", "scifi"),
                ("Jurassic Park", "scifi"),
                ("Finding Nemo", "family"),
            ]
        )
    ],
)

{'ResponseMetadata': {'RequestId': '5f8c4718-049d-491f-ba9b-905762e92601',
  'HostId': '',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'date': 'Wed, 20 Aug 2025 22:28:32 GMT',
   'content-type': 'application/json',
   'content-length': '2',
   'connection': 'keep-alive',
   'x-amz-request-id': '5f8c4718-049d-491f-ba9b-905762e92601',
   'access-control-allow-origin': '*',
   'vary': 'origin, access-control-request-method, access-control-request-headers',
   'access-control-expose-headers': '*'},
  'RetryAttempts': 0}}

## Retrieve

In [None]:
# Query text to convert to an embedding.
input_text = "adventures in space"

# Generate the vector embedding with the same model used for the indexed texts.
response = bedrock.invoke_model(
    modelId="amazon.titan-embed-text-v2:0",
    body=json.dumps({"inputText": input_text})
)

# Extract embedding from response.
model_response = json.loads(response["body"].read())
embedding = model_response["embedding"]

# Show only the vector length and a short preview: echoing the whole response
# together with the embedding would print every float twice.
len(embedding), embedding[:5]

In [25]:

# Search the "movies" index with the query embedding, asking for the top 3
# matches along with their distance and stored metadata.
movie_query = {
    "vectorBucketName": "media-embeddings",
    "indexName": "movies",
    "queryVector": {"float32": embedding},
    "topK": 3,
    "returnDistance": True,
    "returnMetadata": True,
}
response = s3vectors.query_vectors(**movie_query)

# Pretty-print the returned matches.
print(json.dumps(response["vectors"], indent=2))

[
  {
    "key": "Star Wars",
    "metadata": {
      "source_text": "Star Wars: A farm boy joins rebels to fight an evil empire in space",
      "genre": "scifi"
    },
    "distance": 0.7918925285339355
  },
  {
    "key": "Jurassic Park",
    "metadata": {
      "source_text": "Jurassic Park: Scientists create dinosaurs in a theme park that goes wrong",
      "genre": "scifi"
    },
    "distance": 0.859985888004303
  },
  {
    "key": "Finding Nemo",
    "metadata": {
      "source_text": "Finding Nemo: A father fish searches the ocean to find his lost son",
      "genre": "family"
    },
    "distance": 0.9480686187744141
  }
]


In [23]:
# Inspect the match list held by the most recent `response`.
# NOTE(review): the recorded output is empty and this cell's execution count
# (23) is lower than the query cell before it (25), so the output presumably
# comes from an earlier query — re-run the notebook in order to confirm.
response['vectors']

[]

In [14]:
# Same search as the unfiltered query, restricted to vectors whose "genre"
# metadata equals "scifi".
scifi_query = {
    "vectorBucketName": "media-embeddings",
    "indexName": "movies",
    "queryVector": {"float32": embedding},
    "topK": 3,
    "filter": {"genre": "scifi"},
    "returnDistance": True,
    "returnMetadata": True,
}
response = s3vectors.query_vectors(**scifi_query)

# Pretty-print the returned matches.
print(json.dumps(response["vectors"], indent=2))

[
  {
    "key": "Star Wars",
    "metadata": {
      "source_text": "Star Wars: A farm boy joins rebels to fight an evil empire in space",
      "genre": "scifi"
    },
    "distance": 0.7918925285339355
  },
  {
    "key": "Jurassic Park",
    "metadata": {
      "genre": "scifi",
      "source_text": "Jurassic Park: Scientists create dinosaurs in a theme park that goes wrong"
    },
    "distance": 0.859985888004303
  }
]


In [None]:
# Query text to convert to an embedding.
input_text = "que es la inteligencia artificial?"

# Generate the vector embedding.
response = bedrock.invoke_model(
    modelId="amazon.titan-embed-text-v2:0",
    body=json.dumps({"inputText": input_text})
)

# Extract embedding from response.
model_response = json.loads(response["body"].read())
embedding = model_response["embedding"]

# Show only the vector length and a short preview: echoing the whole response
# together with the embedding would print every float twice.
len(embedding), embedding[:5]

In [27]:

# Run the query embedding against the "s3-vector-index-kb1" index of the
# "s3-vector-ia-test-18ago2025" bucket, asking for the top 3 matches with
# distance and metadata.
kb_index_query = {
    "vectorBucketName": "s3-vector-ia-test-18ago2025",
    "indexName": "s3-vector-index-kb1",
    "queryVector": {"float32": embedding},
    "topK": 3,
    "returnDistance": True,
    "returnMetadata": True,
}
response = s3vectors.query_vectors(**kb_index_query)

# Pretty-print the returned matches.
print(json.dumps(response["vectors"], indent=2))

[]


## List all vectors in an index

In [None]:
# List the vectors of the "movies" index in the "media-embeddings" bucket with
# the AWS CLI, requesting segment 0 of 2 and including vector data and metadata.
!aws s3vectors list-vectors \
  --vector-bucket-name "media-embeddings" \
  --index-name "movies" \
  --segment-count 2 \
  --segment-index 0 \
  --return-data \
  --return-metadata

In [34]:
# Same listing against the "s3vector-index-kb01" index of the
# "s3vector-demo-01" bucket (segment 0 of 2, with vector data and metadata).
!aws s3vectors list-vectors \
  --vector-bucket-name "s3vector-demo-01" \
  --index-name "s3vector-index-kb01" \
  --segment-count 2 \
  --segment-index 0 \
  --return-data \
  --return-metadata

{
    "vectors": []
}


In [1]:
# Plain listing of the "movies" index without segmentation or data/metadata flags.
# NOTE(review): the recorded run failed with AccessDeniedException (invalid
# security token). The CLI presumably uses its own credential configuration
# rather than the boto3 session created in this notebook — verify.
!aws s3vectors list-vectors \
  --vector-bucket-name "media-embeddings" \
  --index-name "movies"


An error occurred (AccessDeniedException) when calling the ListVectors operation: The security token included in the request is invalid.


## Prueba de validación de recuperación mediante Knowledge Bases

In [37]:
import boto3

# AWS Region used for the agent-runtime client.
REGION = "us-east-1"

# Knowledge base ID and generation model (a short model ID works as modelArn,
# or an inference-profile ARN can be used instead).
KNOWLEDGE_BASE_ID = "T4BUDLPSVT"  # <-- your knowledge base
MODEL_ID_OR_ARN = "amazon.nova-lite-v1:0"  # or an inference-profile ARN

client = sesion_aws.client("bedrock-agent-runtime", region_name=REGION)

user_query = "¿que es la inteligencia artificial?"

# Optional generation settings passed along with the knowledge base request.
text_inference_settings = {"temperature": 0.2, "maxTokens": 800}
knowledge_base_settings = {
    "knowledgeBaseId": KNOWLEDGE_BASE_ID,
    "modelArn": MODEL_ID_OR_ARN,
    "generationConfiguration": {
        "inferenceConfig": {"textInferenceConfig": text_inference_settings}
    },
}

resp = client.retrieve_and_generate(
    input={"text": user_query},
    retrieveAndGenerateConfiguration={
        "type": "KNOWLEDGE_BASE",
        "knowledgeBaseConfiguration": knowledge_base_settings,
    },
)

# Generated answer.
print("=== Respuesta ===")
print(resp["output"]["text"])
print()

# One line per retrieved reference: the first non-empty locator among the S3,
# web, Confluence and SharePoint locations, or the raw location dict when none
# of them is set.
print("=== Fuentes citadas ===")
for citation in resp.get("citations", []):
    for reference in citation.get("retrievedReferences", []):
        location = reference.get("location", {})
        locators = (
            location.get("s3Location", {}).get("uri"),
            location.get("webLocation", {}).get("url"),
            location.get("confluenceLocation", {}).get("url"),
            location.get("sharePointLocation", {}).get("url"),
        )
        print(next((locator for locator in locators if locator), location))

=== Respuesta ===
Question: ¿que es la inteligencia artificial?

Answer: La inteligencia artificial es un campo de la ciencia y la ingeniería que se centra en la creación de sistemas inteligentes capaces de realizar tareas que, cuando las realizan los humanos, requieren inteligencia. La inteligencia artificial se define de diversas maneras, pero una de las definiciones más comunes es la propuesta por Alan Turing en 1950, que establece que una conducta es inteligente cuando tiene un nivel lo suficientemente alto como para confundir a un interlocutor humano. El campo de la inteligencia artificial se originó en 1956, cuando un grupo de investigadores se reunió en Dartmouth para discutir temas relacionados con la inteligencia, las redes neuronales y la teoría de autómatas. Este encuentro se considera el punto de partida de la inteligencia artificial como disciplina científica. La inteligencia artificial se considera tanto una ciencia como una ingeniería, ya que implica tanto el desarrollo 

In [41]:
kb_id = "T4BUDLPSVT"
model_arn = "arn:aws:bedrock:us-east-1::foundation-model/amazon.nova-lite-v1:0"
bedrock_agent = sesion_aws.client("bedrock-agent-runtime")


def retrieve_generated(input, kb_id, model_arn):
    """Query a knowledge base and return the raw retrieve-and-generate response.

    Uses the module-level ``bedrock_agent`` client.

    Args:
        input: Question text sent as the query. The name shadows the built-in
            ``input`` inside this function; it is kept so existing callers
            keep working.
        kb_id: Identifier of the knowledge base to query.
        model_arn: ARN of the model used to generate the answer.

    Returns:
        The response dict from ``retrieve_and_generate``, unchanged.
    """
    return bedrock_agent.retrieve_and_generate(
        input={"text": input},
        retrieveAndGenerateConfiguration={
            "type": "KNOWLEDGE_BASE",
            "knowledgeBaseConfiguration": {
                "knowledgeBaseId": kb_id,
                "modelArn": model_arn,
            },
        },
    )


response = retrieve_generated('en que año inicia la inteligencia artificial. Responde en español', kb_id=kb_id, model_arn=model_arn)
print(response.keys())
print(response['output'])
print("##########################")
# ensure_ascii=False keeps accented characters in the retrieved text readable
# instead of printing them as \uXXXX escapes.
print(json.dumps(response['citations'], indent=2, ensure_ascii=False))

dict_keys(['ResponseMetadata', 'citations', 'output', 'sessionId'])
{'text': 'Based on the retrieved results, the term "artificial intelligence" was adopted in 1956 during a meeting at Dartmouth College. However, the concept of artificial intelligence can be traced back to the work of W. McCulloch and M. Pitts in 1943, which proposed a model of artificial neurons based on the physiology and functioning of brain neurons.'}
##########################
[
  {
    "generatedResponsePart": {
      "textResponsePart": {
        "span": {
          "end": 124,
          "start": 0
        },
        "text": "Based on the retrieved results, the term \"artificial intelligence\" was adopted in 1956 during a meeting at Dartmouth College"
      }
    },
    "retrievedReferences": [
      {
        "content": {
          "text": "\u00a9 FUOC\u00a0\u2022 PID_00267999 7 Qu\u00e9 es la inteligencia artificial     1. Breve repaso hist\u00f3rico de la inteligencia artificial     El t\u00e9rmino inteligenc