#### Librerías

In [1]:
import os
import json
import boto3

### Creds

In [2]:
# Read the AWS access key, secret key and region from the environment.
# A variable that is not set yields None.
aws_access_key, aws_secret_access_key, aws_region = (
    os.environ.get(var_name)
    for var_name in ("AWS_ACCESS_KEY", "AWS_SECRET_ACCESS_KEY", "AWS_REGION")
)

#### Boto3 Session

In [3]:
# Build one boto3 session from the credential values read from the environment;
# every service client below is created from this session.
session_kwargs = {
    "aws_access_key_id": aws_access_key,
    "aws_secret_access_key": aws_secret_access_key,
    "region_name": aws_region,
}
session = boto3.Session(**session_kwargs)

#### S3 Metadata

In [4]:
# S3 client created from the shared session.
s3 = session.client("s3")

In [5]:
# Remove both metadata files from the bucket in a single request and display
# the "Deleted" entries of the response.
metadata_keys = ["memgpt.pdf.metadata.json", "mrl.pdf.metadata.json"]
delete_response = s3.delete_objects(
    Bucket="kendra-filter",
    Delete={"Objects": [{"Key": key} for key in metadata_keys]},
)
delete_response["Deleted"]

[{'Key': 'mrl.pdf.metadata.json'}, {'Key': 'memgpt.pdf.metadata.json'}]

In [6]:
# Display the objects currently stored in the bucket.
# list_objects_v2 omits the "Contents" key when the bucket holds no objects,
# so default to an empty list instead of raising KeyError.
# NOTE: a single call returns at most 1,000 keys; use a paginator for larger buckets.
s3.list_objects_v2(Bucket="kendra-filter").get("Contents", [])

[{'Key': 'memgpt.pdf',
  'LastModified': datetime.datetime(2024, 7, 16, 21, 7, 39, tzinfo=tzlocal()),
  'ETag': '"16e65f7cc5c1d647eb6acffdf085ce62"',
  'Size': 663708,
  'StorageClass': 'STANDARD'},
 {'Key': 'mrl.pdf',
  'LastModified': datetime.datetime(2024, 7, 16, 21, 7, 39, tzinfo=tzlocal()),
  'ETag': '"f3080a3cb92f92fb71fa3d7095b7c8e1"',
  'Size': 7006007,
  'StorageClass': 'STANDARD'}]

In [7]:
# Upload both *.metadata.json files from ../assets/s3 to the bucket,
# keeping the file name as the object key.
for metadata_name in ("memgpt.pdf.metadata.json", "mrl.pdf.metadata.json"):
    s3.upload_file(
        Filename=f"../assets/s3/{metadata_name}",
        Bucket="kendra-filter",
        Key=metadata_name,
    )

### Kendra

In [8]:
# Kendra client created from the shared session.
kendra = session.client(service_name="kendra")

# Use the first index returned by list_indices for every query below.
# Fail with an explicit message when the account/region has no index,
# instead of an opaque IndexError from subscripting an empty list.
index_summaries = kendra.list_indices().get("IndexConfigurationSummaryItems", [])
if not index_summaries:
    raise RuntimeError(
        "No Kendra index found for the configured credentials/region"
    )
kendra_index_id = index_summaries[0]["Id"]

#### Mismo DataSource (S3)

In [9]:
# Without UserContext
query_args = {"IndexId": kendra_index_id, "QueryText": "What is MemGPT?"}
result = kendra.query(**query_args)
for item in result["ResultItems"]:
    print(item)

{'Id': '81952f7d-68fe-4be7-88e3-f4883e779ca8-e91d0729-ec37-4704-892c-491640b34334', 'Type': 'ANSWER', 'Format': 'TEXT', 'AdditionalAttributes': [{'Key': 'AnswerText', 'ValueType': 'TEXT_WITH_HIGHLIGHTS_VALUE', 'Value': {'TextWithHighlightsValue': {'Text': 'nputs.\n\n\n5. Conclusion\nIn this paper, we introduced MemGPT, a novel LLM sys-\ntem inspired by operating systems to manage the limited\ncontext windows of large language models. By designing a\nmemory hierarchy and control flow analogous to traditional\nOSes, MemGPT provides the illusion of larger context re-\nsources for LLMs. This OS-inspired approach was eval-\nuated in two domains where existing LLM performance\nis constrained by finite context lengths: document anal-\nysis and conversational agents. For document analysis,\nMemGPT could process lengthy texts well beyond the con-\ntext limits of current LLMs by effectively paging relevant\ncontext in and out o', 'Highlights': [{'BeginOffset': 51, 'EndOffset': 164, 'TopAnswer': 

In [10]:
# With UserContext + allowed group
user_context = {"Groups": ["analysts"]}
result = kendra.query(
    IndexId=kendra_index_id,
    QueryText="What is MemGPT?",
    UserContext=user_context,
)
for item in result["ResultItems"]:
    print(item)

{'Id': 'd7d97f61-567f-4719-8969-031e6043be18-f571282d-128b-40db-af11-afa1ecd782de', 'Type': 'ANSWER', 'Format': 'TEXT', 'AdditionalAttributes': [{'Key': 'AnswerText', 'ValueType': 'TEXT_WITH_HIGHLIGHTS_VALUE', 'Value': {'TextWithHighlightsValue': {'Text': 'nputs.\n\n\n5. Conclusion\nIn this paper, we introduced MemGPT, a novel LLM sys-\ntem inspired by operating systems to manage the limited\ncontext windows of large language models. By designing a\nmemory hierarchy and control flow analogous to traditional\nOSes, MemGPT provides the illusion of larger context re-\nsources for LLMs. This OS-inspired approach was eval-\nuated in two domains where existing LLM performance\nis constrained by finite context lengths: document anal-\nysis and conversational agents. For document analysis,\nMemGPT could process lengthy texts well beyond the con-\ntext limits of current LLMs by effectively paging relevant\ncontext in and out o', 'Highlights': [{'BeginOffset': 51, 'EndOffset': 164, 'TopAnswer': 

In [11]:
# With UserContext + disallowed group
user_context = {"Groups": ["interns"]}
result = kendra.query(
    IndexId=kendra_index_id,
    QueryText="What is MemGPT?",
    UserContext=user_context,
)
for item in result["ResultItems"]:
    print(item)

In [12]:
# With UserContext + unspecified group
user_context = {"Groups": ["others"]}
result = kendra.query(
    IndexId=kendra_index_id,
    QueryText="What is MemGPT?",
    UserContext=user_context,
)
for item in result["ResultItems"]:
    print(item)

In [14]:
# With UserContext + unspecified user
user_context = {"UserId": "CEO"}
result = kendra.query(
    IndexId=kendra_index_id,
    QueryText="What is MemGPT?",
    UserContext=user_context,
)
for item in result["ResultItems"]:
    print(item)

In [15]:
# With UserContext + unspecified user and correct group
user_context = {"UserId": "CEO", "Groups": ["analysts"]}
result = kendra.query(
    IndexId=kendra_index_id,
    QueryText="What is MemGPT?",
    UserContext=user_context,
)
for item in result["ResultItems"]:
    print(item)

{'Id': '84b1018d-88ea-4d79-8163-63f47696f882-6efe0b8d-f05a-4b54-8785-79b5c174d812', 'Type': 'ANSWER', 'Format': 'TEXT', 'AdditionalAttributes': [{'Key': 'AnswerText', 'ValueType': 'TEXT_WITH_HIGHLIGHTS_VALUE', 'Value': {'TextWithHighlightsValue': {'Text': 'nputs.\n\n\n5. Conclusion\nIn this paper, we introduced MemGPT, a novel LLM sys-\ntem inspired by operating systems to manage the limited\ncontext windows of large language models. By designing a\nmemory hierarchy and control flow analogous to traditional\nOSes, MemGPT provides the illusion of larger context re-\nsources for LLMs. This OS-inspired approach was eval-\nuated in two domains where existing LLM performance\nis constrained by finite context lengths: document anal-\nysis and conversational agents. For document analysis,\nMemGPT could process lengthy texts well beyond the con-\ntext limits of current LLMs by effectively paging relevant\ncontext in and out o', 'Highlights': [{'BeginOffset': 51, 'EndOffset': 164, 'TopAnswer': 

#### Diferentes data sources (Ambos S3 Connectors)

In [19]:
# Display the summaries of the data sources attached to the index.
data_sources_response = kendra.list_data_sources(IndexId=kendra_index_id)
data_sources_response["SummaryItems"]

[{'Name': 's3-filter-datasource2',
  'Id': '25f58b56-387b-4419-8e06-9e4dc2c0bd83',
  'Type': 'S3',
  'CreatedAt': datetime.datetime(2024, 7, 17, 16, 20, 58, 572000, tzinfo=tzlocal()),
  'UpdatedAt': datetime.datetime(2024, 7, 17, 16, 20, 58, 572000, tzinfo=tzlocal()),
  'Status': 'ACTIVE',
  'LanguageCode': 'en'},
 {'Name': 's3-filter-datasource3',
  'Id': '561388be-8466-4aad-a678-ee41e3217ae3',
  'Type': 'S3',
  'CreatedAt': datetime.datetime(2024, 7, 17, 16, 59, 22, 391000, tzinfo=tzlocal()),
  'UpdatedAt': datetime.datetime(2024, 7, 17, 17, 7, 16, 217000, tzinfo=tzlocal()),
  'Status': 'ACTIVE',
  'LanguageCode': 'en'}]

In [16]:
# Display the access control configurations currently defined on the index.
acl_configs_response = kendra.list_access_control_configurations(IndexId=kendra_index_id)
acl_configs_response["AccessControlConfigurations"]

[]

In [21]:
# Create an access control configuration named "Testing" with one ALLOW entry
# per group, each entry tied to a single data source ID.
group_to_data_source = {
    "HR": "25f58b56-387b-4419-8e06-9e4dc2c0bd83",
    "TECH": "561388be-8466-4aad-a678-ee41e3217ae3",
}
kendra.create_access_control_configuration(
    IndexId=kendra_index_id,
    Name="Testing",
    AccessControlList=[
        {
            "Access": "ALLOW",
            "Name": group_name,
            "Type": "GROUP",
            "DataSourceId": data_source_id,
        }
        for group_name, data_source_id in group_to_data_source.items()
    ],
)

{'Id': '4oaxyxst',
 'ResponseMetadata': {'RequestId': 'cb6abd9c-53da-4249-9227-23202ae51d3d',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': 'cb6abd9c-53da-4249-9227-23202ae51d3d',
   'content-type': 'application/x-amz-json-1.1',
   'content-length': '17',
   'date': 'Wed, 17 Jul 2024 17:37:33 GMT',
   'connection': 'close'},
  'RetryAttempts': 0}}

In [23]:
# Display the access control configurations again, after the create call above.
acl_configs_response = kendra.list_access_control_configurations(IndexId=kendra_index_id)
acl_configs_response["AccessControlConfigurations"]

[{'Id': '4oaxyxst'}]

In [27]:
# Query as a member of the "HR" group.
user_context = {"Groups": ["HR"]}
result = kendra.query(
    IndexId=kendra_index_id,
    QueryText="What is PECCARY",
    UserContext=user_context,
)
for item in result["ResultItems"]:
    print(item)

{'Id': '7c5b44d8-adfd-4a3b-978c-1ce808d4bec9-ed24ebda-b9ef-4066-9e15-52cbd24865b1', 'Type': 'ANSWER', 'Format': 'TEXT', 'AdditionalAttributes': [{'Key': 'AnswerText', 'ValueType': 'TEXT_WITH_HIGHLIGHTS_VALUE', 'Value': {'TextWithHighlightsValue': {'Text': 'Keywords: Theoretical techniques(2093) — Galaxy dynamics(591) – Orbits(1184) — Orbit determi-\n\n\nnation(1175) — Time series analysis(1916) — Exoplanet dynamics(490)\n\n\n1. INTRODUCTION\n\n\nPermutation Entropy and statistiCal Complexity\n\n\nAnalysis for astRophYsics (PECCARY) is a statistical\n\n\nmethod used to characterize a time-series as regular,\n\n\nstochastic (i.e., random or noisy), or complex, and iden-\n\n\ntify its relevant timescales (Bandt & Pompe 2002; Rosso\n\n\net al. 2007; Weck et al. 2015).', 'Highlights': [{'BeginOffset': 184, 'EndOffset': 258, 'TopAnswer': False, 'Type': 'STANDARD'}, {'BeginOffset': 260, 'EndOffset': 267, 'TopAnswer': False, 'Type': 'STANDARD'}]}}}], 'DocumentId': 's3://kendra-filter-2/2407.11