diff --git a/lib/aws-genai-llm-chatbot-stack.ts b/lib/aws-genai-llm-chatbot-stack.ts index aae8a35f..7e204370 100644 --- a/lib/aws-genai-llm-chatbot-stack.ts +++ b/lib/aws-genai-llm-chatbot-stack.ts @@ -291,6 +291,9 @@ export class AwsGenAILLMChatbotStack extends cdk.Stack { `/${this.stackName}/RagEngines/Workspaces/DeleteWorkspace/DeleteWorkspaceFunction/ServiceRole/Resource`, `/${this.stackName}/RagEngines/Workspaces/DeleteWorkspace/DeleteWorkspaceFunction/ServiceRole/DefaultPolicy/Resource`, `/${this.stackName}/RagEngines/Workspaces/DeleteWorkspace/DeleteWorkspace/Role/DefaultPolicy/Resource`, + `/${this.stackName}/RagEngines/Workspaces/DeleteDocument/DeleteDocumentFunction/ServiceRole/Resource`, + `/${this.stackName}/RagEngines/Workspaces/DeleteDocument/DeleteDocumentFunction/ServiceRole/DefaultPolicy/Resource`, + `/${this.stackName}/RagEngines/Workspaces/DeleteDocument/DeleteDocument/Role/DefaultPolicy/Resource`, `/${this.stackName}/RagEngines/DataImport/FileImportBatchJob/ManagedEc2EcsComputeEnvironment/InstanceProfileRole/Resource`, `/${this.stackName}/RagEngines/DataImport/WebCrawlerBatchJob/WebCrawlerManagedEc2EcsComputeEnvironment/InstanceProfileRole/Resource`, `/${this.stackName}/BucketNotificationsHandler050a0587b7544547bf325f094a3db834/Role/Resource`, diff --git a/lib/chatbot-api/functions/api-handler/routes/documents.py b/lib/chatbot-api/functions/api-handler/routes/documents.py index 9f56af8d..d7424480 100644 --- a/lib/chatbot-api/functions/api-handler/routes/documents.py +++ b/lib/chatbot-api/functions/api-handler/routes/documents.py @@ -65,6 +65,9 @@ class GetDocumentRequest(BaseModel): workspaceId: str documentId: str +class DeleteDocumentRequest(BaseModel): + workspaceId: str + documentId: str class GetRssPostsRequest(BaseModel): workspaceId: str @@ -132,6 +135,13 @@ def get_documents(input: dict): "lastDocumentId": result["last_document_id"], } +@router.resolver(field_name="deleteDocument") +@tracer.capture_method +def delete_document(input: 
dict): + request = DeleteDocumentRequest(**input) + result = genai_core.documents.delete_document(request.workspaceId, request.documentId) + + return result @router.resolver(field_name="getDocument") @tracer.capture_method diff --git a/lib/chatbot-api/rest-api.ts b/lib/chatbot-api/rest-api.ts index a0ac6271..ca65d1b0 100644 --- a/lib/chatbot-api/rest-api.ts +++ b/lib/chatbot-api/rest-api.ts @@ -85,6 +85,8 @@ export class ApiResolvers extends Construct { ?.attrEndpointName ?? "", DELETE_WORKSPACE_WORKFLOW_ARN: props.ragEngines?.deleteWorkspaceWorkflow?.stateMachineArn ?? "", + DELETE_DOCUMENT_WORKFLOW_ARN: + props.ragEngines?.deleteDocumentWorkflow?.stateMachineArn ?? "", CREATE_AURORA_WORKSPACE_WORKFLOW_ARN: props.ragEngines?.auroraPgVector?.createAuroraWorkspaceWorkflow ?.stateMachineArn ?? "", @@ -225,6 +227,10 @@ export class ApiResolvers extends Construct { ); } + if (props.ragEngines?.deleteDocumentWorkflow) { + props.ragEngines.deleteDocumentWorkflow.grantStartExecution(apiHandler); + } + if (props.ragEngines?.sageMakerRagModels) { apiHandler.addToRolePolicy( new iam.PolicyStatement({ diff --git a/lib/chatbot-api/schema/schema.graphql b/lib/chatbot-api/schema/schema.graphql index a484c5ea..f5a8f42c 100644 --- a/lib/chatbot-api/schema/schema.graphql +++ b/lib/chatbot-api/schema/schema.graphql @@ -91,6 +91,11 @@ type DocumentResult @aws_cognito_user_pools { status: String } +type DeleteDocumentResult @aws_cognito_user_pools { + documentId: String! + deleted: Boolean! +} + type UserFeedbackResult @aws_cognito_user_pools { feedback_id: String! } @@ -242,6 +247,11 @@ type SessionHistoryItem @aws_cognito_user_pools { metadata: String } +input DeleteDocumentInput { + workspaceId: String! + documentId: String! +} + input UserFeedbackInput { sessionId: String! key: Int! 
@@ -311,9 +321,11 @@ type Mutation { @aws_cognito_user_pools startKendraDataSync(workspaceId: String!): Boolean @aws_cognito_user_pools deleteWorkspace(workspaceId: String!): Boolean @aws_cognito_user_pools + deleteDocument(input: DeleteDocumentInput!): DeleteDocumentResult @aws_cognito_user_pools addTextDocument(input: TextDocumentInput!): DocumentResult @aws_cognito_user_pools - addUserFeedback(input: UserFeedbackInput!): UserFeedbackResult @aws_cognito_user_pools + addUserFeedback(input: UserFeedbackInput!): UserFeedbackResult + @aws_cognito_user_pools addQnADocument(input: QnADocumentInput!): DocumentResult @aws_cognito_user_pools setDocumentSubscriptionStatus( @@ -366,4 +378,4 @@ schema { query: Query mutation: Mutation subscription: Subscription -} \ No newline at end of file +} diff --git a/lib/rag-engines/data-import/functions/upload-handler/index.py b/lib/rag-engines/data-import/functions/upload-handler/index.py index a7287866..1238e1ca 100644 --- a/lib/rag-engines/data-import/functions/upload-handler/index.py +++ b/lib/rag-engines/data-import/functions/upload-handler/index.py @@ -56,6 +56,7 @@ def process_record(record): workspace_id=workspace_id, document_type="file", path=file_name, + title=file_name, size_in_bytes=object_size, ) diff --git a/lib/rag-engines/index.ts b/lib/rag-engines/index.ts index f89dc688..55e99059 100644 --- a/lib/rag-engines/index.ts +++ b/lib/rag-engines/index.ts @@ -33,6 +33,7 @@ export class RagEngines extends Construct { public readonly fileImportWorkflow?: sfn.StateMachine; public readonly websiteCrawlingWorkflow?: sfn.StateMachine; public readonly deleteWorkspaceWorkflow?: sfn.StateMachine; + public readonly deleteDocumentWorkflow?: sfn.StateMachine; public readonly dataImport: DataImport; constructor(scope: Construct, id: string, props: RagEnginesProps) { @@ -118,6 +119,7 @@ export class RagEngines extends Construct { this.fileImportWorkflow = dataImport.fileImportWorkflow; this.websiteCrawlingWorkflow = 
dataImport.websiteCrawlingWorkflow; this.deleteWorkspaceWorkflow = workspaces.deleteWorkspaceWorkflow; + this.deleteDocumentWorkflow = workspaces.deleteDocumentWorkflow; this.dataImport = dataImport; } } diff --git a/lib/rag-engines/workspaces/delete-document.ts b/lib/rag-engines/workspaces/delete-document.ts new file mode 100644 index 00000000..e0a0c373 --- /dev/null +++ b/lib/rag-engines/workspaces/delete-document.ts @@ -0,0 +1,177 @@ +import * as cdk from "aws-cdk-lib"; +import * as iam from "aws-cdk-lib/aws-iam"; +import * as lambda from "aws-cdk-lib/aws-lambda"; +import * as logs from "aws-cdk-lib/aws-logs"; +import * as sfn from "aws-cdk-lib/aws-stepfunctions"; +import * as tasks from "aws-cdk-lib/aws-stepfunctions-tasks"; +import { Construct } from "constructs"; +import * as path from "path"; +import { Shared } from "../../shared"; +import { SystemConfig } from "../../shared/types"; +import { AuroraPgVector } from "../aurora-pgvector"; +import { DataImport } from "../data-import"; +import { KendraRetrieval } from "../kendra-retrieval"; +import { OpenSearchVector } from "../opensearch-vector"; +import { RagDynamoDBTables } from "../rag-dynamodb-tables"; +import { RemovalPolicy } from "aws-cdk-lib"; + +export interface DeleteDocumentProps { + readonly config: SystemConfig; + readonly shared: Shared; + readonly dataImport: DataImport; + readonly ragDynamoDBTables: RagDynamoDBTables; + readonly auroraPgVector?: AuroraPgVector; + readonly openSearchVector?: OpenSearchVector; + readonly kendraRetrieval?: KendraRetrieval; +} + +export class DeleteDocument extends Construct { + public readonly stateMachine?: sfn.StateMachine; + + constructor(scope: Construct, id: string, props: DeleteDocumentProps) { + super(scope, id); + + const deleteFunction = new lambda.Function(this, "DeleteDocumentFunction", { + vpc: props.shared.vpc, + code: props.shared.sharedCode.bundleWithLambdaAsset( + path.join(__dirname, "./functions/delete-document-workflow/delete") + ), + runtime: 
props.shared.pythonRuntime, + architecture: props.shared.lambdaArchitecture, + handler: "index.lambda_handler", + layers: [props.shared.powerToolsLayer, props.shared.commonLayer], + timeout: cdk.Duration.minutes(15), + logRetention: logs.RetentionDays.ONE_WEEK, + environment: { + ...props.shared.defaultEnvironmentVariables, + AURORA_DB_SECRET_ID: props.auroraPgVector?.database.secret + ?.secretArn as string, + UPLOAD_BUCKET_NAME: props.dataImport.uploadBucket.bucketName, + PROCESSING_BUCKET_NAME: props.dataImport.processingBucket.bucketName, + WORKSPACES_TABLE_NAME: + props.ragDynamoDBTables.workspacesTable.tableName, + WORKSPACES_BY_OBJECT_TYPE_INDEX_NAME: + props.ragDynamoDBTables.workspacesByObjectTypeIndexName, + DOCUMENTS_TABLE_NAME: + props.ragDynamoDBTables?.documentsTable.tableName ?? "", + DOCUMENTS_BY_COMPOUND_KEY_INDEX_NAME: + props.ragDynamoDBTables?.documentsByCompoundKeyIndexName ?? "", + DEFAULT_KENDRA_S3_DATA_SOURCE_BUCKET_NAME: + props.kendraRetrieval?.kendraS3DataSourceBucket?.bucketName ?? "", + OPEN_SEARCH_COLLECTION_ENDPOINT: + props.openSearchVector?.openSearchCollectionEndpoint ?? 
"", + }, + }); + + if (props.auroraPgVector) { + props.auroraPgVector.database.secret?.grantRead(deleteFunction); + props.auroraPgVector.database.connections.allowDefaultPortFrom( + deleteFunction + ); + } + + if (props.openSearchVector) { + deleteFunction.addToRolePolicy( + new iam.PolicyStatement({ + actions: [ + "aoss:APIAccessAll", + "aoss:DescribeIndex", + "aoss:UpdateIndex", + ], + resources: [props.openSearchVector.openSearchCollection.attrArn], + }) + ); + + props.openSearchVector.addToAccessPolicy( + "delete-document", + [deleteFunction.role?.roleArn], + [ + "aoss:DescribeIndex", + "aoss:UpdateIndex", + "aoss:ReadDocument", + "aoss:WriteDocument", + ] + ); + } + + props.dataImport.uploadBucket.grantReadWrite(deleteFunction); + props.dataImport.processingBucket.grantReadWrite(deleteFunction); + props.kendraRetrieval?.kendraS3DataSourceBucket?.grantReadWrite( + deleteFunction + ); + props.ragDynamoDBTables.workspacesTable.grantReadWriteData(deleteFunction); + props.ragDynamoDBTables.documentsTable.grantReadWriteData(deleteFunction); + + const handleError = new tasks.DynamoUpdateItem(this, "HandleError", { + table: props.ragDynamoDBTables.documentsTable, + key: { + workspace_id: tasks.DynamoAttributeValue.fromString( + sfn.JsonPath.stringAt("$.workspace_id") + ), + document_id: tasks.DynamoAttributeValue.fromString( + sfn.JsonPath.stringAt("$.document_id") + ), + }, + updateExpression: "set #status = :error", + expressionAttributeNames: { + "#status": "status", + }, + expressionAttributeValues: { + ":error": tasks.DynamoAttributeValue.fromString("error"), + }, + }).next( + new sfn.Fail(this, "Fail", { + cause: "Document deletion failed", + }) + ); + + const setDeleting = new tasks.DynamoUpdateItem(this, "SetDeleting", { + table: props.ragDynamoDBTables.documentsTable, + key: { + workspace_id: tasks.DynamoAttributeValue.fromString( + sfn.JsonPath.stringAt("$.workspace_id") + ), + document_id: tasks.DynamoAttributeValue.fromString( + 
sfn.JsonPath.stringAt("$.document_id")
+        ),
+      },
+      updateExpression: "set #status=:statusValue",
+      expressionAttributeNames: {
+        "#status": "status",
+      },
+      expressionAttributeValues: {
+        ":statusValue": tasks.DynamoAttributeValue.fromString("deleting"),
+      },
+      resultPath: sfn.JsonPath.DISCARD, // keep the original input for the next state
+    });
+
+    const deleteTask = new tasks.LambdaInvoke(this, "Delete", {
+      lambdaFunction: deleteFunction,
+      resultPath: "$.deleteResult",
+    }).addCatch(handleError, {
+      errors: ["States.ALL"], // any task failure marks the document as "error"
+      resultPath: "$.deleteResult",
+    });
+
+    const workflow = setDeleting
+      .next(deleteTask)
+      .next(new sfn.Succeed(this, "Success"));
+
+    const logGroup = new logs.LogGroup(this, "DeleteDocumentSMLogGroup", {
+      removalPolicy: RemovalPolicy.DESTROY,
+    });
+
+    const stateMachine = new sfn.StateMachine(this, "DeleteDocument", {
+      definitionBody: sfn.DefinitionBody.fromChainable(workflow),
+      timeout: cdk.Duration.minutes(20), // must outlast the 15-minute Lambda timeout; an execution timeout bypasses HandleError
+      comment: "Delete Document Workflow",
+      tracingEnabled: true,
+      logs: {
+        destination: logGroup,
+        level: sfn.LogLevel.ALL,
+      },
+    });
+
+    this.stateMachine = stateMachine;
+  }
+}
diff --git a/lib/rag-engines/workspaces/functions/delete-document-workflow/delete/index.py b/lib/rag-engines/workspaces/functions/delete-document-workflow/delete/index.py
new file mode 100644
index 00000000..73778c9b
--- /dev/null
+++ b/lib/rag-engines/workspaces/functions/delete-document-workflow/delete/index.py
@@ -0,0 +1,32 @@
+import genai_core.types
+import genai_core.workspaces
+import genai_core.documents
+import genai_core.aurora.delete
+import genai_core.opensearch.delete
+import genai_core.kendra.delete
+from aws_lambda_powertools import Logger
+from aws_lambda_powertools.utilities.typing import LambdaContext
+
+logger = Logger()
+
+
+@logger.inject_lambda_context(log_event=True)
+def lambda_handler(event, context: LambdaContext):
+    workspace_id = event["workspace_id"]
+    document_id = event["document_id"]
+    workspace = genai_core.workspaces.get_workspace(workspace_id)
+    if workspace
is None: + raise genai_core.types.CommonError("Workspace not found") + + document = genai_core.documents.get_document(workspace_id, document_id) + if document is None: + raise genai_core.types.CommonError("Document not found") + + if workspace["engine"] == "opensearch": + genai_core.opensearch.delete.delete_open_search_document(workspace_id, document) + elif workspace["engine"] == "aurora": + genai_core.aurora.delete.delete_aurora_document(workspace_id, document) + elif workspace["engine"] == "kendra": + genai_core.kendra.delete.delete_kendra_document(workspace_id, document) + else: + raise genai_core.types.CommonError("Workspace engine not supported") \ No newline at end of file diff --git a/lib/rag-engines/workspaces/index.ts b/lib/rag-engines/workspaces/index.ts index 4605a8e9..c9dd9b7d 100644 --- a/lib/rag-engines/workspaces/index.ts +++ b/lib/rag-engines/workspaces/index.ts @@ -8,6 +8,7 @@ import { KendraRetrieval } from "../kendra-retrieval"; import { OpenSearchVector } from "../opensearch-vector"; import { RagDynamoDBTables } from "../rag-dynamodb-tables"; import { DeleteWorkspace } from "./delete-workspace"; +import { DeleteDocument } from "./delete-document"; export interface WorkkspacesProps { readonly config: SystemConfig; @@ -21,11 +22,26 @@ export interface WorkkspacesProps { export class Workspaces extends Construct { public readonly deleteWorkspaceWorkflow?: sfn.StateMachine; + public readonly deleteDocumentWorkflow?: sfn.StateMachine; constructor(scope: Construct, id: string, props: WorkkspacesProps) { super(scope, id); - const workflow = new DeleteWorkspace(this, "DeleteWorkspace", { + const deleteWorkspaceWorkflow = new DeleteWorkspace( + this, + "DeleteWorkspace", + { + config: props.config, + shared: props.shared, + dataImport: props.dataImport, + ragDynamoDBTables: props.ragDynamoDBTables, + auroraPgVector: props.auroraPgVector, + openSearchVector: props.openSearchVector, + kendraRetrieval: props.kendraRetrieval, + } + ); + + const 
deleteDocumentWorkflow = new DeleteDocument(this, "DeleteDocument", {
       config: props.config,
       shared: props.shared,
       dataImport: props.dataImport,
@@ -35,6 +51,7 @@ export class Workspaces extends Construct {
       kendraRetrieval: props.kendraRetrieval,
     });

-    this.deleteWorkspaceWorkflow = workflow.stateMachine;
+    this.deleteWorkspaceWorkflow = deleteWorkspaceWorkflow.stateMachine;
+    this.deleteDocumentWorkflow = deleteDocumentWorkflow.stateMachine;
   }
 }
diff --git a/lib/shared/layers/common/requirements.txt b/lib/shared/layers/common/requirements.txt
index 17d18ee8..26ebbeac 100644
--- a/lib/shared/layers/common/requirements.txt
+++ b/lib/shared/layers/common/requirements.txt
@@ -6,7 +6,7 @@ aws_requests_auth==0.4.3
 requests-aws4auth==1.2.3
 langchain==0.1.17
 langchain-community==0.0.36
-opensearch-py==2.3.1
+opensearch-py==2.4.2
 psycopg2-binary==2.9.7
 pgvector==0.2.2
 pydantic==2.4.0
diff --git a/lib/shared/layers/python-sdk/python/genai_core/aurora/delete.py b/lib/shared/layers/python-sdk/python/genai_core/aurora/delete.py
index 77733ed9..6df615e1 100644
--- a/lib/shared/layers/python-sdk/python/genai_core/aurora/delete.py
+++ b/lib/shared/layers/python-sdk/python/genai_core/aurora/delete.py
@@ -1,8 +1,13 @@
 import os
 import boto3
+from botocore.exceptions import BotoCoreError, ClientError
 import genai_core.utils.delete_files_with_prefix
+import genai_core.utils.delete_files_with_object_key
+import genai_core.types
+import psycopg2
 from psycopg2 import sql
 from genai_core.aurora.connection import AuroraConnection
+from datetime import datetime

 PROCESSING_BUCKET_NAME = os.environ["PROCESSING_BUCKET_NAME"]
 UPLOAD_BUCKET_NAME = os.environ["UPLOAD_BUCKET_NAME"]
@@ -68,3 +73,90 @@ def delete_aurora_workspace(workspace: dict):
     )

     print(f"Delete Item succeeded: {response}")
+
+def delete_aurora_document(workspace_id: str, document: dict):
+    """Delete one document from an Aurora workspace.
+
+    Removes the uploaded object (when the document has a path), the
+    processing files, the document's rows in the workspace table and the
+    DynamoDB document item, then decrements the workspace counters.
+
+    Errors raised before the document item is deleted propagate to the
+    caller; the final counter update stays best-effort because the
+    document item is already gone by then.
+    """
+    table_name = sql.Identifier(workspace_id.replace("-", ""))
+    document_id = document["document_id"]
+    document_vectors = document["vectors"]
+    documents_diff = 1
+    document_size_in_bytes = document["size_in_bytes"]
+    timestamp = datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%S.%fZ")
+
+    if document["path"]:
+        upload_bucket_key = workspace_id + "/" + document["path"]
+        genai_core.utils.delete_files_with_object_key.delete_files_with_object_key(
+            UPLOAD_BUCKET_NAME, upload_bucket_key
+        )
+
+    processing_bucket_key = workspace_id + "/" + document_id
+
+    genai_core.utils.delete_files_with_prefix.delete_files_with_prefix(
+        PROCESSING_BUCKET_NAME, processing_bucket_key
+    )
+
+    deleteAuroraDocument(document_id, table_name)
+
+    documents_table = dynamodb.Table(DOCUMENTS_TABLE_NAME)
+    workspaces_table = dynamodb.Table(WORKSPACES_TABLE_NAME)
+
+    try:
+        response = documents_table.delete_item(
+            Key={
+                "workspace_id": workspace_id,
+                "document_id": document_id,
+            }
+        )
+        print(f"Delete document succeeded: {response}")
+    except (BotoCoreError, ClientError) as error:
+        print(f"An error occurred: {error}")
+        # Re-raise: the document item still exists, so the caller must see the failure.
+        raise
+
+    try:
+        updateResponse = workspaces_table.update_item(
+            Key={
+                "workspace_id": workspace_id,
+                "object_type": WORKSPACE_OBJECT_TYPE,
+            },
+            UpdateExpression="ADD size_in_bytes :incrementValue, documents :documentsIncrementValue, vectors :vectorsIncrementValue SET updated_at=:timestampValue",
+            ExpressionAttributeValues={
+                ":incrementValue": -document_size_in_bytes,
+                ":documentsIncrementValue": -documents_diff,
+                ":vectorsIncrementValue": -document_vectors,
+                ":timestampValue": timestamp,
+            },
+            ReturnValues="UPDATED_NEW",
+        )
+        print(f"Workspaces table updated for the document: {updateResponse}")
+    except (BotoCoreError, ClientError) as error:
+        # Best-effort: the document item has already been deleted above.
+        print(f"An error occurred: {error}")
+
+
+def deleteAuroraDocument(document_id: str, table_name: sql.Identifier):
+    """Delete all rows of the document from the workspace table.
+
+    Database errors are logged and re-raised so that the DynamoDB item is
+    not removed while rows of the document remain in Aurora.
+    """
+    try:
+        with AuroraConnection(autocommit=False) as cursor:
+            cursor.execute(
+                sql.SQL("DELETE FROM {table} WHERE document_id = %s").format(table=table_name),
+                (document_id,),
+            )
+            cursor.connection.commit()
+            print(f"Deleted document {document_id} from {table_name}")
+    except psycopg2.Error as e:
+        print(f"An error occurred while deleting document from Aurora table: {e}")
+        raise
diff --git a/lib/shared/layers/python-sdk/python/genai_core/documents.py b/lib/shared/layers/python-sdk/python/genai_core/documents.py
index e7c153d1..dc725236 100644
--- a/lib/shared/layers/python-sdk/python/genai_core/documents.py
+++ b/lib/shared/layers/python-sdk/python/genai_core/documents.py
@@ -27,6 +27,7 @@
     "DEFAULT_KENDRA_S3_DATA_SOURCE_BUCKET_NAME"
 )

+DELETE_DOCUMENT_WORKFLOW_ARN = os.environ.get("DELETE_DOCUMENT_WORKFLOW_ARN")
 RSS_FEED_INGESTOR_FUNCTION = os.environ.get("RSS_FEED_INGESTOR_FUNCTION", "")
 RSS_FEED_SCHEDULE_ROLE_ARN = os.environ.get("RSS_FEED_SCHEDULE_ROLE_ARN", "")
 DOCUMENTS_BY_STATUS_INDEX = os.environ.get("DOCUMENTS_BY_STATUS_INDEX", "")
@@ -172,6 +173,42 @@ def get_document(workspace_id: str, document_id: str):

     return document

+def delete_document(workspace_id: str, document_id: str):
+    """Check that the document can be deleted and start the delete workflow.
+
+    Raises genai_core.types.CommonError when no workflow is configured, the
+    document does not exist, or its status is neither "processed" nor
+    "error". The returned "deleted" flag only confirms that the Step
+    Functions execution was started.
+    """
+    if not DELETE_DOCUMENT_WORKFLOW_ARN:
+        # os.environ.get() yields None (or "") when no workflow is deployed.
+        raise genai_core.types.CommonError("Document deletion is not configured")
+
+    response = documents_table.get_item(
+        Key={"workspace_id": workspace_id, "document_id": document_id}
+    )
+
+    document = response.get("Item")
+
+    if not document:
+        raise genai_core.types.CommonError("Document not found")
+
+    if document["status"] != "processed" and document["status"] != "error":
+        raise genai_core.types.CommonError("Document not ready for deletion")
+
+    response = sfn_client.start_execution(
+        stateMachineArn=DELETE_DOCUMENT_WORKFLOW_ARN,
+        input=json.dumps(
+            {
+                "workspace_id": workspace_id,
+                "document_id": document_id,
+            }
+        ),
+    )
+
+    print(response)
+    return {"documentId": document_id, "deleted": True}

 def get_document_content(workspace_id: str, document_id: str):
     content_key = f"{workspace_id}/{document_id}/content.txt"
diff --git a/lib/shared/layers/python-sdk/python/genai_core/kendra/delete.py b/lib/shared/layers/python-sdk/python/genai_core/kendra/delete.py
index f8d6f8a3..130b54ea 100644
--- a/lib/shared/layers/python-sdk/python/genai_core/kendra/delete.py
+++ b/lib/shared/layers/python-sdk/python/genai_core/kendra/delete.py
@@ -1,6 +1,8 @@
 import os
 import boto3
+from botocore.exceptions import BotoCoreError,
ClientError
 import genai_core.utils.delete_files_with_prefix
+from datetime import datetime

 PROCESSING_BUCKET_NAME = os.environ["PROCESSING_BUCKET_NAME"]
 UPLOAD_BUCKET_NAME = os.environ["UPLOAD_BUCKET_NAME"]
@@ -70,3 +72,93 @@ def delete_kendra_workspace(workspace: dict):
     )

     print(f"Delete Item succeeded: {response}")
+
+def delete_kendra_document(workspace_id: str, document: dict):
+    """Delete one document from a Kendra workspace.
+
+    Removes the uploaded object (when the document has a path), the
+    processing files, the Kendra data source objects and the DynamoDB
+    document item, then decrements the workspace counters.
+
+    Errors raised before the document item is deleted propagate to the
+    caller; the final counter update stays best-effort because the
+    document item is already gone by then.
+    """
+    # Imported locally: this module's import block does not load the helper.
+    import genai_core.utils.delete_files_with_object_key
+
+    document_id = document["document_id"]
+    document_vectors = document["vectors"]
+    documents_diff = 1
+    document_size_in_bytes = document["size_in_bytes"]
+    timestamp = datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%S.%fZ")
+    document_type = document["document_type"]
+
+    if document["path"]:
+        upload_bucket_key = workspace_id + "/" + document["path"]
+        genai_core.utils.delete_files_with_object_key.delete_files_with_object_key(
+            UPLOAD_BUCKET_NAME, upload_bucket_key
+        )
+
+    processing_bucket_key = workspace_id + "/" + document_id
+
+    genai_core.utils.delete_files_with_prefix.delete_files_with_prefix(
+        PROCESSING_BUCKET_NAME, processing_bucket_key
+    )
+
+    deleteKendraDocument(workspace_id, document_id, document_type)
+
+    documents_table = dynamodb.Table(DOCUMENTS_TABLE_NAME)
+    workspaces_table = dynamodb.Table(WORKSPACES_TABLE_NAME)
+
+    try:
+        response = documents_table.delete_item(
+            Key={
+                "workspace_id": workspace_id,
+                "document_id": document_id,
+            }
+        )
+        print(f"Delete document succeeded: {response}")
+    except (BotoCoreError, ClientError) as error:
+        print(f"An error occurred: {error}")
+        # Re-raise: the document item still exists, so the caller must see the failure.
+        raise
+
+    try:
+        updateResponse = workspaces_table.update_item(
+            Key={
+                "workspace_id": workspace_id,
+                "object_type": WORKSPACE_OBJECT_TYPE,
+            },
+            UpdateExpression="ADD size_in_bytes :incrementValue, documents :documentsIncrementValue, vectors :vectorsIncrementValue SET updated_at=:timestampValue",
+            ExpressionAttributeValues={
+                ":incrementValue": -document_size_in_bytes,
+                ":documentsIncrementValue": -documents_diff,
+                ":vectorsIncrementValue": -document_vectors,
+                ":timestampValue": timestamp,
+            },
+            ReturnValues="UPDATED_NEW",
+        )
+        print(f"Workspaces table updated for the document: {updateResponse}")
+    except (BotoCoreError, ClientError) as error:
+        # Best-effort: the document item has already been deleted above.
+        print(f"An error occurred: {error}")
+
+
+def deleteKendraDocument(workspace_id, document_id, document_type):
+    """Remove a text document's content and metadata from the Kendra bucket.
+
+    Only documents of type "text" are handled; other types are a no-op.
+    """
+    if document_type == "text":
+        processing_object_key = f"{workspace_id}/{document_id}/content.txt"
+        kendra_object_key = f"documents/{processing_object_key}"
+        kendra_metadata_key = (
+            f"metadata/documents/{processing_object_key}.metadata.json"
+        )
+        genai_core.utils.delete_files_with_prefix.delete_files_with_prefix(
+            DEFAULT_KENDRA_S3_DATA_SOURCE_BUCKET_NAME, kendra_object_key
+        )
+        genai_core.utils.delete_files_with_prefix.delete_files_with_prefix(
+            DEFAULT_KENDRA_S3_DATA_SOURCE_BUCKET_NAME, kendra_metadata_key
+        )
diff --git a/lib/shared/layers/python-sdk/python/genai_core/opensearch/delete.py b/lib/shared/layers/python-sdk/python/genai_core/opensearch/delete.py
index b27f58a5..954bc3a4 100644
--- a/lib/shared/layers/python-sdk/python/genai_core/opensearch/delete.py
+++ b/lib/shared/layers/python-sdk/python/genai_core/opensearch/delete.py
@@ -1,7 +1,11 @@
 import os
 import boto3
+from botocore.exceptions import BotoCoreError, ClientError
 from .client import get_open_search_client
 import genai_core.utils.delete_files_with_prefix
+import genai_core.utils.delete_files_with_object_key
+import genai_core.types
+from datetime import datetime


 PROCESSING_BUCKET_NAME = os.environ["PROCESSING_BUCKET_NAME"]
@@ -70,3 +74,103 @@ def delete_open_search_workspace(workspace: dict):
     )

     print(f"Delete Item succeeded: {response}")
+
+
+def delete_open_search_document(workspace_id: str, document: dict):
+    """Delete one document from an OpenSearch workspace.
+
+    Removes the uploaded object (when the document has a path), the
+    processing files, the document's entries in the workspace index and the
+    DynamoDB document item, then decrements the workspace counters.
+
+    Errors raised before the document item is deleted propagate to the
+    caller; the final counter update stays best-effort because the
+    document item is already gone by then.
+    """
+    index_name = workspace_id.replace("-", "")
+    document_id = document["document_id"]
+    document_vectors = document["vectors"]
+    documents_diff = 1
+    document_size_in_bytes = document["size_in_bytes"]
+    timestamp = datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%S.%fZ")
+
+    if document["path"]:
+        upload_bucket_key = workspace_id + "/" + document["path"]
+        genai_core.utils.delete_files_with_object_key.delete_files_with_object_key(
+            UPLOAD_BUCKET_NAME, upload_bucket_key
+        )
+
+    processing_bucket_key = workspace_id + "/" + document_id
+
+    genai_core.utils.delete_files_with_prefix.delete_files_with_prefix(
+        PROCESSING_BUCKET_NAME, processing_bucket_key
+    )
+
+    deleteOpenSearchDocument(document_id, index_name)
+
+    documents_table = dynamodb.Table(DOCUMENTS_TABLE_NAME)
+    workspaces_table = dynamodb.Table(WORKSPACES_TABLE_NAME)
+
+    try:
+        response = documents_table.delete_item(
+            Key={
+                "workspace_id": workspace_id,
+                "document_id": document_id,
+            }
+        )
+        print(f"Delete document succeeded: {response}")
+    except (BotoCoreError, ClientError) as error:
+        print(f"An error occurred: {error}")
+        # Re-raise: the document item still exists, so the caller must see the failure.
+        raise
+
+    try:
+        updateResponse = workspaces_table.update_item(
+            Key={
+                "workspace_id": workspace_id,
+                "object_type": WORKSPACE_OBJECT_TYPE,
+            },
+            UpdateExpression="ADD size_in_bytes :incrementValue, documents :documentsIncrementValue, vectors :vectorsIncrementValue SET updated_at=:timestampValue",
+            ExpressionAttributeValues={
+                ":incrementValue": -document_size_in_bytes,
+                ":documentsIncrementValue": -documents_diff,
+                ":vectorsIncrementValue": -document_vectors,
+                ":timestampValue": timestamp,
+            },
+            ReturnValues="UPDATED_NEW",
+        )
+        print(f"Workspaces table updated for the document: {updateResponse}")
+    except (BotoCoreError, ClientError) as error:
+        # Best-effort: the document item has already been deleted above.
+        print(f"An error occurred: {error}")
+
+
+def deleteOpenSearchDocument(document_id, index_name):
+    """Delete every index entry of the document from the workspace index.
+
+    Matching ids are collected first and deleted afterwards: deleting while
+    paging with a growing from_ offset can skip hits once earlier deletions
+    become visible to search. Does nothing when the index does not exist.
+    """
+    client = get_open_search_client()
+    if client.indices.exists(index_name):
+        # NOTE(review): "match" is an analyzed query; confirm the document_id
+        # mapping cannot make it match entries of other documents.
+        search_query = {"query": {"match": {"document_id": document_id}}}
+        batch_size = 100
+        from_ = 0
+        chunk_ids = []
+        while True:
+            search_response = client.search(
+                index=index_name, body=search_query, from_=from_, size=batch_size
+            )
+            hits = search_response["hits"]["hits"]
+            if not hits:
+                break
+            chunk_ids.extend(hit["_id"] for hit in hits)
+            from_ += batch_size
+
+        for chunk_id in chunk_ids:
+            client.delete(index=index_name, id=chunk_id)
+
+        print(f"Record {document_id} deleted.")
diff --git
a/lib/shared/layers/python-sdk/python/genai_core/utils/delete_files_with_object_key.py b/lib/shared/layers/python-sdk/python/genai_core/utils/delete_files_with_object_key.py new file mode 100644 index 00000000..81a9fd45 --- /dev/null +++ b/lib/shared/layers/python-sdk/python/genai_core/utils/delete_files_with_object_key.py @@ -0,0 +1,6 @@ +import boto3 + +def delete_files_with_object_key (bucket_name, object_key): + s3_client = boto3.client("s3") + s3_client.delete_object(Bucket=bucket_name, Key=object_key) + print(f"Deleted {object_key} from {bucket_name}.") diff --git a/lib/user-interface/react-app/src/common/api-client/documents-client.ts b/lib/user-interface/react-app/src/common/api-client/documents-client.ts index 59d3c050..7c60b505 100644 --- a/lib/user-interface/react-app/src/common/api-client/documents-client.ts +++ b/lib/user-interface/react-app/src/common/api-client/documents-client.ts @@ -12,6 +12,7 @@ import { addTextDocument, addWebsite, setDocumentSubscriptionStatus, + deleteDocument, } from "../../graphql/mutations"; import { AddQnADocumentMutation, @@ -24,6 +25,7 @@ import { GetRSSPostsQuery, GetUploadFileURLQuery, UpdateRssFeedMutation, + DeleteDocumentMutation, } from "../../API"; import { RagDocumentType } from "../types"; @@ -243,4 +245,20 @@ export class DocumentsClient { }); return result; } + + async deleteDocument( + workspaceId: string, + documentId: string + ): Promise>> { + const result = API.graphql>({ + query: deleteDocument, + variables: { + input: { + workspaceId, + documentId, + }, + }, + }); + return result; + } } diff --git a/lib/user-interface/react-app/src/components/rag/document-delete-modal.tsx b/lib/user-interface/react-app/src/components/rag/document-delete-modal.tsx new file mode 100644 index 00000000..680a7485 --- /dev/null +++ b/lib/user-interface/react-app/src/components/rag/document-delete-modal.tsx @@ -0,0 +1,59 @@ +import { + Modal, + Box, + SpaceBetween, + Button, + Alert, +} from "@cloudscape-design/components"; 
+import { Document } from "../../API"; + +export interface DocumentDeleteModalProps { + visible: boolean; + document?: Document; + onDelete: () => void; + onDiscard: () => void; +} + +export default function DocumentDeleteModal(props: DocumentDeleteModalProps) { + return ( + + + + + + + } + > + {props.document && ( + + + Permanently delete document{" "} + + {props.document.title} + + ? You can't undo this action. + + Document Id: {props.document.id} + + Proceeding with this action will delete the document with all its + content. + + + )} + + ); +} diff --git a/lib/user-interface/react-app/src/pages/rag/workspace/columns.tsx b/lib/user-interface/react-app/src/pages/rag/workspace/columns.tsx index d105d4d0..1d994233 100644 --- a/lib/user-interface/react-app/src/pages/rag/workspace/columns.tsx +++ b/lib/user-interface/react-app/src/pages/rag/workspace/columns.tsx @@ -1,8 +1,9 @@ -import { Link, StatusIndicator } from "@cloudscape-design/components"; +import { Button, Link, StatusIndicator } from "@cloudscape-design/components"; import { RagDocumentType } from "../../../common/types"; import { Labels } from "../../../common/constants"; import { DateTime } from "luxon"; import { Utils } from "../../../common/utils"; +import "../../../styles/app.scss"; import { Document } from "../../../API"; const FILES_COLUMN_DEFINITIONS = [ @@ -161,18 +162,34 @@ const WEBSITES_COLUMN_DEFINITIONS = [ }, ]; -export function getColumnDefinition(documentType: RagDocumentType) { +export function getColumnDefinition( + documentType: RagDocumentType, + handleDelete: Function +) { + const commonColumns = [ + { + id: "deleteButton", + header: "Delete", + cell: (item: Document) => ( +