Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Added delete document functionality #464

Merged
merged 8 commits into from
Jun 10, 2024
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions lib/aws-genai-llm-chatbot-stack.ts
Original file line number Diff line number Diff line change
Expand Up @@ -243,6 +243,9 @@ export class AwsGenAILLMChatbotStack extends cdk.Stack {
`/${this.stackName}/RagEngines/Workspaces/DeleteWorkspace/DeleteWorkspaceFunction/ServiceRole/Resource`,
`/${this.stackName}/RagEngines/Workspaces/DeleteWorkspace/DeleteWorkspaceFunction/ServiceRole/DefaultPolicy/Resource`,
`/${this.stackName}/RagEngines/Workspaces/DeleteWorkspace/DeleteWorkspace/Role/DefaultPolicy/Resource`,
`/${this.stackName}/RagEngines/Workspaces/DeleteDocument/DeleteDocumentFunction/ServiceRole/Resource`,
`/${this.stackName}/RagEngines/Workspaces/DeleteDocument/DeleteDocumentFunction/ServiceRole/DefaultPolicy/Resource`,
`/${this.stackName}/RagEngines/Workspaces/DeleteDocument/DeleteDocument/Role/DefaultPolicy/Resource`,
`/${this.stackName}/RagEngines/DataImport/FileImportBatchJob/ManagedEc2EcsComputeEnvironment/InstanceProfileRole/Resource`,
`/${this.stackName}/RagEngines/DataImport/WebCrawlerBatchJob/WebCrawlerManagedEc2EcsComputeEnvironment/InstanceProfileRole/Resource`,
`/${this.stackName}/BucketNotificationsHandler050a0587b7544547bf325f094a3db834/Role/Resource`,
Expand Down
10 changes: 10 additions & 0 deletions lib/chatbot-api/functions/api-handler/routes/documents.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,9 @@ class GetDocumentRequest(BaseModel):
workspaceId: str
documentId: str

class DeleteDocumentRequest(BaseModel):
workspaceId: str
documentId: str

class GetRssPostsRequest(BaseModel):
workspaceId: str
Expand Down Expand Up @@ -129,6 +132,13 @@ def get_documents(input: dict):
"lastDocumentId": result["last_document_id"],
}

@router.resolver(field_name="deleteDocument")
@tracer.capture_method
def delete_document(input: dict):
request = DeleteDocumentRequest(**input)
result = genai_core.documents.delete_document(request.workspaceId, request.documentId)

return result

@router.resolver(field_name="getDocument")
@tracer.capture_method
Expand Down
6 changes: 6 additions & 0 deletions lib/chatbot-api/rest-api.ts
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,8 @@ export class ApiResolvers extends Construct {
?.attrEndpointName ?? "",
DELETE_WORKSPACE_WORKFLOW_ARN:
props.ragEngines?.deleteWorkspaceWorkflow?.stateMachineArn ?? "",
DELETE_DOCUMENT_WORKFLOW_ARN:
props.ragEngines?.deleteDocumentWorkflow?.stateMachineArn ?? "",
CREATE_AURORA_WORKSPACE_WORKFLOW_ARN:
props.ragEngines?.auroraPgVector?.createAuroraWorkspaceWorkflow
?.stateMachineArn ?? "",
Expand Down Expand Up @@ -225,6 +227,10 @@ export class ApiResolvers extends Construct {
);
}

if (props.ragEngines?.deleteDocumentWorkflow) {
props.ragEngines.deleteDocumentWorkflow.grantStartExecution(apiHandler);
}

if (props.ragEngines?.sageMakerRagModels) {
apiHandler.addToRolePolicy(
new iam.PolicyStatement({
Expand Down
16 changes: 14 additions & 2 deletions lib/chatbot-api/schema/schema.graphql
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,11 @@ type DocumentResult @aws_cognito_user_pools {
status: String
}

type DeleteDocumentResult @aws_cognito_user_pools {
documentId: String!
deleted: Boolean!
}

type UserFeedbackResult @aws_cognito_user_pools {
feedback_id: String!
}
Expand Down Expand Up @@ -240,6 +245,11 @@ type SessionHistoryItem @aws_cognito_user_pools {
metadata: String
}

input DeleteDocumentInput {
workspaceId: String!
documentId: String!
}

input UserFeedbackInput {
sessionId: String!
key: Int!
Expand Down Expand Up @@ -308,9 +318,11 @@ type Mutation {
@aws_cognito_user_pools
startKendraDataSync(workspaceId: String!): Boolean @aws_cognito_user_pools
deleteWorkspace(workspaceId: String!): Boolean @aws_cognito_user_pools
deleteDocument(input: DeleteDocumentInput!): DeleteDocumentResult @aws_cognito_user_pools
addTextDocument(input: TextDocumentInput!): DocumentResult
@aws_cognito_user_pools
addUserFeedback(input: UserFeedbackInput!): UserFeedbackResult @aws_cognito_user_pools
addUserFeedback(input: UserFeedbackInput!): UserFeedbackResult
@aws_cognito_user_pools
addQnADocument(input: QnADocumentInput!): DocumentResult
@aws_cognito_user_pools
setDocumentSubscriptionStatus(
Expand Down Expand Up @@ -363,4 +375,4 @@ schema {
query: Query
mutation: Mutation
subscription: Subscription
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ def process_record(record):
workspace_id=workspace_id,
document_type="file",
path=file_name,
title=file_name,
size_in_bytes=object_size,
)

Expand Down
2 changes: 2 additions & 0 deletions lib/rag-engines/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ export class RagEngines extends Construct {
public readonly fileImportWorkflow?: sfn.StateMachine;
public readonly websiteCrawlingWorkflow?: sfn.StateMachine;
public readonly deleteWorkspaceWorkflow?: sfn.StateMachine;
public readonly deleteDocumentWorkflow?: sfn.StateMachine;
public readonly dataImport: DataImport;

constructor(scope: Construct, id: string, props: RagEnginesProps) {
Expand Down Expand Up @@ -118,6 +119,7 @@ export class RagEngines extends Construct {
this.fileImportWorkflow = dataImport.fileImportWorkflow;
this.websiteCrawlingWorkflow = dataImport.websiteCrawlingWorkflow;
this.deleteWorkspaceWorkflow = workspaces.deleteWorkspaceWorkflow;
this.deleteDocumentWorkflow = workspaces.deleteDocumentWorkflow;
this.dataImport = dataImport;
}
}
177 changes: 177 additions & 0 deletions lib/rag-engines/workspaces/delete-document.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,177 @@
import * as cdk from "aws-cdk-lib";
import * as iam from "aws-cdk-lib/aws-iam";
import * as lambda from "aws-cdk-lib/aws-lambda";
import * as logs from "aws-cdk-lib/aws-logs";
import * as sfn from "aws-cdk-lib/aws-stepfunctions";
import * as tasks from "aws-cdk-lib/aws-stepfunctions-tasks";
import { Construct } from "constructs";
import * as path from "path";
import { Shared } from "../../shared";
import { SystemConfig } from "../../shared/types";
import { AuroraPgVector } from "../aurora-pgvector";
import { DataImport } from "../data-import";
import { KendraRetrieval } from "../kendra-retrieval";
import { OpenSearchVector } from "../opensearch-vector";
import { RagDynamoDBTables } from "../rag-dynamodb-tables";
import { RemovalPolicy } from "aws-cdk-lib";

export interface DeleteDocumentProps {
readonly config: SystemConfig;
readonly shared: Shared;
readonly dataImport: DataImport;
readonly ragDynamoDBTables: RagDynamoDBTables;
readonly auroraPgVector?: AuroraPgVector;
readonly openSearchVector?: OpenSearchVector;
readonly kendraRetrieval?: KendraRetrieval;
}

export class DeleteDocument extends Construct {
public readonly stateMachine?: sfn.StateMachine;

constructor(scope: Construct, id: string, props: DeleteDocumentProps) {
super(scope, id);

const deleteFunction = new lambda.Function(this, "DeleteDocumentFunction", {
vpc: props.shared.vpc,
code: props.shared.sharedCode.bundleWithLambdaAsset(
path.join(__dirname, "./functions/delete-document-workflow/delete")
),
runtime: props.shared.pythonRuntime,
architecture: props.shared.lambdaArchitecture,
handler: "index.lambda_handler",
layers: [props.shared.powerToolsLayer, props.shared.commonLayer],
timeout: cdk.Duration.minutes(15),
logRetention: logs.RetentionDays.ONE_WEEK,
environment: {
...props.shared.defaultEnvironmentVariables,
AURORA_DB_SECRET_ID: props.auroraPgVector?.database.secret
?.secretArn as string,
UPLOAD_BUCKET_NAME: props.dataImport.uploadBucket.bucketName,
PROCESSING_BUCKET_NAME: props.dataImport.processingBucket.bucketName,
WORKSPACES_TABLE_NAME:
props.ragDynamoDBTables.workspacesTable.tableName,
WORKSPACES_BY_OBJECT_TYPE_INDEX_NAME:
props.ragDynamoDBTables.workspacesByObjectTypeIndexName,
DOCUMENTS_TABLE_NAME:
props.ragDynamoDBTables?.documentsTable.tableName ?? "",
DOCUMENTS_BY_COMPOUND_KEY_INDEX_NAME:
props.ragDynamoDBTables?.documentsByCompoundKeyIndexName ?? "",
DEFAULT_KENDRA_S3_DATA_SOURCE_BUCKET_NAME:
props.kendraRetrieval?.kendraS3DataSourceBucket?.bucketName ?? "",
OPEN_SEARCH_COLLECTION_ENDPOINT:
props.openSearchVector?.openSearchCollectionEndpoint ?? "",
},
});

if (props.auroraPgVector) {
props.auroraPgVector.database.secret?.grantRead(deleteFunction);
props.auroraPgVector.database.connections.allowDefaultPortFrom(
deleteFunction
);
}

if (props.openSearchVector) {
deleteFunction.addToRolePolicy(
new iam.PolicyStatement({
actions: [
"aoss:APIAccessAll",
"aoss:DescribeIndex",
"aoss:UpdateIndex",
],
resources: [props.openSearchVector.openSearchCollection.attrArn],
})
);

props.openSearchVector.addToAccessPolicy(
"delete-document",
[deleteFunction.role?.roleArn],
[
"aoss:DescribeIndex",
"aoss:UpdateIndex",
"aoss:ReadDocument",
"aoss:WriteDocument",
]
);
}

props.dataImport.uploadBucket.grantReadWrite(deleteFunction);
props.dataImport.processingBucket.grantReadWrite(deleteFunction);
props.kendraRetrieval?.kendraS3DataSourceBucket?.grantReadWrite(
deleteFunction
);
props.ragDynamoDBTables.workspacesTable.grantReadWriteData(deleteFunction);
props.ragDynamoDBTables.documentsTable.grantReadWriteData(deleteFunction);

const handleError = new tasks.DynamoUpdateItem(this, "HandleError", {
table: props.ragDynamoDBTables.documentsTable,
key: {
workspace_id: tasks.DynamoAttributeValue.fromString(
sfn.JsonPath.stringAt("$.workspace_id")
),
document_id: tasks.DynamoAttributeValue.fromString(
sfn.JsonPath.stringAt("$.document_id")
),
},
updateExpression: "set #status = :error",
expressionAttributeNames: {
"#status": "status",
},
expressionAttributeValues: {
":error": tasks.DynamoAttributeValue.fromString("error"),
},
}).next(
new sfn.Fail(this, "Fail", {
cause: "Document deletion failed",
})
);

const setDeleting = new tasks.DynamoUpdateItem(this, "SetDeleting", {
table: props.ragDynamoDBTables.documentsTable,
key: {
workspace_id: tasks.DynamoAttributeValue.fromString(
sfn.JsonPath.stringAt("$.workspace_id")
),
document_id: tasks.DynamoAttributeValue.fromString(
sfn.JsonPath.stringAt("$.document_id")
),
},
updateExpression: "set #status=:statusValue",
expressionAttributeNames: {
"#status": "status",
},
expressionAttributeValues: {
":statusValue": tasks.DynamoAttributeValue.fromString("deleting"),
},
resultPath: sfn.JsonPath.DISCARD,
});

const deleteTask = new tasks.LambdaInvoke(this, "Delete", {
lambdaFunction: deleteFunction,
resultPath: "$.deleteResult",
}).addCatch(handleError, {
errors: ["States.ALL"],
resultPath: "$.deleteResult",
});

const workflow = setDeleting
.next(deleteTask)
.next(new sfn.Succeed(this, "Success"));

const logGroup = new logs.LogGroup(this, "DeleteDocumentSMLogGroup", {
removalPolicy: RemovalPolicy.DESTROY,
});

const stateMachine = new sfn.StateMachine(this, "DeleteDocument", {
definitionBody: sfn.DefinitionBody.fromChainable(workflow),
timeout: cdk.Duration.minutes(5),
comment: "Delete Document Workflow",
tracingEnabled: true,
logs: {
destination: logGroup,
level: sfn.LogLevel.ALL,
},
});

this.stateMachine = stateMachine;
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
import genai_core.types
import genai_core.workspaces
import genai_core.documents
import genai_core.aurora.delete
import genai_core.opensearch.delete
import genai_core.kendra.delete
from aws_lambda_powertools import Logger
from aws_lambda_powertools.utilities.typing import LambdaContext

logger = Logger()


@logger.inject_lambda_context(log_event=True)
def lambda_handler(event, context: LambdaContext):
workspace_id = event["workspace_id"]
document_id = event["document_id"]
workspace = genai_core.workspaces.get_workspace(workspace_id)
if workspace is None:
raise genai_core.types.CommonError("Workspace not found")

document = genai_core.documents.get_document(workspace_id, document_id)
if document is None:
raise genai_core.types.CommonError("Document not found")

if workspace["engine"] == "opensearch":
genai_core.opensearch.delete.delete_open_search_document(workspace_id, document)
elif workspace["engine"] == "aurora":
genai_core.aurora.delete.delete_aurora_document(workspace_id, document)
elif workspace["engine"] == "kendra":
genai_core.kendra.delete.delete_kendra_document(workspace_id, document)
else:
raise genai_core.types.CommonError("Workspace engine not supported")
21 changes: 19 additions & 2 deletions lib/rag-engines/workspaces/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import { KendraRetrieval } from "../kendra-retrieval";
import { OpenSearchVector } from "../opensearch-vector";
import { RagDynamoDBTables } from "../rag-dynamodb-tables";
import { DeleteWorkspace } from "./delete-workspace";
import { DeleteDocument } from "./delete-document";

export interface WorkkspacesProps {
readonly config: SystemConfig;
Expand All @@ -21,11 +22,26 @@ export interface WorkkspacesProps {

export class Workspaces extends Construct {
public readonly deleteWorkspaceWorkflow?: sfn.StateMachine;
public readonly deleteDocumentWorkflow?: sfn.StateMachine;

constructor(scope: Construct, id: string, props: WorkkspacesProps) {
super(scope, id);

const workflow = new DeleteWorkspace(this, "DeleteWorkspace", {
const deleteWorkspaceWorkflow = new DeleteWorkspace(
this,
"DeleteWorkspace",
{
config: props.config,
shared: props.shared,
dataImport: props.dataImport,
ragDynamoDBTables: props.ragDynamoDBTables,
auroraPgVector: props.auroraPgVector,
openSearchVector: props.openSearchVector,
kendraRetrieval: props.kendraRetrieval,
}
);

const deleteDocumentWorkflow = new DeleteDocument(this, "DeleteDocument", {
config: props.config,
shared: props.shared,
dataImport: props.dataImport,
Expand All @@ -35,6 +51,7 @@ export class Workspaces extends Construct {
kendraRetrieval: props.kendraRetrieval,
});

this.deleteWorkspaceWorkflow = workflow.stateMachine;
this.deleteWorkspaceWorkflow = deleteWorkspaceWorkflow.stateMachine;
this.deleteDocumentWorkflow = deleteDocumentWorkflow.stateMachine;
}
}
2 changes: 1 addition & 1 deletion lib/shared/file-import-batch-job/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ cfnresponse==1.1.2
aws_requests_auth==0.4.3
requests-aws4auth==1.2.3
langchain==0.1.5
opensearch-py==2.3.1
opensearch-py==2.4.2
psycopg2-binary==2.9.7
pgvector==0.2.2
pydantic==2.3.0
Expand Down
2 changes: 1 addition & 1 deletion lib/shared/layers/common/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ cfnresponse==1.1.2
aws_requests_auth==0.4.3
requests-aws4auth==1.2.3
langchain==0.1.5
opensearch-py==2.3.1
opensearch-py==2.4.2
psycopg2-binary==2.9.7
pgvector==0.2.2
pydantic==2.3.0
Expand Down
Loading
Loading