Skip to content

Commit

Permalink
fix: Generalize RAG files import from Google Drive
Browse files Browse the repository at this point in the history
PiperOrigin-RevId: 639184120
  • Loading branch information
yinghsienwu authored and Copybara-Service committed May 31, 2024
1 parent ba65828 commit 88c6a6a
Show file tree
Hide file tree
Showing 3 changed files with 10 additions and 5 deletions.
3 changes: 3 additions & 0 deletions tests/unit/vertex_rag/test_rag_constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,9 @@
TEST_DRIVE_FOLDER = (
f"https://drive.google.com/corp/drive/folders/{TEST_DRIVE_FOLDER_ID}"
)
+ TEST_DRIVE_FOLDER_2 = (
+ f"https://drive.google.com/drive/folders/{TEST_DRIVE_FOLDER_ID}?resourcekey=0-eiOT3"
+ )
TEST_IMPORT_FILES_CONFIG_DRIVE_FOLDER = ImportRagFilesConfig()
TEST_IMPORT_FILES_CONFIG_DRIVE_FOLDER.google_drive_source.resource_ids = [
GoogleDriveSource.ResourceId(
Expand Down
6 changes: 3 additions & 3 deletions tests/unit/vertex_rag/test_rag_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -374,11 +374,11 @@ def test_prepare_import_files_request_list_gcs_uris(self):
)
import_files_request_eq(request, tc.TEST_IMPORT_REQUEST_GCS)

- def test_prepare_import_files_request_drive_folders(self):
- paths = [tc.TEST_DRIVE_FOLDER]
+ @pytest.mark.parametrize("path", [tc.TEST_DRIVE_FOLDER, tc.TEST_DRIVE_FOLDER_2])
+ def test_prepare_import_files_request_drive_folders(self, path):
  request = prepare_import_files_request(
  corpus_name=tc.TEST_RAG_CORPUS_RESOURCE_NAME,
- paths=paths,
+ paths=[path],
chunk_size=tc.TEST_CHUNK_SIZE,
chunk_overlap=tc.TEST_CHUNK_OVERLAP,
)
Expand Down
6 changes: 4 additions & 2 deletions vertexai/preview/rag/utils/_gapic_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,10 +97,12 @@ def convert_path_to_resource_id(
# Google Drive source
path_list = path.split("/")
if "file" in path_list:
- resource_id = path_list[5]
+ index = path_list.index("file") + 2
+ resource_id = path_list[index].split("?")[0]
  resource_type = GoogleDriveSource.ResourceId.ResourceType.RESOURCE_TYPE_FILE
  elif "folders" in path_list:
- resource_id = path_list[6]
+ index = path_list.index("folders") + 1
+ resource_id = path_list[index].split("?")[0]
resource_type = (
GoogleDriveSource.ResourceId.ResourceType.RESOURCE_TYPE_FOLDER
)
Expand Down

0 comments on commit 88c6a6a

Please sign in to comment.