diff --git a/server/aws-lsp-codewhisperer/src/language-server/workspaceContext/artifactManager.ts b/server/aws-lsp-codewhisperer/src/language-server/workspaceContext/artifactManager.ts index e2ad2db788..9518f9629a 100644 --- a/server/aws-lsp-codewhisperer/src/language-server/workspaceContext/artifactManager.ts +++ b/server/aws-lsp-codewhisperer/src/language-server/workspaceContext/artifactManager.ts @@ -66,6 +66,7 @@ interface FileSizeDetails { skippedSize: number } const MAX_UNCOMPRESSED_SRC_SIZE_BYTES = 2 * 1024 * 1024 * 1024 // 2 GB +const MAX_FILES = 500_000 export class ArtifactManager { private workspace: Workspace @@ -175,7 +176,8 @@ export class ArtifactManager { } if (isDirectory(filePath)) { - const files = await glob(['**/*'], { + let fileCount = 0 + const filesStream = glob.stream(['**/*'], { cwd: filePath, dot: false, ignore: IGNORE_DEPENDENCY_PATTERNS, @@ -184,7 +186,11 @@ export class ArtifactManager { onlyFiles: true, }) - for (const relativePath of files) { + for await (const entry of filesStream) { + if (fileCount >= MAX_FILES) { + break + } + const relativePath = entry.toString() try { const fullPath = resolveSymlink(path.join(filePath, relativePath)) const fileMetadata = await this.createFileMetadata( @@ -197,6 +203,7 @@ export class ArtifactManager { } catch (error) { this.logging.warn(`Error processing file ${relativePath}: ${error}`) } + fileCount++ } } else { const workspaceUri = URI.parse(currentWorkspace.uri) @@ -359,7 +366,8 @@ export class ArtifactManager { ): Promise> { const filesByLanguage = new Map() - const files = await glob(['**/*'], { + const files = [] + const filesStream = glob.stream(['**/*'], { cwd: directoryPath, dot: false, ignore: IGNORE_PATTERNS, @@ -368,11 +376,26 @@ export class ArtifactManager { onlyFiles: true, }) + for await (const entry of filesStream) { + if (files.length >= MAX_FILES) { + break + } + files.push(entry.toString()) + } + + const hasJavaFile = files.some(file => file.endsWith('.java')) + for (const relativePath of files) { const fullPath = path.join(directoryPath, relativePath) - const language = getCodeWhispererLanguageIdFromPath(fullPath) - - if (!language || !SUPPORTED_WORKSPACE_CONTEXT_LANGUAGES.includes(language)) { + const isJavaProjectFile = isJavaProjectFileFromPath(fullPath) + const language = isJavaProjectFileFromPath(fullPath) ? 'java' : getCodeWhispererLanguageIdFromPath(fullPath) + + if ( + !language || + !SUPPORTED_WORKSPACE_CONTEXT_LANGUAGES.includes(language) || + // skip processing the java project file if there's no java source file + (!hasJavaFile && isJavaProjectFile) + ) { continue } @@ -631,7 +654,7 @@ export class ArtifactManager { files: FileMetadata[] ): Promise { const workspacePath = URI.parse(workspaceFolder.uri).path - const hasJavaFiles = files.some(file => file.language === 'java' && file.relativePath.endsWith('java')) + const hasJavaFiles = files.some(file => file.language === 'java' && file.relativePath.endsWith('.java')) if (!hasJavaFiles) { return files diff --git a/server/aws-lsp-codewhisperer/src/shared/languageDetection.test.ts b/server/aws-lsp-codewhisperer/src/shared/languageDetection.test.ts index 67873bee0c..c8a1193677 100644 --- a/server/aws-lsp-codewhisperer/src/shared/languageDetection.test.ts +++ b/server/aws-lsp-codewhisperer/src/shared/languageDetection.test.ts @@ -45,10 +45,6 @@ describe('LanguageDetection', () => { describe('getCodeWhispererLanguageIdFromPath', () => { it('should return language type with override', () => { - assert.strictEqual(getCodeWhispererLanguageIdFromPath('test/pom.xml'), 'java') - assert.strictEqual(getCodeWhispererLanguageIdFromPath('test/build.gradle.kts'), 'java') - assert.strictEqual(getCodeWhispererLanguageIdFromPath('test/build.xml'), 'java') - assert.strictEqual(getCodeWhispererLanguageIdFromPath('test/build.gradle'), 'java') assert.strictEqual(getCodeWhispererLanguageIdFromPath('test/test.java'), 'java') assert.strictEqual(getCodeWhispererLanguageIdFromPath('test/package.json'), 'javascript') assert.strictEqual(getCodeWhispererLanguageIdFromPath('test/test.js'), 'javascript') diff --git a/server/aws-lsp-codewhisperer/src/shared/languageDetection.ts b/server/aws-lsp-codewhisperer/src/shared/languageDetection.ts index d8a4074d92..ab3761528b 100644 --- a/server/aws-lsp-codewhisperer/src/shared/languageDetection.ts +++ b/server/aws-lsp-codewhisperer/src/shared/languageDetection.ts @@ -294,8 +294,6 @@ export function getCodeWhispererLanguageIdFromPath(filePath: string): Codewhispe return 'javascript' } - if (isJavaProjectFileFromPath(filePath)) return 'java' - for (const [extension, languageId] of Object.entries(languageByExtension)) { if (filePath.endsWith(extension)) { return getRuntimeLanguage(languageId)