diff --git a/extensions/ql-vscode/src/query-results.ts b/extensions/ql-vscode/src/query-results.ts index 0f30ea80829..35c0bdcab7b 100644 --- a/extensions/ql-vscode/src/query-results.ts +++ b/extensions/ql-vscode/src/query-results.ts @@ -19,6 +19,7 @@ import { QueryStatus } from './query-status'; import { RemoteQueryHistoryItem } from './remote-queries/remote-query-history-item'; import { QueryEvaluationInfo, QueryWithResults } from './run-queries-shared'; import { formatLegacyMessage } from './legacy-query-server/run-queries'; +import { sarifParser } from './sarif-parser'; /** * query-results.ts @@ -160,10 +161,12 @@ export async function interpretResultsSarif( sourceInfo?: cli.SourceInfo ): Promise { const { resultsPath, interpretedResultsPath } = resultsPaths; + let res; if (await fs.pathExists(interpretedResultsPath)) { - return { ...JSON.parse(await fs.readFile(interpretedResultsPath, 'utf8')), t: 'SarifInterpretationData' }; + res = await sarifParser(interpretedResultsPath); + } else { + res = await cli.interpretBqrsSarif(ensureMetadataIsComplete(metadata), resultsPath, interpretedResultsPath, sourceInfo); } - const res = await cli.interpretBqrsSarif(ensureMetadataIsComplete(metadata), resultsPath, interpretedResultsPath, sourceInfo); return { ...res, t: 'SarifInterpretationData' }; } diff --git a/extensions/ql-vscode/src/sarif-parser.ts b/extensions/ql-vscode/src/sarif-parser.ts index c08d93c5ef2..7f81527eac5 100644 --- a/extensions/ql-vscode/src/sarif-parser.ts +++ b/extensions/ql-vscode/src/sarif-parser.ts @@ -1,33 +1,34 @@ import * as Sarif from 'sarif'; import * as fs from 'fs-extra'; -import { parser } from 'stream-json'; -import { pick } from 'stream-json/filters/Pick'; -import Assembler = require('stream-json/Assembler'); -import { chain } from 'stream-chain'; +import { connectTo } from 'stream-json/Assembler'; import { getErrorMessage } from './pure/helpers-pure'; +import { withParser } from 'stream-json/filters/Pick'; const DUMMY_TOOL: Sarif.Tool = { driver: { name: '' } }; export async function sarifParser(interpretedResultsPath: string): Promise { try { // Parse the SARIF file into token streams, filtering out only the results array. - const p = parser(); - const pipeline = chain([ - fs.createReadStream(interpretedResultsPath), - p, - pick({ filter: 'runs.0.results' }) - ]); + const pipeline = fs.createReadStream(interpretedResultsPath).pipe(withParser({ filter: 'runs.0.results' })); // Creates JavaScript objects from the token stream - const asm = Assembler.connectTo(pipeline); + const asm = connectTo(pipeline); - // Returns a constructed Log object with the results or an empty array if no results were found. + // Returns a constructed Log object with the results of an empty array if no results were found. // If the parser fails for any reason, it will reject the promise. return await new Promise((resolve, reject) => { + let alreadyDone = false; pipeline.on('error', (error) => { reject(error); }); + // If the parser pipeline completes before the assembler, we've reached end of file and have not found any results. + pipeline.on('end', () => { + if (!alreadyDone) { + reject(new Error('Invalid SARIF file: expecting at least one run with result.')); + } + }); + asm.on('done', (asm) => { const log: Sarif.Log = { @@ -41,6 +42,7 @@ export async function sarifParser(interpretedResultsPath: string): Promise { let disposeSpy: sinon.SinonSpy; @@ -155,68 +157,213 @@ describe('query-results', () => { }); }); - it('should interpretResultsSarif', async () => { - const spy = sandbox.mock(); - spy.returns({ a: '1234' }); - const mockServer = { - interpretBqrsSarif: spy - } as unknown as CodeQLCliServer; - - const interpretedResultsPath = path.join(tmpDir.name, 'interpreted.json'); - const resultsPath = '123'; - const sourceInfo = {}; + describe('interpretResultsSarif', () => { + let mockServer: CodeQLCliServer; + let spy: Sinon.SinonExpectation; const metadata = { kind: 'my-kind', id: 'my-id' as string | undefined, scored: undefined }; - const results1 = await interpretResultsSarif( - mockServer, - metadata, - { - resultsPath, interpretedResultsPath - }, - sourceInfo as SourceInfo - ); + const resultsPath = '123'; + const interpretedResultsPath = path.join(tmpDir.name, 'interpreted.json'); + const sourceInfo = {}; - expect(results1).to.deep.eq({ a: '1234', t: 'SarifInterpretationData' }); - expect(spy).to.have.been.calledWith( - metadata, - resultsPath, interpretedResultsPath, sourceInfo - ); + beforeEach(() => { + spy = sandbox.mock(); + spy.returns({ a: '1234' }); - // Try again, but with no id - spy.reset(); - spy.returns({ a: '1234' }); - delete metadata.id; - const results2 = await interpretResultsSarif( - mockServer, - metadata, - { - resultsPath, interpretedResultsPath - }, - sourceInfo as SourceInfo - ); - expect(results2).to.deep.eq({ a: '1234', t: 'SarifInterpretationData' }); - expect(spy).to.have.been.calledWith( - { kind: 'my-kind', id: 'dummy-id', scored: undefined }, - resultsPath, interpretedResultsPath, sourceInfo - ); + mockServer = { + interpretBqrsSarif: spy + } as unknown as CodeQLCliServer; + }); - // try a third time, but this time we get from file - spy.reset(); - fs.writeFileSync(interpretedResultsPath, JSON.stringify({ - a: 6 - }), 'utf8'); - const results3 = await interpretResultsSarif( - mockServer, - metadata, - { - resultsPath, interpretedResultsPath - }, - sourceInfo as SourceInfo - ); - expect(results3).to.deep.eq({ a: 6, t: 'SarifInterpretationData' }); + afterEach(async () => { + sandbox.restore(); + safeDel(interpretedResultsPath); + }); + + it('should interpretResultsSarif', async function() { + // up to 2 minutes per test + this.timeout(2 * 60 * 1000); + + const results = await interpretResultsSarif( + mockServer, + metadata, + { + resultsPath, interpretedResultsPath + }, + sourceInfo as SourceInfo + ); + + expect(results).to.deep.eq({ a: '1234', t: 'SarifInterpretationData' }); + expect(spy).to.have.been.calledWith( + metadata, + resultsPath, interpretedResultsPath, sourceInfo + ); + }); + + it('should interpretBqrsSarif without ID', async function() { + // up to 2 minutes per test + this.timeout(2 * 60 * 1000); + + delete metadata.id; + const results = await interpretResultsSarif( + mockServer, + metadata, + { + resultsPath, interpretedResultsPath + }, + sourceInfo as SourceInfo + ); + expect(results).to.deep.eq({ a: '1234', t: 'SarifInterpretationData' }); + expect(spy).to.have.been.calledWith( + { kind: 'my-kind', id: 'dummy-id', scored: undefined }, + resultsPath, interpretedResultsPath, sourceInfo + ); + }); + + it('should use sarifParser on a valid small SARIF file', async function() { + // up to 2 minutes per test + this.timeout(2 * 60 * 1000); + + fs.writeFileSync(interpretedResultsPath, JSON.stringify({ + runs: [{ results: [] }] // A run needs results to succeed. + }), 'utf8'); + const results = await interpretResultsSarif( + mockServer, + metadata, + { + resultsPath, interpretedResultsPath + }, + sourceInfo as SourceInfo + ); + // We do not re-interpret if we are reading from a SARIF file. + expect(spy).to.not.have.been.called; + + expect(results).to.have.property('t', 'SarifInterpretationData'); + expect(results).to.have.nested.property('runs[0].results'); + }); + + it('should throw an error on an invalid small SARIF file', async function() { + // up to 2 minutes per test + this.timeout(2 * 60 * 1000); + + fs.writeFileSync(interpretedResultsPath, JSON.stringify({ + a: '6' // Invalid: no runs or results + }), 'utf8'); + + await expect( + interpretResultsSarif( + mockServer, + metadata, + { + resultsPath, interpretedResultsPath + }, + sourceInfo as SourceInfo) + ).to.be.rejectedWith('Parsing output of interpretation failed: Invalid SARIF file: expecting at least one run with result.'); + + // We do not attempt to re-interpret if we are reading from a SARIF file. + expect(spy).to.not.have.been.called; + }); + + it('should use sarifParser on a valid large SARIF file', async function() { + // up to 2 minutes per test + this.timeout(2 * 60 * 1000); + + const validSarifStream = fs.createWriteStream(interpretedResultsPath, { flags: 'w' }); + + const finished = new Promise((res, rej) => { + validSarifStream.addListener('close', res); + validSarifStream.addListener('error', rej); + }); + + validSarifStream.write(JSON.stringify({ + runs: [{ results: [] }] // A run needs results to succeed. + }), 'utf8'); + + validSarifStream.write('[', 'utf8'); + const iterations = 1_000_000; + for (let i = 0; i < iterations; i++) { + validSarifStream.write(JSON.stringify({ + a: '6' + }), 'utf8'); + if (i < iterations - 1) { + validSarifStream.write(','); + } + } + validSarifStream.write(']', 'utf8'); + validSarifStream.end(); + await finished; + + // We need to sleep to wait for MSFT Defender to scan the file + // so that it can be read by our test. + if (os.platform() === 'win32') { + await sleep(10_000); + } + + const results = await interpretResultsSarif( + mockServer, + metadata, + { + resultsPath, interpretedResultsPath + }, + sourceInfo as SourceInfo + ); + // We do not re-interpret if we are reading from a SARIF file. + expect(spy).to.not.have.been.called; + + expect(results).to.have.property('t', 'SarifInterpretationData'); + expect(results).to.have.nested.property('runs[0].results'); + }); + + it('should throw an error on an invalid large SARIF file', async function() { + // up to 2 minutes per test + this.timeout(2 * 60 * 1000); + + // There is a problem on Windows where the file at the prior path isn't able + // to be deleted or written to, so we rename the path for this last test. + const interpretedResultsPath = path.join(tmpDir.name, 'interpreted-invalid.json'); + const invalidSarifStream = fs.createWriteStream(interpretedResultsPath, { flags: 'w' }); + + const finished = new Promise((res, rej) => { + invalidSarifStream.addListener('close', res); + invalidSarifStream.addListener('error', rej); + }); + + invalidSarifStream.write('[', 'utf8'); + const iterations = 1_000_000; + for (let i = 0; i < iterations; i++) { + invalidSarifStream.write(JSON.stringify({ + a: '6' + }), 'utf8'); + if (i < iterations - 1) { + invalidSarifStream.write(','); + } + } + invalidSarifStream.write(']', 'utf8'); + invalidSarifStream.end(); + await finished; + + // We need to sleep to wait for MSFT Defender to scan the file + // so that it can be read by our test. + if (os.platform() === 'win32') { + await sleep(10_000); + } + + await expect( + interpretResultsSarif( + mockServer, + metadata, + { + resultsPath, interpretedResultsPath + }, + sourceInfo as SourceInfo) + ).to.be.rejectedWith('Parsing output of interpretation failed: Invalid SARIF file: expecting at least one run with result.'); + + // We do not attempt to re-interpret if we are reading from a SARIF file. + expect(spy).to.not.have.been.called; + }); }); describe('splat and slurp', () => { @@ -300,6 +447,18 @@ describe('query-results', () => { }); }); + function safeDel(file: string) { + try { + fs.unlinkSync(file); + } catch (e) { + // ignore + } + } + + async function sleep(ms: number) { + return new Promise(resolve => setTimeout(resolve, ms)); + } + function createMockQueryWithResults( queryPath: string, didRunSuccessfully = true,