From da3d1a3b5f47c4cd92a65d690220c6ccd3754984 Mon Sep 17 00:00:00 2001 From: Asger Feldthaus Date: Wed, 24 Jun 2020 13:28:11 +0100 Subject: [PATCH 01/19] JS: Recognize 'lang' attribute of script tags --- .../src/com/semmle/js/extractor/HTMLExtractor.java | 8 +++++++- .../TypeScript/EmbeddedInScript/Test.expected | 2 ++ .../library-tests/TypeScript/EmbeddedInScript/Test.ql | 5 +++++ .../library-tests/TypeScript/EmbeddedInScript/test.vue | 5 +++++ 4 files changed, 19 insertions(+), 1 deletion(-) create mode 100644 javascript/ql/test/library-tests/TypeScript/EmbeddedInScript/Test.expected create mode 100644 javascript/ql/test/library-tests/TypeScript/EmbeddedInScript/Test.ql create mode 100644 javascript/ql/test/library-tests/TypeScript/EmbeddedInScript/test.vue diff --git a/javascript/extractor/src/com/semmle/js/extractor/HTMLExtractor.java b/javascript/extractor/src/com/semmle/js/extractor/HTMLExtractor.java index 3274741a9706..3ed56d43cadf 100644 --- a/javascript/extractor/src/com/semmle/js/extractor/HTMLExtractor.java +++ b/javascript/extractor/src/com/semmle/js/extractor/HTMLExtractor.java @@ -1,12 +1,14 @@ package com.semmle.js.extractor; +import java.util.regex.Pattern; + import com.semmle.js.extractor.ExtractorConfig.Platform; import com.semmle.js.extractor.ExtractorConfig.SourceType; import com.semmle.js.parser.ParseError; import com.semmle.util.data.StringUtil; import com.semmle.util.trap.TrapWriter; import com.semmle.util.trap.TrapWriter.Label; -import java.util.regex.Pattern; + import net.htmlparser.jericho.Attribute; import net.htmlparser.jericho.Attributes; import net.htmlparser.jericho.CharacterReference; @@ -143,6 +145,10 @@ private SourceType getScriptSourceType(Element script) { String scriptType = getAttributeValueLC(script, "type"); String scriptLanguage = getAttributeValueLC(script, "language"); + if (scriptLanguage == null) { // Vue templates use 'lang' instead of 'language'. 
+ scriptLanguage = getAttributeValueLC(script, "lang"); + } + // if `type` and `language` are both either missing, contain the // string "javascript", or if `type` is the string "text/jsx", this is a plain script if ((scriptType == null || scriptType.contains("javascript") || "text/jsx".equals(scriptType)) diff --git a/javascript/ql/test/library-tests/TypeScript/EmbeddedInScript/Test.expected b/javascript/ql/test/library-tests/TypeScript/EmbeddedInScript/Test.expected new file mode 100644 index 000000000000..eb65aeb71bfa --- /dev/null +++ b/javascript/ql/test/library-tests/TypeScript/EmbeddedInScript/Test.expected @@ -0,0 +1,2 @@ +classDeclaration +exprType diff --git a/javascript/ql/test/library-tests/TypeScript/EmbeddedInScript/Test.ql b/javascript/ql/test/library-tests/TypeScript/EmbeddedInScript/Test.ql new file mode 100644 index 000000000000..c1d865dd37f4 --- /dev/null +++ b/javascript/ql/test/library-tests/TypeScript/EmbeddedInScript/Test.ql @@ -0,0 +1,5 @@ +import javascript + +query ClassDefinition classDeclaration() { any() } + +query Type exprType(Expr e) { result = e.getType() } diff --git a/javascript/ql/test/library-tests/TypeScript/EmbeddedInScript/test.vue b/javascript/ql/test/library-tests/TypeScript/EmbeddedInScript/test.vue new file mode 100644 index 000000000000..b2ec2523df3d --- /dev/null +++ b/javascript/ql/test/library-tests/TypeScript/EmbeddedInScript/test.vue @@ -0,0 +1,5 @@ + From 164a18f02df0fbe4d138bba21ed8bc58c95f9b77 Mon Sep 17 00:00:00 2001 From: Asger Feldthaus Date: Fri, 26 Jun 2020 13:03:17 +0100 Subject: [PATCH 02/19] JS: Factor out extractFiles --- .../src/com/semmle/js/extractor/AutoBuild.java | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/javascript/extractor/src/com/semmle/js/extractor/AutoBuild.java b/javascript/extractor/src/com/semmle/js/extractor/AutoBuild.java index 15c721a11c9d..007674188ddc 100644 --- a/javascript/extractor/src/com/semmle/js/extractor/AutoBuild.java +++ 
b/javascript/extractor/src/com/semmle/js/extractor/AutoBuild.java @@ -608,6 +608,17 @@ private void extractSource() throws IOException { boolean hasTypeScriptFiles = extractedFiles.size() > 0; // extract remaining files + extractFiles( + filesToExtract, extractedFiles, defaultExtractor, customExtractors, hasTypeScriptFiles); + } + + private void extractFiles( + Set filesToExtract, + Set extractedFiles, + FileExtractor defaultExtractor, + Map customExtractors, + boolean hasTypeScriptFiles) { + for (Path f : filesToExtract) { if (extractedFiles.contains(f)) continue; From ea6b99e7263b93d8a53d0659a2f43fd7928173a2 Mon Sep 17 00:00:00 2001 From: Asger Feldthaus Date: Fri, 26 Jun 2020 13:04:54 +0100 Subject: [PATCH 03/19] JS: Add shouldExtract predicate --- .../extractor/src/com/semmle/js/extractor/AutoBuild.java | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/javascript/extractor/src/com/semmle/js/extractor/AutoBuild.java b/javascript/extractor/src/com/semmle/js/extractor/AutoBuild.java index 007674188ddc..4a93b39c0880 100644 --- a/javascript/extractor/src/com/semmle/js/extractor/AutoBuild.java +++ b/javascript/extractor/src/com/semmle/js/extractor/AutoBuild.java @@ -29,6 +29,7 @@ import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; import java.util.concurrent.TimeUnit; +import java.util.function.Predicate; import java.util.stream.Collectors; import java.util.stream.Stream; @@ -609,7 +610,8 @@ private void extractSource() throws IOException { // extract remaining files extractFiles( - filesToExtract, extractedFiles, defaultExtractor, customExtractors, hasTypeScriptFiles); + filesToExtract, extractedFiles, defaultExtractor, customExtractors, + f -> !(hasTypeScriptFiles && isFileDerivedFromTypeScriptFile(f, extractedFiles))); } private void extractFiles( @@ -617,12 +619,12 @@ private void extractFiles( Set extractedFiles, FileExtractor defaultExtractor, Map customExtractors, - boolean hasTypeScriptFiles) { + 
Predicate shouldExtract) { for (Path f : filesToExtract) { if (extractedFiles.contains(f)) continue; - if (hasTypeScriptFiles && isFileDerivedFromTypeScriptFile(f, extractedFiles)) { + if (!shouldExtract.test(f)) { continue; } extractedFiles.add(f); From d55e3300f335b30cdfac81d32186175e274945e0 Mon Sep 17 00:00:00 2001 From: Asger Feldthaus Date: Fri, 26 Jun 2020 13:13:47 +0100 Subject: [PATCH 04/19] JS: Bundle FileExtractors into a class --- .../com/semmle/js/extractor/AutoBuild.java | 30 ++++++++++++------- 1 file changed, 19 insertions(+), 11 deletions(-) diff --git a/javascript/extractor/src/com/semmle/js/extractor/AutoBuild.java b/javascript/extractor/src/com/semmle/js/extractor/AutoBuild.java index 4a93b39c0880..67af70591b3d 100644 --- a/javascript/extractor/src/com/semmle/js/extractor/AutoBuild.java +++ b/javascript/extractor/src/com/semmle/js/extractor/AutoBuild.java @@ -569,19 +569,33 @@ public int compare(File f1, File f2) { } }; + class FileExtractors { + FileExtractor defaultExtractor; + Map customExtractors = new LinkedHashMap<>(); + + FileExtractors(FileExtractor defaultExtractor) { + this.defaultExtractor = defaultExtractor; + } + + public FileExtractor forFile(Path f) { + return customExtractors.getOrDefault(FileUtil.extension(f), defaultExtractor); + } + } + /** Extract all supported candidate files that pass the filters. 
*/ private void extractSource() throws IOException { // default extractor FileExtractor defaultExtractor = new FileExtractor(mkExtractorConfig(), outputConfig, trapCache); + FileExtractors extractors = new FileExtractors(defaultExtractor); + // custom extractor for explicitly specified file types - Map customExtractors = new LinkedHashMap<>(); for (Map.Entry spec : fileTypes.entrySet()) { String extension = spec.getKey(); String fileType = spec.getValue().name(); ExtractorConfig extractorConfig = mkExtractorConfig().withFileType(fileType); - customExtractors.put(extension, new FileExtractor(extractorConfig, outputConfig, trapCache)); + extractors.customExtractors.put(extension, new FileExtractor(extractorConfig, outputConfig, trapCache)); } Set filesToExtract = new LinkedHashSet<>(); @@ -610,15 +624,14 @@ private void extractSource() throws IOException { // extract remaining files extractFiles( - filesToExtract, extractedFiles, defaultExtractor, customExtractors, + filesToExtract, extractedFiles, extractors, f -> !(hasTypeScriptFiles && isFileDerivedFromTypeScriptFile(f, extractedFiles))); } private void extractFiles( Set filesToExtract, Set extractedFiles, - FileExtractor defaultExtractor, - Map customExtractors, + FileExtractors extractors, Predicate shouldExtract) { for (Path f : filesToExtract) { @@ -628,12 +641,7 @@ private void extractFiles( continue; } extractedFiles.add(f); - FileExtractor extractor = defaultExtractor; - if (!fileTypes.isEmpty()) { - String extension = FileUtil.extension(f); - if (customExtractors.containsKey(extension)) extractor = customExtractors.get(extension); - } - extract(extractor, f, null); + extract(extractors.forFile(f), f, null); } } From bfedcb01c4749a68f98c611d0dac47e806222713 Mon Sep 17 00:00:00 2001 From: Asger Feldthaus Date: Fri, 26 Jun 2020 13:20:11 +0100 Subject: [PATCH 05/19] JS: Make TypeScript aware of custom extractor extensions --- .../com/semmle/js/extractor/AutoBuild.java | 29 ++++++++++--------- 
.../js/extractor/test/AutoBuildTests.java | 2 +- 2 files changed, 17 insertions(+), 14 deletions(-) diff --git a/javascript/extractor/src/com/semmle/js/extractor/AutoBuild.java b/javascript/extractor/src/com/semmle/js/extractor/AutoBuild.java index 67af70591b3d..ea8abfee2aa1 100644 --- a/javascript/extractor/src/com/semmle/js/extractor/AutoBuild.java +++ b/javascript/extractor/src/com/semmle/js/extractor/AutoBuild.java @@ -569,7 +569,7 @@ public int compare(File f1, File f2) { } }; - class FileExtractors { + public class FileExtractors { FileExtractor defaultExtractor; Map customExtractors = new LinkedHashMap<>(); @@ -580,6 +580,10 @@ class FileExtractors { public FileExtractor forFile(Path f) { return customExtractors.getOrDefault(FileUtil.extension(f), defaultExtractor); } + + public FileType fileType(Path f) { + return forFile(f).getFileType(f.toFile()); + } } /** Extract all supported candidate files that pass the filters. */ @@ -614,11 +618,11 @@ private void extractSource() throws IOException { if (!tsconfigFiles.isEmpty()) { dependencyInstallationResult = this.preparePackagesAndDependencies(filesToExtract); } + Set extractedFiles = new LinkedHashSet<>(); // extract TypeScript projects and files - Set extractedFiles = - extractTypeScript( - defaultExtractor, filesToExtract, tsconfigFiles, dependencyInstallationResult); + extractTypeScript(filesToExtract, extractedFiles, + extractors, tsconfigFiles, dependencyInstallationResult); boolean hasTypeScriptFiles = extractedFiles.size() > 0; @@ -959,12 +963,11 @@ private ExtractorConfig mkExtractorConfig() { } private Set extractTypeScript( - FileExtractor extractor, Set files, + Set extractedFiles, + FileExtractors extractors, List tsconfig, DependencyInstallationResult deps) { - Set extractedFiles = new LinkedHashSet<>(); - if (hasTypeScriptFiles(files) || !tsconfig.isEmpty()) { ExtractorState extractorState = new ExtractorState(); TypeScriptParser tsParser = extractorState.getTypeScriptParser(); @@ -993,7 +996,7 
@@ private Set extractTypeScript( Path sourcePath = sourceFile.toPath(); if (!files.contains(normalizePath(sourcePath))) continue; if (!project.getOwnFiles().contains(sourceFile) && explicitlyIncludedFiles.contains(sourceFile)) continue; - if (!FileType.TYPESCRIPT.getExtensions().contains(FileUtil.extension(sourcePath))) { + if (extractors.fileType(sourcePath) != FileType.TYPESCRIPT) { // For the time being, skip non-TypeScript files, even if the TypeScript // compiler can parse them for us. continue; @@ -1003,7 +1006,7 @@ private Set extractTypeScript( } } typeScriptFiles.sort(PATH_ORDERING); - extractTypeScriptFiles(typeScriptFiles, extractedFiles, extractor, extractorState); + extractTypeScriptFiles(typeScriptFiles, extractedFiles, extractors, extractorState); tsParser.closeProject(projectFile); } @@ -1017,12 +1020,12 @@ private Set extractTypeScript( List remainingTypeScriptFiles = new ArrayList<>(); for (Path f : files) { if (!extractedFiles.contains(f) - && FileType.forFileExtension(f.toFile()) == FileType.TYPESCRIPT) { + && extractors.fileType(f) == FileType.TYPESCRIPT) { remainingTypeScriptFiles.add(f); } } if (!remainingTypeScriptFiles.isEmpty()) { - extractTypeScriptFiles(remainingTypeScriptFiles, extractedFiles, extractor, extractorState); + extractTypeScriptFiles(remainingTypeScriptFiles, extractedFiles, extractors, extractorState); } // The TypeScript compiler instance is no longer needed. 
@@ -1108,7 +1111,7 @@ public void verifyTypeScriptInstallation(ExtractorState extractorState) { public void extractTypeScriptFiles( List files, Set extractedFiles, - FileExtractor extractor, + FileExtractors extractors, ExtractorState extractorState) { List list = files .stream() @@ -1117,7 +1120,7 @@ public void extractTypeScriptFiles( extractorState.getTypeScriptParser().prepareFiles(list); for (Path path : files) { extractedFiles.add(path); - extract(extractor, path, extractorState); + extract(extractors.forFile(path), path, extractorState); } } diff --git a/javascript/extractor/src/com/semmle/js/extractor/test/AutoBuildTests.java b/javascript/extractor/src/com/semmle/js/extractor/test/AutoBuildTests.java index 34af4daec71e..38699936db38 100644 --- a/javascript/extractor/src/com/semmle/js/extractor/test/AutoBuildTests.java +++ b/javascript/extractor/src/com/semmle/js/extractor/test/AutoBuildTests.java @@ -123,7 +123,7 @@ public void verifyTypeScriptInstallation(ExtractorState state) {} public void extractTypeScriptFiles( java.util.List files, java.util.Set extractedFiles, - FileExtractor extractor, + FileExtractors extractors, ExtractorState extractorState) { for (Path f : files) { actual.add(f.toString()); From 8632c2a3b269cd831fcb7eb1184772c1ab4fb67f Mon Sep 17 00:00:00 2001 From: Asger Feldthaus Date: Fri, 26 Jun 2020 21:08:20 +0100 Subject: [PATCH 06/19] JS: Factor out VirtualSourceRoot --- .../com/semmle/js/extractor/AutoBuild.java | 15 ++--- .../DependencyInstallationResult.java | 29 +--------- .../semmle/js/extractor/ExtractorConfig.java | 23 +++++++- .../src/com/semmle/js/extractor/Main.java | 2 +- .../js/extractor/VirtualSourceRoot.java | 56 +++++++++++++++++++ .../semmle/js/parser/TypeScriptParser.java | 19 ++++--- 6 files changed, 96 insertions(+), 48 deletions(-) create mode 100644 javascript/extractor/src/com/semmle/js/extractor/VirtualSourceRoot.java diff --git a/javascript/extractor/src/com/semmle/js/extractor/AutoBuild.java 
b/javascript/extractor/src/com/semmle/js/extractor/AutoBuild.java index ea8abfee2aa1..671d8b9bfff3 100644 --- a/javascript/extractor/src/com/semmle/js/extractor/AutoBuild.java +++ b/javascript/extractor/src/com/semmle/js/extractor/AutoBuild.java @@ -211,6 +211,7 @@ public class AutoBuild { private volatile boolean seenCode = false; private boolean installDependencies = false; private int installDependenciesTimeout; + private final VirtualSourceRoot virtualSourceRoot; /** The default timeout when running yarn, in milliseconds. */ public static final int INSTALL_DEPENDENCIES_DEFAULT_TIMEOUT = 10 * 60 * 1000; // 10 minutes @@ -228,6 +229,7 @@ public AutoBuild() { Env.systemEnv() .getInt( "LGTM_INDEX_TYPESCRIPT_INSTALL_DEPS_TIMEOUT", INSTALL_DEPENDENCIES_DEFAULT_TIMEOUT); + this.virtualSourceRoot = new VirtualSourceRoot(LGTM_SRC, toRealPath(Paths.get(EnvironmentVariables.getScratchDir()))); setupFileTypes(); setupXmlMode(); setupMatchers(); @@ -758,7 +760,6 @@ public static Path tryRelativize(Path from, Path to) { */ protected DependencyInstallationResult preparePackagesAndDependencies(Set filesToExtract) { final Path sourceRoot = LGTM_SRC; - final Path virtualSourceRoot = toRealPath(Paths.get(EnvironmentVariables.getScratchDir())); // Read all package.json files and index them by name. 
Map packageJsonFiles = new LinkedHashMap<>(); @@ -845,8 +846,7 @@ protected DependencyInstallationResult preparePackagesAndDependencies(Set // Write the new package.json files to disk for (Path file : packageJsonFiles.keySet()) { - Path relativePath = sourceRoot.relativize(file); - Path virtualFile = virtualSourceRoot.resolve(relativePath); + Path virtualFile = virtualSourceRoot.toVirtualFile(file); try { Files.createDirectories(virtualFile.getParent()); @@ -861,7 +861,7 @@ protected DependencyInstallationResult preparePackagesAndDependencies(Set // Install dependencies if (this.installDependencies && verifyYarnInstallation()) { for (Path file : packageJsonFiles.keySet()) { - Path virtualFile = virtualSourceRoot.resolve(sourceRoot.relativize(file)); + Path virtualFile = virtualSourceRoot.toVirtualFile(file); System.out.println("Installing dependencies from " + virtualFile); ProcessBuilder pb = new ProcessBuilder( @@ -887,7 +887,7 @@ protected DependencyInstallationResult preparePackagesAndDependencies(Set } } - return new DependencyInstallationResult(sourceRoot, virtualSourceRoot, packageMainFile, packagesInRepo); + return new DependencyInstallationResult(packageMainFile, packagesInRepo); } /** @@ -958,6 +958,7 @@ private ExtractorConfig mkExtractorConfig() { ExtractorConfig config = new ExtractorConfig(true); config = config.withSourceType(getSourceType()); config = config.withTypeScriptMode(typeScriptMode); + config = config.withVirtualSourceRoot(virtualSourceRoot); if (defaultEncoding != null) config = config.withDefaultEncoding(defaultEncoding); return config; } @@ -979,7 +980,7 @@ private Set extractTypeScript( Set explicitlyIncludedFiles = new LinkedHashSet<>(); if (tsconfig.size() > 1) { // No prioritization needed if there's only one tsconfig. 
for (Path projectPath : tsconfig) { - explicitlyIncludedFiles.addAll(tsParser.getOwnFiles(projectPath.toFile(), deps)); + explicitlyIncludedFiles.addAll(tsParser.getOwnFiles(projectPath.toFile(), deps, virtualSourceRoot)); } } @@ -987,7 +988,7 @@ private Set extractTypeScript( for (Path projectPath : tsconfig) { File projectFile = projectPath.toFile(); long start = logBeginProcess("Opening project " + projectFile); - ParsedProject project = tsParser.openProject(projectFile, deps); + ParsedProject project = tsParser.openProject(projectFile, deps, virtualSourceRoot); logEndProcess(start, "Done opening project " + projectFile); // Extract all files belonging to this project which are also matched // by our include/exclude filters. diff --git a/javascript/extractor/src/com/semmle/js/extractor/DependencyInstallationResult.java b/javascript/extractor/src/com/semmle/js/extractor/DependencyInstallationResult.java index 5e432e4a40a1..5dd6bd60b6af 100644 --- a/javascript/extractor/src/com/semmle/js/extractor/DependencyInstallationResult.java +++ b/javascript/extractor/src/com/semmle/js/extractor/DependencyInstallationResult.java @@ -6,46 +6,19 @@ /** Contains the results of installing dependencies. 
*/ public class DependencyInstallationResult { - private Path sourceRoot; - private Path virtualSourceRoot; private Map packageEntryPoints; private Map packageJsonFiles; public static final DependencyInstallationResult empty = - new DependencyInstallationResult(null, null, Collections.emptyMap(), Collections.emptyMap()); + new DependencyInstallationResult(Collections.emptyMap(), Collections.emptyMap()); public DependencyInstallationResult( - Path sourceRoot, - Path virtualSourceRoot, Map packageEntryPoints, Map packageJsonFiles) { - this.sourceRoot = sourceRoot; - this.virtualSourceRoot = virtualSourceRoot; this.packageEntryPoints = packageEntryPoints; this.packageJsonFiles = packageJsonFiles; } - /** - * Returns the source root mirrored by {@link #getVirtualSourceRoot()} or null - * if no virtual source root exists. - *
<p>
- * When invoked from the AutoBuilder, this corresponds to the source root. When invoked - * from ODASA, there is no notion of source root, so this is always null in that context. - */ - public Path getSourceRoot() { - return sourceRoot; - } - - /** - * Returns the virtual source root or null if no virtual source root exists. - *
<p>
- * The virtual source root is a directory hierarchy that mirrors the real source - * root, where dependencies are installed. - */ - public Path getVirtualSourceRoot() { - return virtualSourceRoot; - } - /** * Returns the mapping from package names to the TypeScript file that should * act as its main entry point. diff --git a/javascript/extractor/src/com/semmle/js/extractor/ExtractorConfig.java b/javascript/extractor/src/com/semmle/js/extractor/ExtractorConfig.java index dd3976eec78d..441d51bfb621 100644 --- a/javascript/extractor/src/com/semmle/js/extractor/ExtractorConfig.java +++ b/javascript/extractor/src/com/semmle/js/extractor/ExtractorConfig.java @@ -1,8 +1,5 @@ package com.semmle.js.extractor; -import com.semmle.js.parser.JcornWrapper; -import com.semmle.util.data.StringUtil; -import com.semmle.util.exception.UserError; import java.nio.charset.Charset; import java.nio.charset.IllegalCharsetNameException; import java.nio.charset.StandardCharsets; @@ -12,6 +9,10 @@ import java.util.LinkedHashSet; import java.util.Set; +import com.semmle.js.parser.JcornWrapper; +import com.semmle.util.data.StringUtil; +import com.semmle.util.exception.UserError; + /** * Configuration options that affect the behaviour of the extractor. * @@ -235,6 +236,8 @@ public String toString() { /** The default character encoding to use for parsing source files. */ private String defaultEncoding; + + private VirtualSourceRoot virtualSourceRoot; public ExtractorConfig(boolean experimental) { this.ecmaVersion = experimental ? 
ECMAVersion.ECMA2020 : ECMAVersion.ECMA2019; @@ -252,6 +255,7 @@ public ExtractorConfig(boolean experimental) { this.typescriptMode = TypeScriptMode.NONE; this.e4x = experimental; this.defaultEncoding = StandardCharsets.UTF_8.name(); + this.virtualSourceRoot = VirtualSourceRoot.none; } public ExtractorConfig(ExtractorConfig that) { @@ -272,6 +276,7 @@ public ExtractorConfig(ExtractorConfig that) { this.typescriptMode = that.typescriptMode; this.typescriptRam = that.typescriptRam; this.defaultEncoding = that.defaultEncoding; + this.virtualSourceRoot = that.virtualSourceRoot; } public ECMAVersion getEcmaVersion() { @@ -452,6 +457,16 @@ public ExtractorConfig withDefaultEncoding(String defaultEncoding) { return res; } + public VirtualSourceRoot getVirtualSourceRoot() { + return virtualSourceRoot; + } + + public ExtractorConfig withVirtualSourceRoot(VirtualSourceRoot virtualSourceRoot) { + ExtractorConfig res = new ExtractorConfig(this); + res.virtualSourceRoot = virtualSourceRoot; + return res; + } + @Override public String toString() { return "ExtractorConfig [ecmaVersion=" @@ -486,6 +501,8 @@ public String toString() { + typescriptMode + ", defaultEncoding=" + defaultEncoding + + ", virtualSourceRoot=" + + virtualSourceRoot + "]"; } } diff --git a/javascript/extractor/src/com/semmle/js/extractor/Main.java b/javascript/extractor/src/com/semmle/js/extractor/Main.java index feb0ebfe2bbf..b74cde11f25f 100644 --- a/javascript/extractor/src/com/semmle/js/extractor/Main.java +++ b/javascript/extractor/src/com/semmle/js/extractor/Main.java @@ -152,7 +152,7 @@ public void run(String[] args) { for (File projectFile : projectFiles) { long start = verboseLogStartTimer(ap, "Opening project " + projectFile); - ParsedProject project = tsParser.openProject(projectFile, DependencyInstallationResult.empty); + ParsedProject project = tsParser.openProject(projectFile, DependencyInstallationResult.empty, VirtualSourceRoot.none); verboseLogEndTimer(ap, start); // Extract all files 
belonging to this project which are also matched // by our include/exclude filters. diff --git a/javascript/extractor/src/com/semmle/js/extractor/VirtualSourceRoot.java b/javascript/extractor/src/com/semmle/js/extractor/VirtualSourceRoot.java new file mode 100644 index 000000000000..c5fb4a3061a0 --- /dev/null +++ b/javascript/extractor/src/com/semmle/js/extractor/VirtualSourceRoot.java @@ -0,0 +1,56 @@ +package com.semmle.js.extractor; + +import java.nio.file.Path; + +public class VirtualSourceRoot { + private Path sourceRoot; + private Path virtualSourceRoot; + + public static final VirtualSourceRoot none = new VirtualSourceRoot(null, null); + + public VirtualSourceRoot(Path sourceRoot, Path virtualSourceRoot) { + this.sourceRoot = sourceRoot; + this.virtualSourceRoot = virtualSourceRoot; + } + + /** + * Returns the source root mirrored by {@link #getVirtualSourceRoot()} or null if no + * virtual source root exists. + * + *
<p>
When invoked from the AutoBuilder, this corresponds to the source root. When invoked from + * ODASA, there is no notion of source root, so this is always null in that context. + */ + public Path getSourceRoot() { + return sourceRoot; + } + + /** + * Returns the virtual source root or null if no virtual source root exists. + * + *
<p>
The virtual source root is a directory hierarchy that mirrors the real source root, where + * dependencies are installed. + */ + public Path getVirtualSourceRoot() { + return virtualSourceRoot; + } + + private static Path translate(Path oldRoot, Path newRoot, Path file) { + if (oldRoot == null || newRoot == null) return null; + Path relative = oldRoot.relativize(file); + if (relative.startsWith("..") || relative.isAbsolute()) return null; + return newRoot.resolve(relative); + } + + public Path toVirtualFile(Path file) { + return translate(sourceRoot, virtualSourceRoot, file); + } + + public Path fromVirtualFile(Path file) { + return translate(virtualSourceRoot, sourceRoot, file); + } + + @Override + public String toString() { + return "[sourceRoot=" + sourceRoot + ", virtualSourceRoot=" + virtualSourceRoot + "]"; + } +} diff --git a/javascript/extractor/src/com/semmle/js/parser/TypeScriptParser.java b/javascript/extractor/src/com/semmle/js/parser/TypeScriptParser.java index 24451b581e0b..2dea7826879f 100644 --- a/javascript/extractor/src/com/semmle/js/parser/TypeScriptParser.java +++ b/javascript/extractor/src/com/semmle/js/parser/TypeScriptParser.java @@ -31,6 +31,7 @@ import com.semmle.js.extractor.DependencyInstallationResult; import com.semmle.js.extractor.EnvironmentVariables; import com.semmle.js.extractor.ExtractionMetrics; +import com.semmle.js.extractor.VirtualSourceRoot; import com.semmle.js.parser.JSParser.Result; import com.semmle.ts.extractor.TypeTable; import com.semmle.util.data.StringUtil; @@ -501,8 +502,8 @@ private static Set getFilesFromJsonArray(JsonArray array) { /** * Returns the set of files included by the inclusion pattern in the given tsconfig.json file. 
*/ - public Set getOwnFiles(File tsConfigFile, DependencyInstallationResult deps) { - JsonObject request = makeLoadCommand("get-own-files", tsConfigFile, deps); + public Set getOwnFiles(File tsConfigFile, DependencyInstallationResult deps, VirtualSourceRoot vroot) { + JsonObject request = makeLoadCommand("get-own-files", tsConfigFile, deps, vroot); JsonObject response = talkToParserWrapper(request); try { checkResponseType(response, "file-list"); @@ -521,8 +522,8 @@ public Set getOwnFiles(File tsConfigFile, DependencyInstallationResult dep * *
<p>
Only one project should be opened at once. */ - public ParsedProject openProject(File tsConfigFile, DependencyInstallationResult deps) { - JsonObject request = makeLoadCommand("open-project", tsConfigFile, deps); + public ParsedProject openProject(File tsConfigFile, DependencyInstallationResult deps, VirtualSourceRoot vroot) { + JsonObject request = makeLoadCommand("open-project", tsConfigFile, deps, vroot); JsonObject response = talkToParserWrapper(request); try { checkResponseType(response, "project-opened"); @@ -536,18 +537,18 @@ public ParsedProject openProject(File tsConfigFile, DependencyInstallationResult } } - private JsonObject makeLoadCommand(String command, File tsConfigFile, DependencyInstallationResult deps) { + private JsonObject makeLoadCommand(String command, File tsConfigFile, DependencyInstallationResult deps, VirtualSourceRoot vroot) { JsonObject request = new JsonObject(); request.add("command", new JsonPrimitive(command)); request.add("tsConfig", new JsonPrimitive(tsConfigFile.getPath())); request.add("packageEntryPoints", mapToArray(deps.getPackageEntryPoints())); request.add("packageJsonFiles", mapToArray(deps.getPackageJsonFiles())); - request.add("sourceRoot", deps.getSourceRoot() == null + request.add("sourceRoot", vroot.getSourceRoot() == null ? JsonNull.INSTANCE - : new JsonPrimitive(deps.getSourceRoot().toString())); - request.add("virtualSourceRoot", deps.getVirtualSourceRoot() == null + : new JsonPrimitive(vroot.getSourceRoot().toString())); + request.add("virtualSourceRoot", vroot.getVirtualSourceRoot() == null ? 
JsonNull.INSTANCE - : new JsonPrimitive(deps.getVirtualSourceRoot().toString())); + : new JsonPrimitive(vroot.getVirtualSourceRoot().toString())); return request; } From 805deb13c01b59ea93aa0ac24a5b10189e2d90ce Mon Sep 17 00:00:00 2001 From: Asger Feldthaus Date: Fri, 26 Jun 2020 21:18:01 +0100 Subject: [PATCH 07/19] JS: Fix whitespace --- .../extractor/src/com/semmle/js/extractor/AutoBuild.java | 8 ++++---- .../src/com/semmle/js/extractor/ExtractorConfig.java | 4 ++-- .../src/com/semmle/js/extractor/VirtualSourceRoot.java | 4 ++-- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/javascript/extractor/src/com/semmle/js/extractor/AutoBuild.java b/javascript/extractor/src/com/semmle/js/extractor/AutoBuild.java index 671d8b9bfff3..ea20b45367fe 100644 --- a/javascript/extractor/src/com/semmle/js/extractor/AutoBuild.java +++ b/javascript/extractor/src/com/semmle/js/extractor/AutoBuild.java @@ -574,15 +574,15 @@ public int compare(File f1, File f2) { public class FileExtractors { FileExtractor defaultExtractor; Map customExtractors = new LinkedHashMap<>(); - + FileExtractors(FileExtractor defaultExtractor) { this.defaultExtractor = defaultExtractor; } - + public FileExtractor forFile(Path f) { return customExtractors.getOrDefault(FileUtil.extension(f), defaultExtractor); } - + public FileType fileType(Path f) { return forFile(f).getFileType(f.toFile()); } @@ -630,7 +630,7 @@ private void extractSource() throws IOException { // extract remaining files extractFiles( - filesToExtract, extractedFiles, extractors, + filesToExtract, extractedFiles, extractors, f -> !(hasTypeScriptFiles && isFileDerivedFromTypeScriptFile(f, extractedFiles))); } diff --git a/javascript/extractor/src/com/semmle/js/extractor/ExtractorConfig.java b/javascript/extractor/src/com/semmle/js/extractor/ExtractorConfig.java index 441d51bfb621..5cd5d5ec4b10 100644 --- a/javascript/extractor/src/com/semmle/js/extractor/ExtractorConfig.java +++ 
b/javascript/extractor/src/com/semmle/js/extractor/ExtractorConfig.java @@ -236,7 +236,7 @@ public String toString() { /** The default character encoding to use for parsing source files. */ private String defaultEncoding; - + private VirtualSourceRoot virtualSourceRoot; public ExtractorConfig(boolean experimental) { @@ -460,7 +460,7 @@ public ExtractorConfig withDefaultEncoding(String defaultEncoding) { public VirtualSourceRoot getVirtualSourceRoot() { return virtualSourceRoot; } - + public ExtractorConfig withVirtualSourceRoot(VirtualSourceRoot virtualSourceRoot) { ExtractorConfig res = new ExtractorConfig(this); res.virtualSourceRoot = virtualSourceRoot; diff --git a/javascript/extractor/src/com/semmle/js/extractor/VirtualSourceRoot.java b/javascript/extractor/src/com/semmle/js/extractor/VirtualSourceRoot.java index c5fb4a3061a0..5c7a96e637f0 100644 --- a/javascript/extractor/src/com/semmle/js/extractor/VirtualSourceRoot.java +++ b/javascript/extractor/src/com/semmle/js/extractor/VirtualSourceRoot.java @@ -5,7 +5,7 @@ public class VirtualSourceRoot { private Path sourceRoot; private Path virtualSourceRoot; - + public static final VirtualSourceRoot none = new VirtualSourceRoot(null, null); public VirtualSourceRoot(Path sourceRoot, Path virtualSourceRoot) { @@ -33,7 +33,7 @@ public Path getSourceRoot() { public Path getVirtualSourceRoot() { return virtualSourceRoot; } - + private static Path translate(Path oldRoot, Path newRoot, Path file) { if (oldRoot == null || newRoot == null) return null; Path relative = oldRoot.relativize(file); From 2c1567aedd6f409020c531197679bd84a9d76c1d Mon Sep 17 00:00:00 2001 From: Asger Feldthaus Date: Fri, 26 Jun 2020 21:26:07 +0100 Subject: [PATCH 08/19] JS: Don't extract TypeScript from HTML --- .../semmle/js/extractor/HTMLExtractor.java | 38 +++++++++++++++---- 1 file changed, 30 insertions(+), 8 deletions(-) diff --git a/javascript/extractor/src/com/semmle/js/extractor/HTMLExtractor.java 
b/javascript/extractor/src/com/semmle/js/extractor/HTMLExtractor.java index 3ed56d43cadf..e07bc414211f 100644 --- a/javascript/extractor/src/com/semmle/js/extractor/HTMLExtractor.java +++ b/javascript/extractor/src/com/semmle/js/extractor/HTMLExtractor.java @@ -59,6 +59,7 @@ public LoCInfo extract(TextualExtractor textualExtractor) { Segment content = elt.getContent(); String source = content.toString(); + boolean isTypeScript = isTypeScriptTag(elt); /* * Script blocks in XHTML files may wrap (parts of) their code inside CDATA sections. @@ -81,7 +82,8 @@ public LoCInfo extract(TextualExtractor textualExtractor) { textualExtractor, source, contentStart.getRow(), - contentStart.getColumn()); + contentStart.getColumn(), + isTypeScript); } } } else { @@ -103,7 +105,8 @@ public LoCInfo extract(TextualExtractor textualExtractor) { textualExtractor, source, valueStart.getRow(), - valueStart.getColumn()); + valueStart.getColumn(), + false /* isTypeScript */); } else if (source.startsWith("javascript:")) { source = source.substring(11); snippetLoC = @@ -114,7 +117,8 @@ public LoCInfo extract(TextualExtractor textualExtractor) { textualExtractor, source, valueStart.getRow(), - valueStart.getColumn() + 11); + valueStart.getColumn() + 11, + false /* isTypeScript */); } } } @@ -143,11 +147,9 @@ public LoCInfo extract(TextualExtractor textualExtractor) { */ private SourceType getScriptSourceType(Element script) { String scriptType = getAttributeValueLC(script, "type"); - String scriptLanguage = getAttributeValueLC(script, "language"); + String scriptLanguage = getScriptLanguage(script); - if (scriptLanguage == null) { // Vue templates use 'lang' instead of 'language'. 
- scriptLanguage = getAttributeValueLC(script, "lang"); - } + if (isTypeScriptTag(script)) return config.getSourceType(); // if `type` and `language` are both either missing, contain the // string "javascript", or if `type` is the string "text/jsx", this is a plain script @@ -171,6 +173,23 @@ private SourceType getScriptSourceType(Element script) { return null; } + private String getScriptLanguage(Element script) { + String scriptLanguage = getAttributeValueLC(script, "language"); + + if (scriptLanguage == null) { // Vue templates use 'lang' instead of 'language'. + scriptLanguage = getAttributeValueLC(script, "lang"); + } + return scriptLanguage; + } + + private boolean isTypeScriptTag(Element script) { + String language = getScriptLanguage(script); + if ("ts".equals(language) || "typescript".equals(language)) return true; + String type = getAttributeValueLC(script, "type"); + if (type != null && type.contains("typescript")) return true; + return false; + } + /** * Get the value of attribute attr of element elt in lower case; if the * attribute has no value, null is returned. 
@@ -187,7 +206,10 @@ private LoCInfo extractSnippet( TextualExtractor textualExtractor, String source, int line, - int column) { + int column, + boolean isTypeScript) { + if (isTypeScript) + return null; // not supported right now TrapWriter trapwriter = textualExtractor.getTrapwriter(); LocationManager locationManager = textualExtractor.getLocationManager(); LocationManager scriptLocationManager = From 1297d0f4146353ed81d66e125f1558160f302a0b Mon Sep 17 00:00:00 2001 From: Asger Feldthaus Date: Fri, 26 Jun 2020 21:48:18 +0100 Subject: [PATCH 09/19] JS: Extract HTML before TypeScript --- .../com/semmle/js/extractor/AutoBuild.java | 25 ++++++++++++++----- .../js/extractor/test/AutoBuildTests.java | 4 ++- 2 files changed, 22 insertions(+), 7 deletions(-) diff --git a/javascript/extractor/src/com/semmle/js/extractor/AutoBuild.java b/javascript/extractor/src/com/semmle/js/extractor/AutoBuild.java index ea20b45367fe..d85c4cd28a56 100644 --- a/javascript/extractor/src/com/semmle/js/extractor/AutoBuild.java +++ b/javascript/extractor/src/com/semmle/js/extractor/AutoBuild.java @@ -26,6 +26,7 @@ import java.util.List; import java.util.Map; import java.util.Set; +import java.util.concurrent.CompletableFuture; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; import java.util.concurrent.TimeUnit; @@ -621,6 +622,13 @@ private void extractSource() throws IOException { dependencyInstallationResult = this.preparePackagesAndDependencies(filesToExtract); } Set extractedFiles = new LinkedHashSet<>(); + + // Extract HTML files as they may contain TypeScript + CompletableFuture htmlFuture = extractFiles( + filesToExtract, extractedFiles, extractors, + f -> extractors.fileType(f) == FileType.HTML); + + htmlFuture.join(); // Wait for HTML extraction to be finished. 
// extract TypeScript projects and files extractTypeScript(filesToExtract, extractedFiles, @@ -634,12 +642,13 @@ private void extractSource() throws IOException { f -> !(hasTypeScriptFiles && isFileDerivedFromTypeScriptFile(f, extractedFiles))); } - private void extractFiles( + private CompletableFuture extractFiles( Set filesToExtract, Set extractedFiles, FileExtractors extractors, Predicate shouldExtract) { + List> futures = new ArrayList<>(); for (Path f : filesToExtract) { if (extractedFiles.contains(f)) continue; @@ -647,8 +656,9 @@ private void extractFiles( continue; } extractedFiles.add(f); - extract(extractors.forFile(f), f, null); + futures.add(extract(extractors.forFile(f), f, null)); } + return CompletableFuture.allOf(futures.toArray(new CompletableFuture[0])); } /** @@ -1164,10 +1174,13 @@ private SourceType getSourceType() { *

If the state is {@code null}, the extraction job will be submitted to the {@link * #threadPool}, otherwise extraction will happen on the main thread. */ - protected void extract(FileExtractor extractor, Path file, ExtractorState state) { - if (state == null && threadPool != null) - threadPool.submit(() -> doExtract(extractor, file, state)); - else doExtract(extractor, file, state); + protected CompletableFuture extract(FileExtractor extractor, Path file, ExtractorState state) { + if (state == null && threadPool != null) { + return CompletableFuture.runAsync(() -> doExtract(extractor, file, state), threadPool); + } else { + doExtract(extractor, file, state); + return CompletableFuture.completedFuture(null); + } } private void doExtract(FileExtractor extractor, Path file, ExtractorState state) { diff --git a/javascript/extractor/src/com/semmle/js/extractor/test/AutoBuildTests.java b/javascript/extractor/src/com/semmle/js/extractor/test/AutoBuildTests.java index 38699936db38..7d1de63d0830 100644 --- a/javascript/extractor/src/com/semmle/js/extractor/test/AutoBuildTests.java +++ b/javascript/extractor/src/com/semmle/js/extractor/test/AutoBuildTests.java @@ -15,6 +15,7 @@ import java.util.List; import java.util.Map; import java.util.Set; +import java.util.concurrent.CompletableFuture; import org.junit.After; import org.junit.Assert; @@ -109,11 +110,12 @@ private void runTest() throws IOException { Set actual = new LinkedHashSet<>(); new AutoBuild() { @Override - protected void extract(FileExtractor extractor, Path file, ExtractorState state) { + protected CompletableFuture extract(FileExtractor extractor, Path file, ExtractorState state) { String extracted = file.toString(); if (extractor.getConfig().hasFileType()) extracted += ":" + extractor.getFileType(file.toFile()); actual.add(extracted); + return CompletableFuture.completedFuture(null); } @Override From d3b9ebe1d2200b365e184515e121cc91ddd2a535 Mon Sep 17 00:00:00 2001 From: Asger Feldthaus Date: Mon, 29 Jun 2020 
08:22:54 +0100 Subject: [PATCH 10/19] JS: Perform glob matching across source roots --- javascript/extractor/lib/typescript/src/main.ts | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/javascript/extractor/lib/typescript/src/main.ts b/javascript/extractor/lib/typescript/src/main.ts index e4c867d0b43c..a8f62e80d6ef 100644 --- a/javascript/extractor/lib/typescript/src/main.ts +++ b/javascript/extractor/lib/typescript/src/main.ts @@ -414,7 +414,18 @@ function loadTsConfig(command: LoadCommand): LoadedConfig { */ let parseConfigHost: ts.ParseConfigHost = { useCaseSensitiveFileNames: true, - readDirectory: ts.sys.readDirectory, // No need to override traversal/glob matching + readDirectory: (rootDir, extensions, excludes?, includes?, depth?) => { + // Perform the glob matching in both real and virtual source roots. + let originalResults = ts.sys.readDirectory(rootDir, extensions, excludes, includes, depth) + let virtualDir = virtualSourceRoot.toVirtualPath(rootDir); + if (virtualDir == null) { + return originalResults; + } + // Make sure glob matching does not discover anything in node_modules. 
+ let virtualExcludes = [ ...(excludes || []), '**/node_modules/**/*' ]; + let virtualResults = ts.sys.readDirectory(virtualDir, extensions, virtualExcludes, includes, depth) + return [ ...originalResults, ...virtualResults ]; + }, fileExists: (path: string) => { return ts.sys.fileExists(path) || virtualSourceRoot.toVirtualPathIfFileExists(path) != null From da58fb5e62378874f2a6d25879ba2c9dfbd8bda9 Mon Sep 17 00:00:00 2001 From: Asger Feldthaus Date: Fri, 26 Jun 2020 22:54:44 +0100 Subject: [PATCH 11/19] JS: Resolve relative imports across real and virtual source roots --- .../extractor/lib/typescript/src/common.ts | 19 ++++++++++--------- .../lib/typescript/src/virtual_source_root.ts | 19 +++++++++++++++---- 2 files changed, 25 insertions(+), 13 deletions(-) diff --git a/javascript/extractor/lib/typescript/src/common.ts b/javascript/extractor/lib/typescript/src/common.ts index 1dbe54b46328..23c0e82c06a3 100644 --- a/javascript/extractor/lib/typescript/src/common.ts +++ b/javascript/extractor/lib/typescript/src/common.ts @@ -71,10 +71,19 @@ export class Project { redirectedReference: ts.ResolvedProjectReference, options: ts.CompilerOptions) { + let oppositePath = + this.virtualSourceRoot.toVirtualPath(containingFile) || + this.virtualSourceRoot.fromVirtualPath(containingFile); + const { host, resolutionCache } = this; return moduleNames.map((moduleName) => { let redirected = this.redirectModuleName(moduleName, containingFile, options); if (redirected != null) return redirected; + if (oppositePath != null) { + // If the containing file is in the virtual source root, try resolving from the real source root, and vice versa. 
+ redirected = ts.resolveModuleName(moduleName, oppositePath, options, host, resolutionCache).resolvedModule; + if (redirected != null) return redirected; + } return ts.resolveModuleName(moduleName, containingFile, options, host, resolutionCache).resolvedModule; }); } @@ -90,15 +99,7 @@ export class Project { // Get the overridden location of this package, if one exists. let packageEntryPoint = this.packageEntryPoints.get(packageName); - if (packageEntryPoint == null) { - // The package is not overridden, but we have established that it begins with a valid package name. - // Do a lookup in the virtual source root (where dependencies are installed) by changing the 'containing file'. - let virtualContainingFile = this.virtualSourceRoot.toVirtualPath(containingFile); - if (virtualContainingFile != null) { - return ts.resolveModuleName(moduleName, virtualContainingFile, options, this.host, this.resolutionCache).resolvedModule; - } - return null; - } + if (packageEntryPoint == null) return null; // If the requested module name is exactly the overridden package name, // return the entry point file (it is not necessarily called `index.ts`). diff --git a/javascript/extractor/lib/typescript/src/virtual_source_root.ts b/javascript/extractor/lib/typescript/src/virtual_source_root.ts index 8c7c57c24b59..79adab1eeba3 100644 --- a/javascript/extractor/lib/typescript/src/virtual_source_root.ts +++ b/javascript/extractor/lib/typescript/src/virtual_source_root.ts @@ -16,14 +16,25 @@ export class VirtualSourceRoot { private virtualSourceRoot: string | null, ) {} + private static translate(oldRoot: string, newRoot: string, path: string) { + if (!oldRoot || !newRoot) return null; + let relative = pathlib.relative(oldRoot, path); + if (relative.startsWith('..') || pathlib.isAbsolute(relative)) return null; + return pathlib.join(newRoot, relative); + } + /** * Maps a path under the real source root to the corresponding path in the virtual source root. 
*/ public toVirtualPath(path: string) { - if (!this.virtualSourceRoot || !this.sourceRoot) return null; - let relative = pathlib.relative(this.sourceRoot, path); - if (relative.startsWith('..') || pathlib.isAbsolute(relative)) return null; - return pathlib.join(this.virtualSourceRoot, relative); + return VirtualSourceRoot.translate(this.sourceRoot, this.virtualSourceRoot, path); + } + + /** + * Maps a path under the virtual source root to the corresponding path in the real source root. + */ + public fromVirtualPath(path: string) { + return VirtualSourceRoot.translate(this.virtualSourceRoot, this.sourceRoot, path); } /** From 9c65318f992f4c50c53008d004e23810472dafa3 Mon Sep 17 00:00:00 2001 From: Asger Feldthaus Date: Mon, 29 Jun 2020 08:23:34 +0100 Subject: [PATCH 12/19] JS: Extract TypeScript in HTML files to a snippet in virtual source root --- .../com/semmle/js/extractor/AutoBuild.java | 31 +++++----- .../semmle/js/extractor/ExtractorState.java | 15 +++++ .../semmle/js/extractor/FileExtractor.java | 59 +++++++++++++------ .../com/semmle/js/extractor/FileSnippet.java | 36 +++++++++++ .../semmle/js/extractor/HTMLExtractor.java | 30 ++++++++-- .../semmle/js/extractor/TextualExtractor.java | 22 ++++++- .../js/extractor/TypeScriptExtractor.java | 18 +++--- .../js/extractor/VirtualSourceRoot.java | 19 ++++++ .../js/extractor/test/AutoBuildTests.java | 5 +- 9 files changed, 186 insertions(+), 49 deletions(-) create mode 100644 javascript/extractor/src/com/semmle/js/extractor/FileSnippet.java diff --git a/javascript/extractor/src/com/semmle/js/extractor/AutoBuild.java b/javascript/extractor/src/com/semmle/js/extractor/AutoBuild.java index d85c4cd28a56..8cf52cb3509b 100644 --- a/javascript/extractor/src/com/semmle/js/extractor/AutoBuild.java +++ b/javascript/extractor/src/com/semmle/js/extractor/AutoBuild.java @@ -213,6 +213,7 @@ public class AutoBuild { private boolean installDependencies = false; private int installDependenciesTimeout; private final VirtualSourceRoot 
virtualSourceRoot; + private ExtractorState state; /** The default timeout when running yarn, in milliseconds. */ public static final int INSTALL_DEPENDENCIES_DEFAULT_TIMEOUT = 10 * 60 * 1000; // 10 minutes @@ -234,6 +235,7 @@ public AutoBuild() { setupFileTypes(); setupXmlMode(); setupMatchers(); + this.state = new ExtractorState(); } private String getEnvVar(String envVarName) { @@ -534,7 +536,7 @@ public File lookup(String source, ExtractorConfig config, FileType type) { @Override public FileVisitResult visitFile(Path file, BasicFileAttributes attrs) throws IOException { - if (".js".equals(FileUtil.extension(file.toString()))) extract(extractor, file, null); + if (".js".equals(FileUtil.extension(file.toString()))) extract(extractor, file, true); return super.visitFile(file, attrs); } }; @@ -656,7 +658,7 @@ private CompletableFuture extractFiles( continue; } extractedFiles.add(f); - futures.add(extract(extractors.forFile(f), f, null)); + futures.add(extract(extractors.forFile(f), f, true)); } return CompletableFuture.allOf(futures.toArray(new CompletableFuture[0])); } @@ -980,9 +982,8 @@ private Set extractTypeScript( List tsconfig, DependencyInstallationResult deps) { if (hasTypeScriptFiles(files) || !tsconfig.isEmpty()) { - ExtractorState extractorState = new ExtractorState(); - TypeScriptParser tsParser = extractorState.getTypeScriptParser(); - verifyTypeScriptInstallation(extractorState); + TypeScriptParser tsParser = state.getTypeScriptParser(); + verifyTypeScriptInstallation(state); // Collect all files included in a tsconfig.json inclusion pattern. 
// If a given file is referenced by multiple tsconfig files, we prefer to extract it using @@ -1005,7 +1006,10 @@ private Set extractTypeScript( List typeScriptFiles = new ArrayList(); for (File sourceFile : project.getAllFiles()) { Path sourcePath = sourceFile.toPath(); - if (!files.contains(normalizePath(sourcePath))) continue; + Path normalizedFile = normalizePath(sourcePath); + if (!files.contains(normalizedFile) && !state.getSnippets().containsKey(normalizedFile)) { + continue; + } if (!project.getOwnFiles().contains(sourceFile) && explicitlyIncludedFiles.contains(sourceFile)) continue; if (extractors.fileType(sourcePath) != FileType.TYPESCRIPT) { // For the time being, skip non-TypeScript files, even if the TypeScript @@ -1017,7 +1021,7 @@ private Set extractTypeScript( } } typeScriptFiles.sort(PATH_ORDERING); - extractTypeScriptFiles(typeScriptFiles, extractedFiles, extractors, extractorState); + extractTypeScriptFiles(typeScriptFiles, extractedFiles, extractors); tsParser.closeProject(projectFile); } @@ -1036,7 +1040,7 @@ private Set extractTypeScript( } } if (!remainingTypeScriptFiles.isEmpty()) { - extractTypeScriptFiles(remainingTypeScriptFiles, extractedFiles, extractors, extractorState); + extractTypeScriptFiles(remainingTypeScriptFiles, extractedFiles, extractors); } // The TypeScript compiler instance is no longer needed. 
@@ -1122,16 +1126,15 @@ public void verifyTypeScriptInstallation(ExtractorState extractorState) { public void extractTypeScriptFiles( List files, Set extractedFiles, - FileExtractors extractors, - ExtractorState extractorState) { + FileExtractors extractors) { List list = files .stream() .sorted(PATH_ORDERING) .map(p -> p.toFile()).collect(Collectors.toList()); - extractorState.getTypeScriptParser().prepareFiles(list); + state.getTypeScriptParser().prepareFiles(list); for (Path path : files) { extractedFiles.add(path); - extract(extractors.forFile(path), path, extractorState); + extract(extractors.forFile(path), path, false); } } @@ -1174,8 +1177,8 @@ private SourceType getSourceType() { *

If the state is {@code null}, the extraction job will be submitted to the {@link * #threadPool}, otherwise extraction will happen on the main thread. */ - protected CompletableFuture extract(FileExtractor extractor, Path file, ExtractorState state) { - if (state == null && threadPool != null) { + protected CompletableFuture extract(FileExtractor extractor, Path file, boolean concurrent) { + if (concurrent && threadPool != null) { return CompletableFuture.runAsync(() -> doExtract(extractor, file, state), threadPool); } else { doExtract(extractor, file, state); diff --git a/javascript/extractor/src/com/semmle/js/extractor/ExtractorState.java b/javascript/extractor/src/com/semmle/js/extractor/ExtractorState.java index f347efdf86ed..33505e8bb37e 100644 --- a/javascript/extractor/src/com/semmle/js/extractor/ExtractorState.java +++ b/javascript/extractor/src/com/semmle/js/extractor/ExtractorState.java @@ -1,5 +1,8 @@ package com.semmle.js.extractor; +import java.nio.file.Path; +import java.util.concurrent.ConcurrentHashMap; + import com.semmle.js.parser.TypeScriptParser; /** @@ -17,16 +20,28 @@ */ public class ExtractorState { private TypeScriptParser typeScriptParser = new TypeScriptParser(); + + private final ConcurrentHashMap snippets = new ConcurrentHashMap<>(); public TypeScriptParser getTypeScriptParser() { return typeScriptParser; } + /** + * Returns the mapping that denotes where a snippet file originated from. + * + *

The map is thread-safe and may be mutated by the caller. + */ + public ConcurrentHashMap getSnippets() { + return snippets; + } + /** * Makes this semantically equivalent to a fresh state, but may internally retain shared resources * that are expensive to reacquire. */ public void reset() { typeScriptParser.reset(); + snippets.clear(); } } diff --git a/javascript/extractor/src/com/semmle/js/extractor/FileExtractor.java b/javascript/extractor/src/com/semmle/js/extractor/FileExtractor.java index cacc6b6cc9c9..3c93bcfe2e20 100644 --- a/javascript/extractor/src/com/semmle/js/extractor/FileExtractor.java +++ b/javascript/extractor/src/com/semmle/js/extractor/FileExtractor.java @@ -1,15 +1,5 @@ package com.semmle.js.extractor; -import com.semmle.js.extractor.ExtractionMetrics.ExtractionPhase; -import com.semmle.js.extractor.trapcache.CachingTrapWriter; -import com.semmle.js.extractor.trapcache.ITrapCache; -import com.semmle.util.data.StringUtil; -import com.semmle.util.exception.Exceptions; -import com.semmle.util.extraction.ExtractorOutputConfig; -import com.semmle.util.files.FileUtil; -import com.semmle.util.io.WholeIO; -import com.semmle.util.trap.TrapWriter; -import com.semmle.util.trap.TrapWriter.Label; import java.io.BufferedReader; import java.io.File; import java.io.FileInputStream; @@ -17,10 +7,22 @@ import java.io.IOException; import java.nio.charset.Charset; import java.nio.charset.StandardCharsets; +import java.nio.file.Path; import java.util.LinkedHashSet; import java.util.Set; import java.util.regex.Pattern; +import com.semmle.js.extractor.ExtractionMetrics.ExtractionPhase; +import com.semmle.js.extractor.trapcache.CachingTrapWriter; +import com.semmle.js.extractor.trapcache.ITrapCache; +import com.semmle.util.data.StringUtil; +import com.semmle.util.exception.Exceptions; +import com.semmle.util.extraction.ExtractorOutputConfig; +import com.semmle.util.files.FileUtil; +import com.semmle.util.io.WholeIO; +import com.semmle.util.trap.TrapWriter; +import 
com.semmle.util.trap.TrapWriter.Label; + /** * The file extractor extracts a single file and handles source archive population and TRAP caching; * it delegates to the appropriate {@link IExtractor} for extracting the contents of the file. @@ -47,7 +49,7 @@ public static enum FileType { HTML(".htm", ".html", ".xhtm", ".xhtml", ".vue") { @Override public IExtractor mkExtractor(ExtractorConfig config, ExtractorState state) { - return new HTMLExtractor(config); + return new HTMLExtractor(config, state); } @Override @@ -293,7 +295,7 @@ private boolean hasUnrecognizedShebang(byte[] bytes, int length) { @Override public IExtractor mkExtractor(ExtractorConfig config, ExtractorState state) { - return new TypeScriptExtractor(config, state.getTypeScriptParser()); + return new TypeScriptExtractor(config, state); } @Override @@ -398,6 +400,10 @@ public boolean supports(File f) { /** @return the number of lines of code extracted, or {@code null} if the file was cached */ public Integer extract(File f, ExtractorState state) throws IOException { + FileSnippet snippet = state.getSnippets().get(f.toPath()); + if (snippet != null) { + return this.extractSnippet(f.toPath(), snippet, state); + } // populate source archive String source = new WholeIO(config.getDefaultEncoding()).strictread(f); @@ -414,6 +420,25 @@ public Integer extract(File f, ExtractorState state) throws IOException { return extractContents(f, fileLabel, source, locationManager, state); } + /** + * Extract the contents of a file that is a snippet from another file. + * + *

A trap file will be derived from the snippet file, but its file label, source locations, and + * source archive entry are based on the original file. + */ + private Integer extractSnippet(Path file, FileSnippet origin, ExtractorState state) throws IOException { + TrapWriter trapwriter = outputConfig.getTrapWriterFactory().mkTrapWriter(file.toFile()); + + File originalFile = origin.getOriginalFile().toFile(); + Label fileLabel = trapwriter.populateFile(originalFile); + LocationManager locationManager = new LocationManager(originalFile, trapwriter, fileLabel); + locationManager.setStart(origin.getLine(), origin.getColumn()); + + String source = new WholeIO(config.getDefaultEncoding()).strictread(file); + + return extractContents(file.toFile(), fileLabel, source, locationManager, state); + } + /** * Extract the contents of a file, potentially making use of cached information. * @@ -436,20 +461,20 @@ public Integer extract(File f, ExtractorState state) throws IOException { * obviously, no caching is done in that scenario. 
*/ private Integer extractContents( - File f, Label fileLabel, String source, LocationManager locationManager, ExtractorState state) + File extractedFile, Label fileLabel, String source, LocationManager locationManager, ExtractorState state) throws IOException { ExtractionMetrics metrics = new ExtractionMetrics(); metrics.startPhase(ExtractionPhase.FileExtractor_extractContents); metrics.setLength(source.length()); metrics.setFileLabel(fileLabel); TrapWriter trapwriter = locationManager.getTrapWriter(); - FileType fileType = getFileType(f); + FileType fileType = getFileType(extractedFile); File cacheFile = null, // the cache file for this extraction resultFile = null; // the final result TRAP file for this extraction if (bumpIdCounter(trapwriter)) { - resultFile = outputConfig.getTrapWriterFactory().getTrapFileFor(f); + resultFile = outputConfig.getTrapWriterFactory().getTrapFileFor(extractedFile); } // check whether we can perform caching if (resultFile != null && fileType.isTrapCachingAllowed()) { @@ -475,7 +500,7 @@ private Integer extractContents( trapwriter = new CachingTrapWriter(cacheFile, resultFile); bumpIdCounter(trapwriter); // re-initialise the location manager, since it keeps a reference to the TRAP writer - locationManager = new LocationManager(f, trapwriter, locationManager.getFileLabel()); + locationManager = new LocationManager(extractedFile, trapwriter, locationManager.getFileLabel()); } // now do the extraction itself @@ -484,7 +509,7 @@ private Integer extractContents( IExtractor extractor = fileType.mkExtractor(config, state); TextualExtractor textualExtractor = new TextualExtractor( - trapwriter, locationManager, source, config.getExtractLines(), metrics); + trapwriter, locationManager, source, config.getExtractLines(), metrics, extractedFile); LoCInfo loc = extractor.extract(textualExtractor); int numLines = textualExtractor.getNumLines(); int linesOfCode = loc.getLinesOfCode(), linesOfComments = loc.getLinesOfComments(); diff --git 
a/javascript/extractor/src/com/semmle/js/extractor/FileSnippet.java b/javascript/extractor/src/com/semmle/js/extractor/FileSnippet.java new file mode 100644 index 000000000000..23bee94669f5 --- /dev/null +++ b/javascript/extractor/src/com/semmle/js/extractor/FileSnippet.java @@ -0,0 +1,36 @@ +package com.semmle.js.extractor; + +import java.nio.file.Path; + +/** + * Denotes where a code snippet originated from within a file. + */ +public class FileSnippet { + private Path originalFile; + private int line; + private int column; + private int topLevelKind; + + public FileSnippet(Path originalFile, int line, int column, int topLevelKind) { + this.originalFile = originalFile; + this.line = line; + this.column = column; + this.topLevelKind = topLevelKind; + } + + public Path getOriginalFile() { + return originalFile; + } + + public int getLine() { + return line; + } + + public int getColumn() { + return column; + } + + public int getTopLevelKind() { + return topLevelKind; + } +} diff --git a/javascript/extractor/src/com/semmle/js/extractor/HTMLExtractor.java b/javascript/extractor/src/com/semmle/js/extractor/HTMLExtractor.java index e07bc414211f..0dbaf2761fbd 100644 --- a/javascript/extractor/src/com/semmle/js/extractor/HTMLExtractor.java +++ b/javascript/extractor/src/com/semmle/js/extractor/HTMLExtractor.java @@ -1,11 +1,13 @@ package com.semmle.js.extractor; +import java.nio.file.Path; import java.util.regex.Pattern; import com.semmle.js.extractor.ExtractorConfig.Platform; import com.semmle.js.extractor.ExtractorConfig.SourceType; import com.semmle.js.parser.ParseError; import com.semmle.util.data.StringUtil; +import com.semmle.util.io.WholeIO; import com.semmle.util.trap.TrapWriter; import com.semmle.util.trap.TrapWriter.Label; @@ -28,9 +30,11 @@ public class HTMLExtractor implements IExtractor { Pattern.CASE_INSENSITIVE); private final ExtractorConfig config; + private final ExtractorState state; - public HTMLExtractor(ExtractorConfig config) { + public 
HTMLExtractor(ExtractorConfig config, ExtractorState state) { this.config = config.withPlatform(Platform.WEB); + this.state = state; } @Override @@ -208,8 +212,25 @@ private LoCInfo extractSnippet( int line, int column, boolean isTypeScript) { - if (isTypeScript) - return null; // not supported right now + if (isTypeScript) { + Path file = textualExtractor.getExtractedFile().toPath(); + FileSnippet snippet = new FileSnippet(file, line, column, toplevelKind); + VirtualSourceRoot vroot = config.getVirtualSourceRoot(); + // Vue files are special in that they can be imported as modules, and may only contain one + + From 182e4ce7274c7c9965df49fdc64cfa7ac09b67a5 Mon Sep 17 00:00:00 2001 From: Asger Feldthaus Date: Mon, 29 Jun 2020 19:10:28 +0100 Subject: [PATCH 18/19] JS: Autoformat --- .../library-tests/TypeScript/EmbeddedInScript/Test.ql | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/javascript/ql/test/library-tests/TypeScript/EmbeddedInScript/Test.ql b/javascript/ql/test/library-tests/TypeScript/EmbeddedInScript/Test.ql index 613a23fe2493..43a718bf77b9 100644 --- a/javascript/ql/test/library-tests/TypeScript/EmbeddedInScript/Test.ql +++ b/javascript/ql/test/library-tests/TypeScript/EmbeddedInScript/Test.ql @@ -4,10 +4,6 @@ query ClassDefinition classDeclaration() { any() } query Type exprType(Expr e) { result = e.getType() } -query predicate symbols(Module mod, CanonicalName name) { - ast_node_symbol(mod, name) -} +query predicate symbols(Module mod, CanonicalName name) { ast_node_symbol(mod, name) } -query predicate importTarget(Import imprt, Module mod) { - imprt.getImportedModule() = mod -} +query predicate importTarget(Import imprt, Module mod) { imprt.getImportedModule() = mod } From 7a2c65f63837bb10839a3287e0ce573f7f5af8b6 Mon Sep 17 00:00:00 2001 From: Asger Feldthaus Date: Tue, 30 Jun 2020 09:25:06 +0100 Subject: [PATCH 19/19] JS: Fix virtual source root in AutoBuildTest --- .../extractor/src/com/semmle/js/extractor/AutoBuild.java | 
6 +++++- .../src/com/semmle/js/extractor/test/AutoBuildTests.java | 6 ++++++ 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/javascript/extractor/src/com/semmle/js/extractor/AutoBuild.java b/javascript/extractor/src/com/semmle/js/extractor/AutoBuild.java index 8cf52cb3509b..66cb57ba30fd 100644 --- a/javascript/extractor/src/com/semmle/js/extractor/AutoBuild.java +++ b/javascript/extractor/src/com/semmle/js/extractor/AutoBuild.java @@ -231,13 +231,17 @@ public AutoBuild() { Env.systemEnv() .getInt( "LGTM_INDEX_TYPESCRIPT_INSTALL_DEPS_TIMEOUT", INSTALL_DEPENDENCIES_DEFAULT_TIMEOUT); - this.virtualSourceRoot = new VirtualSourceRoot(LGTM_SRC, toRealPath(Paths.get(EnvironmentVariables.getScratchDir()))); + this.virtualSourceRoot = makeVirtualSourceRoot(); setupFileTypes(); setupXmlMode(); setupMatchers(); this.state = new ExtractorState(); } + protected VirtualSourceRoot makeVirtualSourceRoot() { + return new VirtualSourceRoot(LGTM_SRC, toRealPath(Paths.get(EnvironmentVariables.getScratchDir()))); + } + private String getEnvVar(String envVarName) { return getEnvVar(envVarName, null); } diff --git a/javascript/extractor/src/com/semmle/js/extractor/test/AutoBuildTests.java b/javascript/extractor/src/com/semmle/js/extractor/test/AutoBuildTests.java index 2a1da6628268..01005ebcfa46 100644 --- a/javascript/extractor/src/com/semmle/js/extractor/test/AutoBuildTests.java +++ b/javascript/extractor/src/com/semmle/js/extractor/test/AutoBuildTests.java @@ -28,6 +28,7 @@ import com.semmle.js.extractor.ExtractorState; import com.semmle.js.extractor.FileExtractor; import com.semmle.js.extractor.FileExtractor.FileType; +import com.semmle.js.extractor.VirtualSourceRoot; import com.semmle.util.data.StringUtil; import com.semmle.util.exception.UserError; import com.semmle.util.files.FileUtil; @@ -137,6 +138,11 @@ protected DependencyInstallationResult preparePackagesAndDependencies(Set return DependencyInstallationResult.empty; } + @Override + protected VirtualSourceRoot 
makeVirtualSourceRoot() { + return VirtualSourceRoot.none; // not used in these tests + } + @Override protected void extractXml() throws IOException { Files.walkFileTree(