Skip to content

Commit

Permalink
Speeds up reading of zip source files
Browse files Browse the repository at this point in the history
By default zip files created via URL connection supports caching. However this built-in cache never invalidate and we don't want that since zip file contents might change over time.

Earlier attempt to fix this issue killed caching completely which resulted in each individual entry to be independently loaded from the zip archive. That is very inefficient especially for big zip files.

This patch replaces earlier workaround with a timestamp controlled caching so we avoid inefficient re-reads.

With this CL, since it is no longer I/O bounded highly parallel parsing of AST (8+ threads) is 3x faster while the total time of bundling reduced by 40-50% (for J2CL sample app).

The improvement should reflect similarly for other tools (MF, MSS) as well but I didn't measure how much that will be.

The chosen default cache size of 1000 has memory impact around 5MB which is negligible (this is because ZipFile is already designed to be cached indefinitely by JDK since that is default behavior for URLs). Memory testing results/methodology could be found here:
http://[]/experimental/users/goktug/zipstress/Main.java?l=30-35

-------------
Created by MOE: https://github.com/google/moe
MOE_MIGRATED_REVID=232400779
  • Loading branch information
gkdn authored and blickly committed Feb 6, 2019
1 parent f04d80f commit 3888713
Show file tree
Hide file tree
Showing 2 changed files with 144 additions and 32 deletions.
48 changes: 16 additions & 32 deletions src/com/google/javascript/jscomp/SourceFile.java
Expand Up @@ -24,7 +24,6 @@
import com.google.common.annotations.GwtIncompatible;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.io.CharStreams;
import com.google.common.io.Resources;
import com.google.javascript.rhino.StaticSourceFile;
import com.google.javascript.rhino.StaticSourceFile.SourceKind;
import java.io.File;
Expand All @@ -36,8 +35,6 @@
import java.io.StringReader;
import java.net.MalformedURLException;
import java.net.URI;
import java.net.URL;
import java.net.URLConnection;
import java.nio.charset.Charset;
import java.nio.file.Files;
import java.nio.file.Path;
Expand Down Expand Up @@ -356,7 +353,6 @@ public static List<SourceFile> fromZipFile(String zipName, Charset inputCharset)
}

private static final String BANG_SLASH = "!/";
private static final String JAR_URL_PREFIX = "jar:file:";

private static boolean isZipEntry(String path) {
return path.contains(".zip!" + File.separator)
Expand All @@ -376,18 +372,16 @@ private static SourceFile fromZipEntry(String zipURL, Charset inputCharset) {
}
}

@GwtIncompatible("java.net.URL")
@GwtIncompatible("java.io.File")
public static SourceFile fromZipEntry(
String originalZipPath, String absoluteZipPath, String entryPath, Charset inputCharset)
throws MalformedURLException {
String zipEntryPath =
JAR_URL_PREFIX + absoluteZipPath + BANG_SLASH + entryPath.replace(File.separator, "/");
URL zipEntryUrl = new URL(zipEntryPath);

// No longer throws MalformedURLException but we are keeping it for backward compatbility.
return builder()
.withCharset(inputCharset)
.withOriginalPath(originalZipPath + BANG_SLASH + entryPath)
.buildFromUrl(zipEntryUrl);
.buildFromZipEntry(
new ZipEntryReader(absoluteZipPath, entryPath.replace(File.separator, "/")));
}

@GwtIncompatible("java.io.File")
Expand Down Expand Up @@ -500,11 +494,11 @@ public SourceFile buildFromPath(Path path) {
return new OnDisk(path, originalPath, charset, kind);
}

@GwtIncompatible("java.net.URL")
public SourceFile buildFromUrl(URL url) {
checkNotNull(url);
@GwtIncompatible("java.io.File")
public SourceFile buildFromZipEntry(ZipEntryReader zipEntryReader) {
checkNotNull(zipEntryReader);
checkNotNull(charset);
return new AtUrl(url, originalPath, charset, kind);
return new SourceFile.AtZip(zipEntryReader, originalPath, charset, kind);
}

public SourceFile buildFromCode(String fileName, String code) {
Expand Down Expand Up @@ -654,21 +648,19 @@ private void readObject(java.io.ObjectInputStream in) throws Exception {
}

/**
* A source file at a URL where the code is only read into memory if absolutely necessary. We will
* A source file at a zip where the code is only read into memory if absolutely necessary. We will
* try to delay loading the code into memory as long as possible.
*
* <p>In practice this is used to load code in entries inside of zip files.
*/
@GwtIncompatible("java.net.URL")
private static class AtUrl extends SourceFile {
@GwtIncompatible("java.io.File")
private static class AtZip extends SourceFile {
private static final long serialVersionUID = 1L;
private final URL url;
private final ZipEntryReader zipEntryReader;
private transient Charset inputCharset;

AtUrl(URL url, String originalPath, Charset c, SourceKind kind) {
AtZip(ZipEntryReader zipEntryReader, String originalPath, Charset c, SourceKind kind) {
super(originalPath, SourceKind.STRONG);
this.inputCharset = c;
this.url = url;
this.zipEntryReader = zipEntryReader;
setOriginalPath(originalPath);
}

Expand All @@ -677,15 +669,7 @@ public synchronized String getCode() throws IOException {
String cachedCode = super.getCode();

if (cachedCode == null) {
URLConnection urlConnection = url.openConnection();
// Perform the read through the URL connection while making sure that it does not internally
// cache, because its default internal caching would defeat our own cache management.
urlConnection.setUseCaches(false);
InputStream inputStream = urlConnection.getInputStream();
cachedCode = CharStreams.toString(new InputStreamReader(inputStream, inputCharset));
// Must close the stream or else the cache won't be cleared.
inputStream.close();

cachedCode = zipEntryReader.read(inputCharset);
super.setCode(cachedCode);
// Byte Order Mark can be removed by setCode
cachedCode = super.getCode();
Expand All @@ -702,7 +686,7 @@ public Reader getCodeReader() throws IOException {
return super.getCodeReader();
} else {
// If we haven't pulled the code into memory yet, don't.
return Resources.asCharSource(url, inputCharset).openStream();
return zipEntryReader.getReader(inputCharset);
}
}

Expand Down
128 changes: 128 additions & 0 deletions src/com/google/javascript/jscomp/ZipEntryReader.java
@@ -0,0 +1,128 @@
/*
* Copyright 2018 The Closure Compiler Authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.google.javascript.jscomp;

import com.google.common.annotations.GwtIncompatible;
import com.google.common.cache.CacheBuilder;
import com.google.common.cache.CacheLoader;
import com.google.common.cache.LoadingCache;
import com.google.common.cache.RemovalNotification;
import com.google.common.io.CharStreams;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.Reader;
import java.io.Serializable;
import java.nio.charset.Charset;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.nio.file.attribute.FileTime;
import java.util.zip.ZipFile;

/**
* A class that abstract entries from zip files via managed caching.
*
* <p>Normally zip files created via URL connection cache indefinite by JDK. That's not good since a
* zip file contents might change over time (e.g. compiler running as a worker). This class provides
* a timestamp controlled caching which ensures we always read up-to-date zip while avoiding wasting
* time by re-reading the zip for each entry.
*/
@GwtIncompatible("java.util.zip.ZipFile")
final class ZipEntryReader implements Serializable {

private static final long serialVersionUID = 1L;

private static final int ZIP_CACHE_SIZE =
Integer.parseInt(System.getProperty("jscomp.zipfile.cachesize", "1000"));

private static final LoadingCache<String, CachedZipFile> zipFileCache =
CacheBuilder.newBuilder()
.maximumSize(ZIP_CACHE_SIZE)
.removalListener(
(RemovalNotification<String, CachedZipFile> notification) -> {
try {
notification.getValue().maybeClose();
} catch (IOException e) {
throw new RuntimeException(e);
}
})
.build(
new CacheLoader<String, CachedZipFile>() {
@Override
public CachedZipFile load(String key) throws IOException {
return new CachedZipFile(key);
}
});

private static class CachedZipFile {
private final Path path;
private ZipFile zipFile;
private volatile FileTime lastModified;

private CachedZipFile(String zipName) {
this.path = Paths.get(zipName);
}

private Reader getReader(String entryName, Charset charset) throws IOException {
refreshIfNeeded();
return new InputStreamReader(zipFile.getInputStream(zipFile.getEntry(entryName)), charset);
}

private void refreshIfNeeded() throws IOException {
FileTime newLastModified = Files.getLastModifiedTime(path);
if (newLastModified.equals(lastModified)) {
return;
}

synchronized (this) {
// Since we do double checked locking (newLastModified is checked out of synchronized), we
// should test the stamp again.
if (newLastModified.equals(lastModified)) {
return;
}

maybeClose();
zipFile = new ZipFile(path.toFile());
lastModified = newLastModified;
}
}

private void maybeClose() throws IOException {
if (zipFile != null) {
zipFile.close();
}
}
}

private final String zipPath;
private final String entryName;

public ZipEntryReader(String zipPath, String entryName) {
this.zipPath = zipPath;
this.entryName = entryName;
}

public Reader getReader(Charset charset) throws IOException {
CachedZipFile zipFile = zipFileCache.getUnchecked(zipPath);
return new BufferedReader(zipFile.getReader(entryName, charset));
}

public String read(Charset charset) throws IOException {
CachedZipFile zipFile = zipFileCache.getUnchecked(zipPath);
return CharStreams.toString(zipFile.getReader(entryName, charset));
}
}

0 comments on commit 3888713

Please sign in to comment.