
HADOOP-16696: ABFS Always read ahead config, to use read ahead even for non sequential reads. #1708

Open

wants to merge 11 commits into base: trunk
@@ -146,6 +146,10 @@ public class AbfsConfiguration{
DefaultValue = DEFAULT_READ_AHEAD_QUEUE_DEPTH)
private int readAheadQueueDepth;

@BooleanConfigurationValidatorAnnotation(ConfigurationKey = FS_AZURE_ALWAYS_READ_AHEAD,
DefaultValue = DEFAULT_FS_AZURE_ALWAYS_READ_AHEAD)
private boolean alwaysReadAhead;

@BooleanConfigurationValidatorAnnotation(ConfigurationKey = FS_AZURE_ENABLE_FLUSH,
DefaultValue = DEFAULT_ENABLE_FLUSH)
private boolean enableFlush;
@@ -435,6 +439,10 @@ public int getReadAheadQueueDepth() {
return this.readAheadQueueDepth;
}

public boolean getAlwaysReadAhead() {
return this.alwaysReadAhead;
}

public boolean isFlushEnabled() {
return this.enableFlush;
}
@@ -446,9 +446,7 @@ public AbfsInputStream openFileForRead(final Path path, final FileSystem.Statist

// Add statistics for InputStream
return new AbfsInputStream(client, statistics,
AbfsHttpConstants.FORWARD_SLASH + getRelativePath(path), contentLength,
abfsConfiguration.getReadBufferSize(), abfsConfiguration.getReadAheadQueueDepth(),
abfsConfiguration.getTolerateOobAppends(), eTag);
AbfsHttpConstants.FORWARD_SLASH + getRelativePath(path), contentLength, eTag, abfsConfiguration);
Contributor:

I'm not sure whether passing the full config down is the right approach compared to passing the explicit options in. Can you justify this?

Author:

The number of options we are passing to the stream has increased to 4, which triggers Hadoop checkstyle violations (total arguments > 8). Increasing the number of arguments also doesn't feel like a scalable approach, in my opinion. I agree that passing the whole AbfsConfiguration object isn't very elegant either. How would you feel about a new AbfsInputStreamConfig structure (class) for passing stream-related config options?

Thanks,
Saurabh

Contributor:

I'd prefer passing in some structure like "ReadContext".
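The "ReadContext" suggestion could look something like the sketch below: an immutable holder for the stream-related options, assembled with a fluent builder so the AbfsInputStream constructor keeps a short signature and checkstyle stays happy. All names here (ReadContext, the with* setters) are illustrative, not part of the actual patch.

```java
// Hypothetical ReadContext sketch: bundles the stream-related options that
// AbfsInputStream currently takes as separate constructor arguments.
class ReadContext {
  private final int readBufferSize;
  private final int readAheadQueueDepth;
  private final boolean tolerateOobAppends;
  private final boolean alwaysReadAhead;

  private ReadContext(Builder b) {
    this.readBufferSize = b.readBufferSize;
    // Fall back to the processor count when the depth is unset (< 0),
    // mirroring the existing AbfsInputStream logic.
    this.readAheadQueueDepth = b.readAheadQueueDepth >= 0
        ? b.readAheadQueueDepth : Runtime.getRuntime().availableProcessors();
    this.tolerateOobAppends = b.tolerateOobAppends;
    this.alwaysReadAhead = b.alwaysReadAhead;
  }

  int getReadBufferSize() { return readBufferSize; }
  int getReadAheadQueueDepth() { return readAheadQueueDepth; }
  boolean getTolerateOobAppends() { return tolerateOobAppends; }
  boolean getAlwaysReadAhead() { return alwaysReadAhead; }

  static class Builder {
    private int readBufferSize;
    private int readAheadQueueDepth = -1; // -1 means "derive from CPU count"
    private boolean tolerateOobAppends;
    private boolean alwaysReadAhead;

    Builder withReadBufferSize(int size) { this.readBufferSize = size; return this; }
    Builder withReadAheadQueueDepth(int depth) { this.readAheadQueueDepth = depth; return this; }
    Builder withTolerateOobAppends(boolean v) { this.tolerateOobAppends = v; return this; }
    Builder withAlwaysReadAhead(boolean v) { this.alwaysReadAhead = v; return this; }
    ReadContext build() { return new ReadContext(this); }
  }
}
```

The AbfsInputStream constructor would then take a single ReadContext built by AzureBlobFileSystemStore from AbfsConfiguration, so adding a future option touches the builder rather than every constructor signature.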

}
}

@@ -51,6 +51,7 @@ public final class ConfigurationKeys {
public static final String FS_AZURE_ALWAYS_USE_HTTPS = "fs.azure.always.use.https";
public static final String FS_AZURE_ATOMIC_RENAME_KEY = "fs.azure.atomic.rename.key";
public static final String FS_AZURE_READ_AHEAD_QUEUE_DEPTH = "fs.azure.readaheadqueue.depth";
public static final String FS_AZURE_ALWAYS_READ_AHEAD = "fs.azure.always.readahead";
sapant-msft marked this conversation as resolved.
/** Provides a config control to enable or disable ABFS Flush operations -
* HFlush and HSync. Default is true. **/
public static final String FS_AZURE_ENABLE_FLUSH = "fs.azure.enable.flush";
@@ -56,6 +56,7 @@ public final class FileSystemConfigurations {
public static final String DEFAULT_FS_AZURE_ATOMIC_RENAME_DIRECTORIES = "/hbase";

public static final int DEFAULT_READ_AHEAD_QUEUE_DEPTH = -1;
public static final boolean DEFAULT_FS_AZURE_ALWAYS_READ_AHEAD = false;
public static final boolean DEFAULT_ENABLE_FLUSH = true;
public static final boolean DEFAULT_DISABLE_OUTPUTSTREAM_FLUSH = true;
public static final boolean DEFAULT_ENABLE_AUTOTHROTTLING = true;
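With the key and its default in place, the new switch would be enabled like any other fs.azure.* option. A hedged sketch of the core-site.xml entry (the key name and default come from the diff above; the value shown is illustrative):

```xml
<!-- core-site.xml: opt in to read-ahead for every read, including
     non-sequential ones. Defaults to false
     (DEFAULT_FS_AZURE_ALWAYS_READ_AHEAD). -->
<property>
  <name>fs.azure.always.readahead</name>
  <value>true</value>
</property>
```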
@@ -30,6 +30,7 @@
import org.apache.hadoop.fs.FileSystem.Statistics;
import org.apache.hadoop.fs.azurebfs.contracts.exceptions.AbfsRestOperationException;
import org.apache.hadoop.fs.azurebfs.contracts.exceptions.AzureBlobFileSystemException;
import org.apache.hadoop.fs.azurebfs.AbfsConfiguration;
sapant-msft marked this conversation as resolved.

/**
* The AbfsInputStream for AbfsClient.
@@ -44,6 +45,7 @@ public class AbfsInputStream extends FSInputStream {
private final String eTag; // eTag of the path when the InputStream was created
private final boolean tolerateOobAppends; // whether to tolerate OOB appends
private final boolean readAheadEnabled; // whether readAhead is enabled
private final boolean alwaysReadAhead; // read ahead even if reads are non-sequential

private byte[] buffer = null; // will be initialized on first use

@@ -59,19 +61,19 @@ public AbfsInputStream(
final Statistics statistics,
final String path,
final long contentLength,
final int bufferSize,
final int readAheadQueueDepth,
final boolean tolerateOobAppends,
final String eTag) {
final String eTag,
final AbfsConfiguration abfsConfiguration) {
this.client = client;
this.statistics = statistics;
this.path = path;
this.contentLength = contentLength;
this.bufferSize = bufferSize;
this.readAheadQueueDepth = (readAheadQueueDepth >= 0) ? readAheadQueueDepth : Runtime.getRuntime().availableProcessors();
this.tolerateOobAppends = tolerateOobAppends;
this.bufferSize = abfsConfiguration.getReadBufferSize();
this.readAheadQueueDepth = (abfsConfiguration.getReadAheadQueueDepth() >= 0)
? abfsConfiguration.getReadAheadQueueDepth() : Runtime.getRuntime().availableProcessors();
this.tolerateOobAppends = abfsConfiguration.getTolerateOobAppends();
this.eTag = eTag;
this.readAheadEnabled = true;
this.alwaysReadAhead = abfsConfiguration.getAlwaysReadAhead();
}

public String getPath() {
@@ -144,7 +146,7 @@ private int readOneBlock(final byte[] b, final int off, final int len) throws IO
}

// Enable readAhead when reading sequentially, or unconditionally when alwaysReadAhead is set
if (-1 == fCursorAfterLastRead || fCursorAfterLastRead == fCursor || b.length >= bufferSize) {
if (-1 == fCursorAfterLastRead || fCursorAfterLastRead == fCursor || b.length >= bufferSize || alwaysReadAhead) {
bytesRead = readInternal(fCursor, buffer, 0, bufferSize, false);
} else {
bytesRead = readInternal(fCursor, buffer, 0, b.length, true);
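The modified condition above can be read as a small predicate. A hypothetical extraction, with illustrative names, showing when the full-buffer read-ahead path is taken:

```java
// Sketch of the read-ahead decision in readOneBlock. The full bufferSize
// is read (with read-ahead) when: this is the first read on the stream,
// the read is sequential (the previous read ended where this one starts),
// the caller's buffer is at least bufferSize, or the new always-read-ahead
// switch (HADOOP-16696) is on. Otherwise only b.length bytes are fetched.
class ReadAheadDecision {
  static boolean shouldReadAhead(long fCursorAfterLastRead, long fCursor,
                                 int callerBufferLength, int bufferSize,
                                 boolean alwaysReadAhead) {
    return fCursorAfterLastRead == -1          // first read on this stream
        || fCursorAfterLastRead == fCursor     // sequential read
        || callerBufferLength >= bufferSize    // large read fills the buffer anyway
        || alwaysReadAhead;                    // forced read-ahead, even for random reads
  }
}
```

Without the new switch, a backward seek followed by a read makes the predicate false and the stream fetches only b.length bytes; with fs.azure.always.readahead set, every read goes through the read-ahead path.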