diff --git a/desktop/libs/hadoop/src/hadoop/fs/webhdfs.py b/desktop/libs/hadoop/src/hadoop/fs/webhdfs.py
index 747c1e32fa8..e0e565bd635 100644
--- a/desktop/libs/hadoop/src/hadoop/fs/webhdfs.py
+++ b/desktop/libs/hadoop/src/hadoop/fs/webhdfs.py
@@ -835,6 +835,7 @@ def __init__(self, fs, path, mode='r'):
     self._path = normpath(path)
     self._pos = 0
     self._mode = mode
+    self._data = ""
 
     try:
       self._stat = fs.stats(path)
@@ -859,6 +860,9 @@ def seek(self, offset, whence=0):
     else:
       raise IOError(errno.EINVAL, _("Invalid argument to seek for whence"))
 
+    # Reset cached data on seek
+    self._data = ""
+
   def stat(self):
     self._stat = self._fs.stats(self._path)
     return self._stat
@@ -867,8 +871,20 @@ def tell(self):
     return self._pos
 
   def read(self, length=DEFAULT_READ_SIZE):
-    data = self._fs.read(self._path, self._pos, length)
-    self._pos += len(data)
+    # Make sure to grab more data if there is none available
+    if length > len(self._data):
+      # Grab at minimum DEFAULT_READ_SIZE
+      self._data += self._fs.read(self._path, self._pos, max(length, DEFAULT_READ_SIZE))
+
+    # Make sure to increase the _pos by length of data or what length of requested
+    real_length = min(len(self._data), length)
+    self._pos += real_length
+
+    # Strip off real length from data set
+    data = self._data[:real_length]
+    self._data = self._data[real_length:]
+
+    # Return the requested length of data
     return data
 
   def write(self, data):