# Accelerating GFile with prefetching

Import TensorFlow and `gfile`, and make sure your TensorFlow version is at least `PAI1911`.

In [1]:
import tensorflow as tf
from tensorflow.python.platform import gfile

# Show the installed TensorFlow build; the cell output is the version string.
tf.__version__

'1.12.2-PAI1911'

We use OSS as an example, so let's do some preparation:

1. Import `tensorflow_io.oss`, which is built into DSW.
2. Set up your OSS configuration.

In [3]:
import tensorflow_io.oss

import os

# OSS connection settings. Each value is taken from an environment variable
# when it is set, so real credentials never have to be saved in the notebook;
# otherwise the placeholder is used and must be replaced by hand.
# The variable names (OSS_ACCESS_ID, ...) are a convention of this notebook only.
ACCESS_ID = os.environ.get("OSS_ACCESS_ID", "<your access id>")
ACCESS_KEY = os.environ.get("OSS_ACCESS_KEY", "<your access key>")
HOST = os.environ.get("OSS_HOST", "<your host>")
BUCKET = os.environ.get("OSS_BUCKET", "<your bucket>")

# Root prefix that file names are appended to. The id/key/host settings are
# embedded in the path itself, separated by the \x01 and \x02 control bytes.
# NOTE(review): the tensorflow_io OSS examples appear to put the `oss://`
# scheme in the bucket value - confirm that BUCKET should include it.
OSS_BUCKET_ROOT = "{}\x01id={}\x02key={}\x02host={}/".format(
    BUCKET, ACCESS_ID, ACCESS_KEY, HOST
)


Finally, let's set up a `download` function and run a benchmark.

In [4]:
from time import time

def download(file, prefetch_threads=0):
    """Read `file` from the OSS bucket in 1 MB chunks and print the throughput.

    Args:
        file: Object name; it is appended to `OSS_BUCKET_ROOT` to build the path.
        prefetch_threads: Forwarded to `GFile` as its `prefetch_threads`
            argument. Defaults to 0 (presumably "no prefetching" - confirm
            against the PAI TensorFlow documentation).

    Returns:
        None. One line with the elapsed time, thread count and speed in MB/s
        is printed.
    """
    start = time()
    total_read = 0
    _1MB = 1024 * 1024
    oss_file = OSS_BUCKET_ROOT + file

    # The extra `prefetch_threads` argument is the magic here. It defaults to 0.
    with tf.gfile.GFile(oss_file, mode="rb", prefetch_threads=prefetch_threads) as f:
        while True:
            chunk = f.read(_1MB)
            # Stop only on an empty read. A chunk shorter than 1 MB is not
            # treated as end-of-file, so a partial read cannot truncate the
            # measurement.
            if not chunk:
                break
            total_read += len(chunk)
    elapsed_sec = time() - start
    total_read_MB = 1.0 * total_read / _1MB
    if elapsed_sec > 0:
        speed = total_read_MB / elapsed_sec
    else:
        # The clock can report zero elapsed time for tiny or empty files;
        # avoid a ZeroDivisionError in that case.
        speed = float("inf") if total_read else 0.0
    print("time (s) elapsed: %.2f s, threads: %d, speed: %.2f MB/s"
          % (elapsed_sec, prefetch_threads, speed))

file = "<your file to download>"

# Run the baseline first (0 prefetch threads, the default), then 4 threads.
for prefetch_threads in (0, 4):
    download(file, prefetch_threads=prefetch_threads)

time (s) elapsed: 66.92 s, threads: 0, speed: 8.02 MB/s
time (s) elapsed: 25.68 s, threads: 4, speed: 20.90 MB/s
