Patching `pandas` to use `requests_cache`.

In [1]:
    import requests, types, contextlib, requests_cache, pandas

In [2]:
    def _urlopen(filepath_or_buffer):
        """Fetch a URL with ``requests`` and return a response pandas can read from.

        Used as a stand-in for the ``urlopen`` callable that pandas calls
        internally, so that the request goes through ``requests`` (and therefore
        through any installed ``requests_cache`` cache).

        Parameters
        ----------
        filepath_or_buffer : str
            URL to fetch; passed straight to ``requests.get``.

        Returns
        -------
        requests.Response
            The response, with a zero-argument ``read()`` method added that
            returns ``response.content``.

        Raises
        ------
        requests.exceptions.HTTPError
            If the server answers with a 4xx/5xx status.
        """
        response = requests.get(filepath_or_buffer)
        # Fail loudly on HTTP errors instead of handing an error page to the
        # pandas parsers, which would surface as a confusing parse failure.
        response.raise_for_status()
        # NOTE(review): nothing in this notebook reads `compression`; kept as-is.
        response.compression = None
        # Presumably cleared so a caller inspecting the header does not try to
        # decompress `content` a second time -- confirm against the pandas version in use.
        response.headers['Content-Encoding'] = None

        def read(self):
            """Return the full response body as bytes."""
            return self.content

        # Bind `read` to this one instance so file-like consumers can call response.read().
        response.read = types.MethodType(read, response)
        return response

In [3]:
    @contextlib.contextmanager
    def cache(name=None):
        """Temporarily route pandas' URL fetching through ``_urlopen``.

        While the ``with`` block is active, ``pandas.io.common._urlopen`` and
        ``pandas.io.html.urlopen`` are replaced by ``_urlopen``. On exit the
        previous callables are put back, even if the block raised.

        Parameters
        ----------
        name : str, optional
            When truthy, ``requests_cache.install_cache(name)`` is called on
            entry and ``requests_cache.uninstall_cache()`` on exit, so the cache
            is only active inside the block. When falsy, no cache is installed
            or removed here.
        """
        if name:
            requests_cache.install_cache(name)
        # Remember both hooks independently so each gets back exactly what it had.
        original_common = pandas.io.common._urlopen
        original_html = pandas.io.html.urlopen
        pandas.io.html.urlopen = pandas.io.common._urlopen = _urlopen
        try:
            yield
        finally:
            # Always undo the monkey-patch; otherwise an exception in the body
            # would leave pandas patched for the rest of the kernel session.
            pandas.io.common._urlopen = original_common
            pandas.io.html.urlopen = original_html
            if name:
                requests_cache.uninstall_cache()
        

In [4]:
    !rm data.sqlite

In [5]:
    %%time
    df = pandas.read_json("http://api.github.com/repos/deathbeds/pidgin/commits?page=1", typ=pandas.Series)
    dfs = pandas.read_html('https://en.wikipedia.org/wiki/Thermodynamics')

CPU times: user 237 ms, sys: 43.6 ms, total: 281 ms
Wall time: 705 ms


In [6]:
    %%time
    # First timed read_json of the GitHub commits URL inside cache('data').
    # Presumably a cache miss, since an earlier cell removes data.sqlite -- confirm.
    with cache('data'):
        df = pandas.read_json("http://api.github.com/repos/deathbeds/pidgin/commits?page=1")

CPU times: user 33.8 ms, sys: 8.31 ms, total: 42.1 ms
Wall time: 228 ms


In [7]:
    %%time
    # Same read_json call as the preceding cell, repeated so the two timings
    # can be compared.
    with cache('data'):
        df = pandas.read_json("http://api.github.com/repos/deathbeds/pidgin/commits?page=1")

CPU times: user 44.4 ms, sys: 5.62 ms, total: 50.1 ms
Wall time: 52.3 ms


In [8]:
    %%time
    # First timed read_html of the Wikipedia page inside cache('data').
    with cache('data'):
        dfs = pandas.read_html('https://en.wikipedia.org/wiki/Thermodynamics', flavor=None)

CPU times: user 421 ms, sys: 8.41 ms, total: 429 ms
Wall time: 578 ms


In [9]:
    %%time
    # Same read_html call as the preceding cell, repeated so the two timings
    # can be compared.
    with cache('data'):
        dfs = pandas.read_html('https://en.wikipedia.org/wiki/Thermodynamics', flavor=None)

CPU times: user 403 ms, sys: 7.27 ms, total: 410 ms
Wall time: 411 ms


In [10]:
    %%time
    # First timed read_csv of the iris CSV inside cache('data').
    # NOTE(review): this rebinds `dfs`, which the read_html cells also assign.
    with cache('data'):
        dfs = pandas.read_csv('https://raw.githubusercontent.com/uiuc-cse/data-fa14/gh-pages/data/iris.csv')

CPU times: user 17.7 ms, sys: 3.06 ms, total: 20.7 ms
Wall time: 49.4 ms


In [11]:
    %%time
    # Same read_csv call as the preceding cell, repeated so the two timings
    # can be compared.
    with cache('data'):
        dfs = pandas.read_csv('https://raw.githubusercontent.com/uiuc-cse/data-fa14/gh-pages/data/iris.csv')

CPU times: user 7.19 ms, sys: 1.42 ms, total: 8.61 ms
Wall time: 9.09 ms
