In [1]:
# HIDDEN CELL
# Notebook set-up: make the intended copy of argopy importable, report which
# one is in use, and configure how xarray objects are displayed.
import sys, os

# True only when the READTHEDOCS environment variable is exactly 'True'.
on_rtd = os.environ.get('READTHEDOCS', None) == 'True'
if not on_rtd:
    # Importing argopy in dev mode: put a local checkout first on sys.path.
    # The checkout location can be overridden with the ARGOPY_DEV_PATH
    # environment variable, so the notebook is not tied to a single machine;
    # when the variable is unset, the original developer path is used.
    argopy_dev_path = os.environ.get(
        'ARGOPY_DEV_PATH',
        "/Users/gmaze/git/github/euroargodev/argopy",
    )
    sys.path.insert(0, argopy_dev_path)
    import git
    import argopy
    from argopy.options import OPTIONS
    # Print provenance (version, source file, git branch, active options) so
    # the rendered outputs can be traced back to the code that produced them.
    print("argopy:", argopy.__version__, 
          "\nsrc:", argopy.__file__, 
          "\nbranch:", git.Repo(search_parent_directories=True).active_branch.name, 
          "\noptions:", OPTIONS)
else:
    # Documentation build: use the package found in the parent directory.
    sys.path.insert(0, os.path.abspath('..'))

import xarray as xr
# Plain-text reprs are used here; set display_style="html" for rich rendering.
# The trailing semicolon suppresses the cell output.
xr.set_options(display_style="text");

argopy: 0.1.5 
src: /Users/gmaze/git/github/euroargodev/argopy/argopy/__init__.py 
branch: parallel-requests 
options: {'src': 'erddap', 'local_ftp': '.', 'dataset': 'phy', 'cachedir': '/Users/gmaze/.cache/argopy', 'mode': 'standard', 'api_timeout': 60}


In [2]:
import argopy
from argopy import DataFetcher as ArgoDataFetcher

# Performance

## Caching data

- with **argopy** global options:

```python
argopy.set_options(cachedir='mycache_folder')
```

- in a temporary context:

```python
with argopy.set_options(cachedir='mycache_folder'):
    ds = ArgoDataFetcher(cache=True).profile(6902746, 34).to_xarray()
```

- when instantiating the data fetcher:

```python
ds = ArgoDataFetcher(cache=True, cachedir='mycache_folder').profile(6902746, 34).to_xarray()
```

Specifying a cache directory at the fetcher level will ensure that the data fetched with this fetcher are cached in that folder.

## Clearing the cache

Cached data have an expiration time of 1 day.

If you want to manually clear your cache folder, and/or make sure your data are newly fetched, you can do it at the fetcher level with the ``clear_cache`` method.

Start to fetch data and store them in cache:

```python
fetcher = ArgoDataFetcher(cache=True, cachedir='mycache_folder').profile(6902746, 34)
fetcher.to_xarray();
```

Fetched data are in the local cache folder:

```python
os.listdir('mycache_folder')
```

where we see one hash entry for the newly fetched data and the cache registry file ``cache``.

We can then fetch something else:

```python
fetcher2 = ArgoDataFetcher(cache=True, cachedir='mycache_folder').profile(1901393, 1)
fetcher2.to_xarray();
```

All fetched data are now cached in 'mycache_folder':

```python
os.listdir('mycache_folder')
```

Note the new hash file from the ``fetcher2`` data.

We can safely clear the cache from the first fetcher data:

```python
fetcher.clear_cache()
```

```python
os.listdir('mycache_folder')
```

By clearing the cache at the fetcher level, you make sure that only the data fetched with this fetcher are removed, while other fetched data (with other fetchers for instance) will stay in place.

If you want to clear the entire cache folder, whatever the fetcher used, do it at the package level with:

```python
argopy.clear_cache()
```

```python
os.listdir('mycache_folder')
```

```
---------------------------------------------------------------------------
FileNotFoundError                         Traceback (most recent call last)
<ipython-input-13-6726e674f21f> in <module>
----> 1 os.listdir('mycache_folder')

FileNotFoundError: [Errno 2] No such file or directory: 'mycache_folder'
```

## Parallel fetching

Sometimes you may find that your request takes a long time to fetch, or simply does not even succeed. You can then try to let argopy chunk your request into smaller pieces and have them fetched in parallel for you. This is done with the argument ``parallel`` of the data fetcher and can be tuned using the options ``chunks`` and ``chunksize``.

This goes by default like this:

In [3]:
# Space/time domain of the request, given as pairs of lower/upper bounds.
box = [
    -60, -55,                    # longitude
    40.0, 50.0,                  # latitude
    0.0, 100.0,                  # pressure
    "2007-01-01", "2007-02-01",  # time
]
# Region fetcher on the erddap source, with parallel fetching enabled.
loader_par = ArgoDataFetcher(src='erddap', parallel=True).region(box)

You can use the option ``progress`` to display a progress bar during fetching:

In [4]:
# Same region request as in the previous cell, with progress=True added to
# display a progress bar during fetching.
loader_par = ArgoDataFetcher(src='erddap', parallel=True, progress=True).region(box)
# Bare last expression: displays the fetcher summary.
loader_par

<datafetcher.erddap>
Name: Ifremer erddap Argo data fetcher for a space/time region
API: https://www.ifremer.fr/erddap
Domain: phy_[x=-60.00/-55.00; y=40.00/ ... 00.0; t=2007-01-01/2007-02-01]
Backend: erddap (parallel=True)
User mode: standard

Then, simply fetch data as usual:

In [5]:
%%time
# Fetch the data for the region defined above; %%time (which must remain the
# first line of the cell) reports the CPU and wall time of the fetch.
ds = loader_par.to_xarray()

100%|██████████| 1/1 [00:00<00:00,  3.92it/s]

CPU times: user 258 ms, sys: 14 ms, total: 272 ms
Wall time: 461 ms





To check how many chunks your request has been split into, you can look at the ``uri`` property of the fetcher, which gives the list of paths to the data:

In [7]:
# List of paths to the data, used here to see how the request was chunked.
loader_par.uri

['https://www.ifremer.fr/erddap/tabledap/ArgoFloats.nc?data_mode,latitude,longitude,position_qc,time,time_qc,direction,platform_number,cycle_number,config_mission_number,vertical_sampling_scheme,pres,temp,psal,pres_qc,temp_qc,psal_qc,pres_adjusted,temp_adjusted,psal_adjusted,pres_adjusted_qc,temp_adjusted_qc,psal_adjusted_qc,pres_adjusted_error,temp_adjusted_error,psal_adjusted_error&longitude>=-60&longitude<=-55&latitude>=40.0&latitude<=50.0&pres>=0.0&pres<=100.0&time>=1167609600.0&time<=1170288000.0&distinct()&orderBy("time,pres")']