## **Scripts used to download archives of climate data**

In [5]:
import xarray as xr
import os
import re
import glob
from IPython.display import clear_output

---
### **CHIRPS**

Download archive of global daily data at 0.25 degree resolution:
```
! cd /home/clair/data/chirps; for y in `seq 1981 2022`; do wget https://data.chc.ucsb.edu/products/CHIRPS-2.0/global_daily/netcdf/p25/chirps-v2.0.$y.days_p25.nc; done
```

Update with latest file only:
```
! cd /home/clair/data/chirps; wget https://data.chc.ucsb.edu/products/CHIRPS-2.0/global_daily/netcdf/p25/chirps-v2.0.2022.days_p25.nc
```

---
### **E-OBS**

In [15]:
# define variables to download
# (E-OBS variable short names; each one is spliced into the download URLs and
#  the API dataset name built in the cells below)
var_list = ["tg", "tn", "tx", "rr"]
# work inside the E-OBS directory so that the wget calls below save their files here
os.chdir("/home/clair/Data/e-obs_025/")

#### Download archive of data at 0.25 degree resolution 
- This cell only needs to be modified and re-run when the E-OBS version (currently v26.0e) is updated.

In [16]:
chunks = ["1950-1964", "1965-1979", "1980-1994", "1995-2010"]
for varnm in var_list:
    for ch in chunks:

        urlstring = "https://knmi-ecad-assets-prd.s3.amazonaws.com/ensembles/data/Grid_0.25deg_reg_ensemble/"+varnm+"_ens_mean_0.25deg_reg_"+ch+"_v26.0e.nc"
        ! wget $urlstring
        
    clear_output(wait = False)
    
print("Done.")

Done.


#### Download & merge current data

- get most recent chunk + current calendar year + last 60 days
- concatenate latest data into a single file

In [2]:
# method adapted from https://developer.dataplatform.knmi.nl/example-scripts#list-10-current-files-get-first

import requests

def download_file_from_temporary_download_url(download_url, filename):
    """Stream the content at `download_url` into the local file `filename`.

    Support method to download individual files from a URL. This is a
    best-effort helper: any exception raised while requesting or writing the
    file is caught and reported with a printed message rather than propagated.

    Parameters
    ----------
    download_url : str
        URL to fetch (passed straight to `requests.get`).
    filename : str
        Path of the file to write; it is opened in binary write mode, so an
        existing file of the same name is overwritten.

    Returns
    -------
    None
    """
    try:
        with requests.get(download_url, stream=True) as r:
            # turn HTTP error statuses into exceptions so they are reported below
            r.raise_for_status()
            with open(filename, "wb") as f:
                # write in 8 KiB pieces so the whole file is never held in memory
                for chunk in r.iter_content(chunk_size=8192):
                    f.write(chunk)
    except Exception:
        print("Unable to download file using download URL")
    else:
        # only report success when nothing went wrong (previously this message
        # was printed after a failed download as well)
        print(filename+" downloaded")

def download_eobs_60day(varnm):
    """List the daily-updated E-OBS files for `varnm` and download the 0.25deg ones.

    Queries the KNMI data platform file listing for the dataset
    "daily_updated_<varnm>_eobs" (version 1), keeps the entries whose filename
    contains "0.25deg", requests a temporary download URL for each of them and
    saves the file under its own filename in the current working directory.
    The cell output is cleared once all files have been processed.

    Reads the module-level `api_key` for the Authorization header, so that
    variable must be defined before this function is called.

    Parameters
    ----------
    varnm : str
        Variable name spliced into the dataset name of the API URL.

    Raises
    ------
    requests.HTTPError
        If the listing request or a download-URL request returns an HTTP error
        status (e.g. a rejected API key), instead of failing later on the
        unexpected response body.
    """
    # method to list all available files & download

    api_url = "https://api.dataplatform.knmi.nl/open-data/v1/datasets/daily_updated_"+varnm+"_eobs/versions/1/files"
    auth_header = {"Authorization": api_key}

    # list the available files (up to 200 entries are requested)
    list_response = requests.get(api_url, headers = auth_header, params = {"maxKeys": 200})
    list_response.raise_for_status()
    list_files = list_response.json().get("files")
    list_files = [file for file in list_files if "0.25deg" in file.get("filename")]

    for file in list_files:

        filename = file.get("filename")

        # each file has to be fetched through a temporary download URL
        endpoint = f"{api_url}/{filename}/url"
        get_file_response = requests.get(endpoint, headers = auth_header)
        get_file_response.raise_for_status()

        download_url = get_file_response.json().get("temporaryDownloadUrl")
        download_file_from_temporary_download_url(download_url, filename)

    # wipe the per-file progress messages
    clear_output(wait = False)

#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

# load API key from file - https://developer.dataplatform.knmi.nl/get-started#obtain-an-api-key
# (surrounding whitespace is stripped so that a trailing newline in the key file
#  does not end up inside the Authorization header)
with open("/home/clair/Data/knmi_api.txt") as f_api:
    api_key = f_api.read().strip()



In [17]:
os.chdir("/home/clair/Data/e-obs_025/tmp")

for varnm in var_list:
    
    # download current chunk
    chunk_string = "https://knmi-ecad-assets-prd.s3.amazonaws.com/ensembles/data/Grid_0.25deg_reg_ensemble/"+varnm+"_ens_mean_0.25deg_reg_2011-2022_v26.0e.nc"
    if os.path.exists(re.sub(".+/", "", chunk_string)): 
        print("Delete existing "+varnm+" files manually first")
        continue
    ! wget $chunk_string
        
    # download current calendar year
    cy_string = "https://knmi-ecad-assets-prd.s3.amazonaws.com/ensembles/data/months/ens/"+varnm+"_0.25deg_day_2022_grid_ensmean.nc"
    ! wget $cy_string
    
    # download last 60 days
    download_eobs_60day(varnm)
    
    # load the latest data for this year
    ds_chunk = xr.open_dataset(re.sub(".+/", "", chunk_string)).dropna("time", "all")
    ds_cy = xr.open_dataset(re.sub(".+/", "", cy_string)).dropna("time", "all")
    ds_60d = xr.open_mfdataset([fnm for fnm in glob.glob(varnm+"*.nc") if re.search("_[0-9]{8}_", fnm)])
    
    # use latest version of each day available (60d > monthly current year > most recent chunk)
    ds_cy = ds_cy.sel(time = ds_cy.time < ds_60d.time.min())
    ds_chunk = ds_chunk.sel(time = ds_chunk.time < ds_cy.time.min())
    
    # save as netcdf
    ds_latest = xr.concat([ds_chunk, ds_cy, ds_60d], "time")
    ds_latest.to_netcdf("../"+re.sub(".+/", "", re.sub("[0-9]{4}-[0-9]{4}", "latest", chunk_string)))
    
    clear_output(wait = False)
    print("Done.")

Done.
