# <u> Download minimum and maximum temperature from ERA5 Land Data </u><a id='ERAimport'></a>
ERA5 is the fifth generation ECMWF reanalysis for the global climate and weather for the past 4 to 7 decades published by the [Copernicus Climate Change Service](https://cds.climate.copernicus.eu/about-c3s). ERA5 provides hourly estimates for a large number of atmospheric, ocean-wave and land-surface quantities. ERA5 reanalysis combines model data with observations from across the world into a globally complete and consistent dataset using the laws of physics. Reanalysis produces data that goes several decades back in time, providing an accurate description of the climate of the past. We will use the following dataset based on the available weather variables:
<br>
<br> **[ERA5-Land hourly data](https://cds.climate.copernicus.eu/cdsapp#!/dataset/reanalysis-era5-land?tab=overview) from 1981 to present**. We will use **daily minimum and maximum temperature data** with a horizontal resolution of **0.1°x0.1°** for the period **2000 to 2019**. Specifically, the following weather variables are imported:
* 2m temperature in K

ERA5-Land hourly data are downloaded by using the [CDS Toolbox Editor](https://cds.climate.copernicus.eu/cdsapp#!/toolbox). The online editor allows calculating the daily minimum and maximum of hourly weather variables and exporting the data to netCDF files. The following code has been used to **export daily data of the ERA5 Land hourly database**. Each netCDF file contains the data of one weather variable over a period of 1 or 4 years:
```python
# CODE USED FOR THE CDS TOOLBOX
# ERA5 Land hourly data from 1981 to present
# Store files per year, due to size
import cdstoolbox as ct

@ct.application(title='Download data')
@ct.output.download()
@ct.output.download()
def download_application():
    """Retrieve hourly 2m temperature and reduce it to daily min/max cubes.

    For every month of the requested years, the hourly ``2m_temperature``
    field of ``reanalysis-era5-land`` is retrieved for the bounding box
    passed as ``area``, reduced to daily minima and daily maxima, and
    concatenated along ``time`` onto the running results.

    Returns:
        Tuple ``(daily_min, daily_max)`` covering all requested months.
        Two ``ct.output.download()`` decorators are stacked because two
        objects are returned (presumably one download per output).
    """
    years = ['2000', '2001']
    months = ['{:02d}'.format(m) for m in range(1, 13)]
    # Every possible day of month / hour of day is requested.
    days = ['{:02d}'.format(d) for d in range(1, 32)]
    hours = ['{:02d}:00'.format(h) for h in range(24)]

    # None marks "nothing accumulated yet" (first retrieved month).
    day_min_all_u = None
    day_max_all_u = None

    for yr in years:
        for mn in months:
            u = ct.catalogue.retrieve(
                'reanalysis-era5-land',
                {
                    'variable': ['2m_temperature'],
                    'year': yr,
                    'month': mn,
                    'day': days,
                    'time': hours,
                    'area': [75, -30, 32, 50],
                }
            )
            day_min = ct.climate.daily_min(u, keep_attrs=True)
            day_max = ct.climate.daily_max(u, keep_attrs=True)

            if day_min_all_u is None:
                day_min_all_u = day_min
                day_max_all_u = day_max
            else:
                # Both accumulators must be extended with their own running
                # cube; concatenating onto any other name fails on month two.
                day_min_all_u = ct.cube.concat([day_min_all_u, day_min], dim='time')
                day_max_all_u = ct.cube.concat([day_max_all_u, day_max], dim='time')

    return day_min_all_u, day_max_all_u
```

Clean the extracted files containing daily minimum and maximum temperature. Variable names have to be renamed so that the files can be concatenated.

In [None]:
# Packages
import sys
import os
import xarray as xr

# Path
PATH = "C:/Users/u0120816/OneDrive - KU Leuven/FB_Weather_Firm/Data/Python/ERA5_Land/"

# First year of each two-year input file (ERA5_Land_<year>_<year+1>_<tag>.nc)
year = [2000, 2002, 2004, 2006, 2008, 2010, 2012, 2014, 2016, 2018]

# (file-name tag, new name for the 'tas' variable); maximum is processed first
statistics = (('maxTemp', 'max_temp'), ('minTemp', 'min_temp'))

for i in year:

    j = i + 1

    for tag, var_name in statistics:

        # Common part of the input and output file names
        stem = PATH+'ERA5_Land_'+str(i)+'_'+str(j)+'_'+tag

        # The context manager closes the source file once the cleaned copy has
        # been written, so no file handles stay open across iterations
        with xr.open_mfdataset(stem+'.nc') as raw:

            # Rename coordinates and the data variable
            cleaned = raw.rename({'lon': 'longitude', 'lat': 'latitude', 'tas': var_name})

            # Wrap longitudes into the [-180, 180) range and restore ascending order
            wrapped = ((cleaned.longitude + 180) % 360) - 180
            cleaned = cleaned.assign_coords(longitude=wrapped).sortby('longitude')

            # Write the cleaned data to a new file (suffix "2"); the source
            # file itself is left untouched
            cleaned.to_netcdf(stem+'2.nc')

print('Done!')

ERA5 Land daily weather data are aggregated to the NUTS-3 level and exported to csv files. The Python code is provided by [Matteo de Felice's website](http://www.matteodefelice.name/post/aggregating-gridded-data/).

In [None]:
# Packages
import sys
import xarray as xr 
import numpy as np
import regionmask
import geopandas as gpd
import pandas as pd
import matplotlib.pyplot as plt
import glob
import time
import dask

# Path
PATH = "C:/Users/u0120816/OneDrive - KU Leuven/FB_Weather_Firm/Data/Python/ERA5_Land/"
PATH_SF = "C:/Users/u0120816/OneDrive - KU Leuven/FB_Weather_Firm/Data/Python/Shapefiles/"

# Period label shared by the input file pattern and the output file name
period = '2008'

# Start time
start = time.time()

# Open NUTS shapefile
nuts = gpd.read_file(PATH_SF+'NUTS_RG_01M_2021_4326_LEVL_3.shp')

# Read NetCDF files
# NOTE(review): this pattern also matches uncleaned *_maxTemp.nc / *_minTemp.nc
# files if they are still stored in the same folder -- confirm
d = xr.open_mfdataset(PATH+'ERA5_Land_'+period+'*.nc')

# Generate mask of NUTS regions (region number == row position in the shapefile)
nuts_mask_poly = regionmask.Regions(name='nuts_mask', numbers=list(range(len(nuts))), names=list(nuts.NUTS_ID), abbrevs=list(nuts.NUTS_ID), outlines=list(nuts.geometry.values))

# Calculate the NUTS mask on the grid of the first time step
mask = nuts_mask_poly.mask(d.isel(time=0).sel(latitude=slice(32, 75), longitude=slice(-30, 50)), lat_name='latitude', lon_name='longitude')

# Grid coordinates of the mask do not change between regions
lat = mask.latitude.values
lon = mask.longitude.values

# One dataframe per region; concatenated once after the loop
frames = []

for i, nuts_id in enumerate(nuts.NUTS_ID):

    # Latitudes/longitudes of the rows/columns holding at least one cell of region i
    sel_mask = mask.where(mask == i).values
    id_lon = lon[np.where(~np.all(np.isnan(sel_mask), axis=0))]
    id_lat = lat[np.where(~np.all(np.isnan(sel_mask), axis=1))]

    # Regions without any grid cell in the mask cannot be aggregated; skip
    # them explicitly instead of hiding every error behind a bare except
    if id_lat.size == 0 or id_lon.size == 0:
        continue

    # Load only the bounding box of the region, then blank out foreign cells
    out_sel = d.sel(latitude=slice(id_lat[0], id_lat[-1]), longitude=slice(id_lon[0], id_lon[-1])).compute().where(mask == i)

    # Mean over all cells of the region for every time step
    xloop = out_sel.groupby('time').mean(...)

    # To pandas dataframe, tagged with the NUTS ID
    region_df = xloop.to_dataframe().reset_index()
    region_df['NUTS_ID'] = nuts_id
    frames.append(region_df)

if not frames:
    raise RuntimeError('No NUTS region overlaps the grid; nothing to export.')

# Single concat (DataFrame.append no longer exists in pandas >= 2.0)
df = pd.concat(frames, axis=0)

# Export dataframe to CSV (; separator). The name must match the
# "era5_temp_*.csv" pattern used when the yearly files are merged.
df.to_csv(PATH+"CSV/era5_temp_"+period+".csv", columns=['time', 'max_temp', 'min_temp', 'NUTS_ID'], encoding='utf-8', header=["date", "max_temp", "min_temp", "nuts_id"], index=False, sep=';', float_format='%.15f')

print('Done!')
end = time.time()
print('Total Time: {} min'.format((end-start)/60))

Aggregate all csv files into one large csv file containing all the data.

In [None]:
# Packages
import sys
import pandas as pd
import glob

# Path
PATH = "C:/Users/u0120816/OneDrive - KU Leuven/FB_Weather_Firm/Data/Python/ERA5_Land/"

# Name of the merged output file
output_name = "ERA5_temp_2000_2019.csv"

# Collect the per-period csv files in a stable (sorted) order.
# On Windows glob matching is case-insensitive, so the pattern would also
# match the merged output of a previous run; exclude it to avoid duplicates.
interesting_files = sorted(
    f for f in glob.glob(PATH+"CSV/era5_temp_*.csv")
    if not f.lower().endswith(output_name.lower())
)

# Merge all csv files into one single csv file
df = pd.concat((pd.read_csv(f, sep=';', header=0) for f in interesting_files))
df.to_csv(PATH+"CSV/"+output_name, index=False, sep=';')

print('Done!')

# <u> Download total precipitation from ERA5 Land Data </u><a id='ERAimport'></a>
ERA5 is the fifth generation ECMWF reanalysis for the global climate and weather for the past 4 to 7 decades published by the [Copernicus Climate Change Service](https://cds.climate.copernicus.eu/about-c3s). ERA5 provides hourly estimates for a large number of atmospheric, ocean-wave and land-surface quantities. ERA5 reanalysis combines model data with observations from across the world into a globally complete and consistent dataset using the laws of physics. Reanalysis produces data that goes several decades back in time, providing an accurate description of the climate of the past. We will use the following dataset based on the available weather variables:
<br>
<br> **[ERA5-Land hourly data](https://cds.climate.copernicus.eu/cdsapp#!/dataset/reanalysis-era5-land?tab=overview) from 1950 to present**. We will use **total precipitation** with a horizontal resolution of **0.1°x0.1°** for the period **1980 to 2019**. Specifically, the following weather variables are imported:
* total precipitation

ERA5-Land records precipitation as an accumulated variable. Thus, the first observation of the following day (d+1 00:00 UTC) refers to the total precipitation accumulated over the previous day [(see Conversion table for accumulated variables)](https://confluence.ecmwf.int/pages/viewpage.action?pageId=197702790). Daily accumulated precipitation data at 00:00 UTC are downloaded by using the [CDS Toolbox Editor](https://cds.climate.copernicus.eu/cdsapp#!/toolbox).

```python
# CODE USED FOR THE CDS TOOLBOX
# ERA5 Land hourly data from 1981 to present
# Store files per year, due to size
import cdstoolbox as ct

@ct.application(title='Download data')
@ct.output.download()
def download_application():
    """Retrieve total precipitation at 00:00 for every day of 2010-2019.

    Requests the ``total_precipitation`` variable of
    ``reanalysis-era5-land`` for all months and days of the listed years,
    restricted to the 00:00 time step and to the bounding box given in
    ``area``.

    Returns:
        The object returned by ``ct.catalogue.retrieve`` for this request.
    """
    request = {
        'variable': 'total_precipitation',
        'year': [str(y) for y in range(2010, 2020)],
        'month': ['{:02d}'.format(m) for m in range(1, 13)],
        # Every possible day of month is requested
        'day': ['{:02d}'.format(d) for d in range(1, 32)],
        'time': '00:00',
        'area': [75, -30, 32, 50],
    }
    return ct.catalogue.retrieve('reanalysis-era5-land', request)
```
The following code aggregates total precipitation over NUTS-3 regions by taking the mean of all observations within the boundary of a NUTS-3 region (***tp***). Multiple Python scripts were executed in parallel on the ECOOM calc server via a batch file to speed up the process. Each Python script calculates the daily total precipitation of the NUTS-3 regions for one year and stores it in a csv file. Afterwards, the yearly csv files are aggregated into one csv file storing the entire data.

In [None]:
filename = 2000

# Packages
import sys
import xarray as xr 
import numpy as np
import regionmask
import geopandas as gpd
import pandas as pd
import matplotlib.pyplot as plt
import glob
import time
import dask
from multiprocessing import Process

# Path
PATH = "C:/Users/u0120816/Documents/Project_1/"
PATH_SF = "C:/Users/u0120816/Documents/Project_1/Shapefiles/"

# Start time
start = time.time()

# Open NUTS shapefile
nuts = gpd.read_file(PATH_SF+'NUTS_RG_01M_2021_4326_LEVL_3.shp')

# Read NetCDF file of the year given by `filename`
d = xr.open_mfdataset(PATH+'ERA5/ERA5_'+str(filename)+'.nc')

# Generate mask of NUTS regions (region number == row position in the shapefile).
# The number of regions is taken from the shapefile instead of being hard-coded,
# so that numbers, names and outlines always have the same length.
nuts_mask_poly = regionmask.Regions(name='nuts_mask', numbers=list(range(len(nuts))), names=list(nuts.NUTS_ID), abbrevs=list(nuts.NUTS_ID), outlines=list(nuts.geometry.values))

# Calculate the NUTS mask on the grid of the first time step
mask = nuts_mask_poly.mask(d.isel(time=0).sel(latitude=slice(32, 75), longitude=slice(-30, 50)), lat_name='latitude', lon_name='longitude')

# Grid coordinates of the mask do not change between regions
lat = mask.latitude.values
lon = mask.longitude.values

# One dataframe per region; concatenated once after the loop
frames = []

for j, nuts_id in enumerate(nuts.NUTS_ID):

    # Latitudes/longitudes of the rows/columns holding at least one cell of region j
    sel_mask = mask.where(mask == j).values
    id_lon = lon[np.where(~np.all(np.isnan(sel_mask), axis=0))]
    id_lat = lat[np.where(~np.all(np.isnan(sel_mask), axis=1))]

    # Regions without any grid cell in the mask cannot be aggregated; skip
    # them explicitly instead of hiding every error behind a bare except
    if id_lat.size == 0 or id_lon.size == 0:
        continue

    # Load only the bounding box of the region, then blank out foreign cells
    out_sel = d.sel(latitude=slice(id_lat[0], id_lat[-1]), longitude=slice(id_lon[0], id_lon[-1])).compute().where(mask == j)

    # Mean over all cells of the region for every time step
    xloop_mean = out_sel.groupby('time').mean(...)

    # To pandas dataframe, tagged with the NUTS ID of the region
    region_df = xloop_mean.to_dataframe().reset_index()
    region_df['nuts_id'] = nuts_id
    frames.append(region_df)

if not frames:
    raise RuntimeError('No NUTS region overlaps the grid; nothing to export.')

# Single concat (DataFrame.append no longer exists in pandas >= 2.0)
df = pd.concat(frames, axis=0)

# Export dataframe to CSV (; separator)
df.to_csv(PATH+'CSV/ERA5_precip_'+str(filename)+'.csv', columns=['time', 'tp', 'nuts_id'], encoding='utf-8', header=["time", "tp", "nuts_id"], index=False, sep=';', float_format='%.15f')

print('Done!')
end = time.time()
print('Total Time: {} min'.format((end-start)/60))

# <u> Download dewpoint temperature from ERA5-Land data </u>
Dewpoint temperature can be used to calculate relative humidity using the following [Formula](https://bmcnoldy.rsmas.miami.edu/Humidity.html). The following code downloads ERA5 daily averages through the [ERA5 API](https://cds.climate.copernicus.eu/api-how-to). The API allows specifying the aggregation level (daily mean, max or min) (see [PDF Table 2](https://datastore.copernicus-climate.eu/documents/app-c3s-daily-era5-statistics/C3S_Application-Documentation_ERA5-daily-statistics-v2.pdf)).

In [None]:
# Packages
import cdsapi
import requests
import urllib3
urllib3.disable_warnings()

# PATH
PATH = "C:/Users/u0120816/OneDrive - KU Leuven/FB_Weather_Firm/Data/Python/ERA5_Land/Raw/"

# Requires:
# 1) the CDS API to be installed and working on your system
# 2) You have agreed to the ERA5 Licence (via the CDS web page)
# 3) Selection of required variable, daily statistic, etc

# Call API
c = cdsapi.Client(timeout=600)

# Years to download in this run.
# The other years between 2000 and 2019 were listed in a commented-out entry;
# presumably they were fetched in separate runs by editing this list.
YEARS = ['2007']

# Months to download in this run ('01'-'04' are left out of this run).
# Each month is listed exactly once so that no month is requested twice.
MONTHS = ['05', '06', '07', '08', '09', '10', '11', '12']

# Loop over all year/month combinations
for yr in YEARS:
    for mn in MONTHS:
        print('Running: '+yr+mn)

        # NOTE(review): the dataset requested is "reanalysis-era5-single-levels"
        # on a 0.1 degree grid, while the output files are named "ERA5_Land_..."
        # -- confirm this is intended
        result = c.service(
            "tool.toolbox.orchestrator.workflow",
            params={
                "realm": "user-apps",
                "project": "app-c3s-daily-era5-statistics",
                "version": "master",
                "kwargs": {
                    "dataset": "reanalysis-era5-single-levels",
                    "product_type": "reanalysis",
                    "variable": "2m_dewpoint_temperature",
                    "statistic": "daily_mean",
                    "frequency": "1-hourly",
                    "year": yr,
                    "month": mn,
                    "grid": "0.1/0.1",
                    "area": {"lat": [32, 75], "lon": [-30, 50]},
                },
                "workflow_name": "application",
            })

        # Set name of output file for each month (variable, year, month)
        file_name = "ERA5_Land_dew_temp_"+ yr+mn + ".nc"
        location = result[0]['location']

        # Stream the result to disk; the context managers close both the HTTP
        # connection and the file, so no explicit close() is needed
        with requests.get(location, stream=True) as res:
            # Fail loudly instead of saving an HTTP error page as a .nc file
            res.raise_for_status()
            print("Writing data to " + file_name)
            with open(PATH+file_name, 'wb') as fh:
                for r in res.iter_content(chunk_size=1024):
                    fh.write(r)
print('Done!')

Aggregate daily average dewpoint temperature to the NUTS-3 level and store the results in a csv file.

In [None]:
filename = 'ERA5_Land_dew_temp_2000'
year = filename.split("_")[-1]

# Packages
import sys
import xarray as xr 
import numpy as np
import regionmask
import geopandas as gpd
import pandas as pd
import matplotlib.pyplot as plt
import glob
import time
import dask
import math

# Server File Path
PATH = "D:/Users/Felix Bracht/ERA5_land/"
PATH_SF = "D:/Users/Felix Bracht/Shapefiles/"

# Start time
start = time.time()

# Open NUTS shapefile
nuts = gpd.read_file(PATH_SF+'NUTS_RG_01M_2021_4326_LEVL_3.shp')

# Read all NetCDF files whose name starts with `filename` and harmonise
# the coordinate names
d = xr.open_mfdataset(PATH+filename+'*.nc')
d = d.rename({'lon': 'longitude', 'lat': 'latitude'})

# Generate mask of NUTS regions (region number == row position in the shapefile)
nuts_mask_poly = regionmask.Regions(name='nuts_mask', numbers=list(range(len(nuts))), names=list(nuts.NUTS_ID), abbrevs=list(nuts.NUTS_ID), outlines=list(nuts.geometry.values))

# Calculate the NUTS mask on the grid of the first time step
mask = nuts_mask_poly.mask(d.isel(time=0).sel(latitude=slice(32, 75), longitude=slice(-30, 50)), lat_name='latitude', lon_name='longitude')

# Grid coordinates of the mask do not change between regions
lat = mask.latitude.values
lon = mask.longitude.values

# One dataframe per region; concatenated once after the loop instead of
# re-copying the growing dataframe on every iteration
frames = []

for i, nuts_id in enumerate(nuts.NUTS_ID):

    # Latitudes/longitudes of the rows/columns holding at least one cell of region i
    sel_mask = mask.where(mask == i).values
    id_lon = lon[np.where(~np.all(np.isnan(sel_mask), axis=0))]
    id_lat = lat[np.where(~np.all(np.isnan(sel_mask), axis=1))]

    # Regions without any grid cell in the mask cannot be aggregated; skip
    # them explicitly instead of hiding every error behind a bare except
    if id_lat.size == 0 or id_lon.size == 0:
        continue

    # Load only the bounding box of the region, then blank out foreign cells
    out_sel = d.sel(latitude=slice(id_lat[0], id_lat[-1]), longitude=slice(id_lon[0], id_lon[-1])).compute().where(mask == i)

    # Mean over all cells of the region for every time step
    xloop = out_sel.groupby('time').mean(...)

    # To pandas dataframe, tagged with the NUTS ID
    region_df = xloop.to_dataframe().reset_index()
    region_df['NUTS_ID'] = nuts_id
    frames.append(region_df)

if not frames:
    raise RuntimeError('No NUTS region overlaps the grid; nothing to export.')

df = pd.concat(frames, axis=0)

# Export dataframe to CSV (; separator)
df.to_csv(PATH+"CSV/era5_dew_temp_"+str(year)+".csv", columns=['time', 'd2m', 'NUTS_ID'], encoding='utf-8', header=["date", "d2m", "nuts_id"], index=False, sep=';', float_format='%.15f')

print('Done!')
end = time.time()
print('Total Time: {} min'.format((end-start)/60))

Aggregate the csv files into one csv file containing all the data.

In [None]:
# Packages
import sys
import pandas as pd
import glob

# Path (the trailing slash is required: file names are appended by plain
# string concatenation)
PATH = "C:/Users/u0120816/OneDrive - KU Leuven/FB_Weather_Firm/Data/Python/ERA5_Land/CSV/"

# Name of the merged output file
output_name = "ERA5_dew_temp_2000_2019.csv"

# Collect the yearly csv files in a stable (sorted) order.
# On Windows glob matching is case-insensitive, so the pattern would also
# match the merged output of a previous run; exclude it to avoid duplicates.
interesting_files = sorted(
    f for f in glob.glob(PATH+"era5_dew_temp_*.csv")
    if not f.lower().endswith(output_name.lower())
)

# Merge all csv files into one single csv file
df = pd.concat((pd.read_csv(f, sep=';', header=0) for f in interesting_files))
df.to_csv(PATH+output_name, index=False, sep=';')

print('Done!')