In [3]:
import numpy as np
import pandas as pd
import xarray as xr

from tqdm import tqdm

In [10]:
df = pd.read_csv('../../data/sweden/sample_plots_uerra_cdf_features.csv')
df.head()

Unnamed: 0,year,plotid,landuseclass,easting,northing,longitude,latitude,volbirch,volcontorta,volpine,...,uerra_consecutive_dry_days_index_per_time_period,uerra_number_of_cdd_periods_with_more_than_5days_per_time_period,uerra_consecutive_wet_days_index_per_time_period,uerra_number_of_cwd_periods_with_more_than_5days_per_time_period,uerra_precipitation_days_index_per_time_period,uerra_wet_days_index_per_time_period,uerra_highest_one_day_precipitation_amount_per_time_period,uerra_highest_five_day_precipitation_amount_per_time_period,uerra_number_of_5day_heavy_precipitation_periods_per_time_period,uerra_simple_daily_intensity_index_per_time_period
0,2007,106,Skogsm,669894,7116684,18.491601,64.134697,78.134338,0.0,0.0,...,34.0,227.0,9.0,13.0,1549.0,1549.0,63.607101,63.607101,2.0,5.118152
1,2007,112,Skogsm,670490,7116694,18.503833,64.134493,0.0,0.0,0.0,...,34.0,219.0,9.0,20.0,1646.0,1646.0,63.545086,63.545086,2.0,5.157168
2,2007,118,Myr,671088,7116706,18.516109,64.134305,84.315784,0.0,0.0,...,34.0,219.0,9.0,20.0,1646.0,1646.0,63.545086,63.545086,2.0,5.157168
3,2007,212,Skogsm,671110,7115502,18.515195,64.123509,0.0,0.0,0.0,...,34.0,219.0,9.0,20.0,1646.0,1646.0,63.545086,63.545086,2.0,5.157168
4,2007,218,Skogsm,671117,7114900,18.514657,64.118114,0.0,0.0,0.0,...,34.0,219.0,9.0,20.0,1646.0,1646.0,63.545086,63.545086,2.0,5.157168


Load ERA5 rasters

In [8]:
ds_cfd = xr.open_dataset("/data-uerra/era5/cdo/era5-nordics-ecacfd.nc")
ds_fd = xr.open_dataset("/data-uerra/era5/cdo/era5-nordics-ecafd.nc")
ds_csu = xr.open_dataset("/data-uerra/era5/cdo/era5-nordics-ecacsu.nc")
ds_id = xr.open_dataset("/data-uerra/era5/cdo/era5-nordics-ecaid.nc")
ds_su = xr.open_dataset("/data-uerra/era5/cdo/era5-nordics-ecasu.nc")

ds_cdd = xr.open_dataset("/data-uerra/era5/cdo/era5-nordics-ecacdd.nc")
ds_cwd = xr.open_dataset("/data-uerra/era5/cdo/era5-nordics-ecacwd.nc")
ds_pd = xr.open_dataset("/data-uerra/era5/cdo/era5-nordics-ecapd.nc")
ds_rr1 = xr.open_dataset("/data-uerra/era5/cdo/era5-nordics-ecarr1.nc")
ds_rx1day = xr.open_dataset("/data-uerra/era5/cdo/era5-nordics-ecarx1day.nc")
ds_rx5day = xr.open_dataset("/data-uerra/era5/cdo/era5-nordics-ecarx5day.nc")
ds_sdii = xr.open_dataset("/data-uerra/era5/cdo/era5-nordics-ecasdii.nc")

rasters = [ds_cfd, ds_fd, ds_csu, ds_id, ds_su, ds_cdd, ds_cwd, ds_pd, ds_rr1, ds_rx1day, ds_rx5day, ds_sdii]

Extract pixel values at sample plots coordinates

In [13]:
results = {}
for i, row in tqdm(df.iterrows(), total=df.shape[0]):
    lon = row['longitude']
    lat = row['latitude']
    for ds in rasters:
        for v in ds.variables:
            if 'height' in ds.variables:
                heights = ds['height']
            times = ds['time']

            if v not in ["time", "longitude", "latitude", "inate.", "oordinate.", "height"]:
                if v not in results:
                    results[v] = []
                if 'height' in ds.variables:
                    values = ds.sel(time=times[0], height=heights[0], longitude=lon, latitude=lat, method='nearest')[v].values.tolist()
                else:
                    values = ds.sel(time=times[0], longitude=lon, latitude=lat, method='nearest')[v].values.tolist()
                results[v].append(values)

100%|██████████| 24239/24239 [25:43<00:00, 15.71it/s]


Add ERA5 features to dataframe

In [14]:
for k in results:
    print(k)
    df[f'era5_{k}'] = results[k]

number_of_cfd_periods_with_more_than_5days_per_time_period
frost_days_index_per_time_period
number_of_csu_periods_with_more_than_5days_per_time_period
ice_days_index_per_time_period
summer_days_index_per_time_period
consecutive_dry_days_index_per_time_period
number_of_cdd_periods_with_more_than_5days_per_time_period
consecutive_wet_days_index_per_time_period
number_of_cwd_periods_with_more_than_5days_per_time_period
precipitation_days_index_per_time_period
wet_days_index_per_time_period
highest_one_day_precipitation_amount_per_time_period
highest_five_day_precipitation_amount_per_time_period
number_of_5day_heavy_precipitation_periods_per_time_period
simple_daily_intensity_index_per_time_period


In [16]:
df.to_csv('../../data/sweden/sample_plots_all_features.csv', index=False)