# Prepare labelled input for the NN
# (i.e. locations where moss&lichen fractional cover changed and related meteorological parameters from ERA5-Land)

# Copernicus Global Land Cover data  from 2015-01-01 to 2019-12-31 already available as a netCDF file stored on EOSC (CESNET)
## Troms og Finnmark
### Mosses and lichens, bare, grass, shrubs and trees

In [1]:
!date

Thu Feb 23 07:48:48 UTC 2023


In [2]:
pip install vaex

Note: you may need to restart the kernel to use updated packages.


### Define s3 storage parameters

In [3]:
import s3fs
import xarray as xr
import pandas as pd
import h3
import vaex

In [4]:
client_kwargs={'endpoint_url': 'https://object-store.cloud.muni.cz'}
store = s3fs.S3FileSystem(anon=False, client_kwargs=client_kwargs)
store.ls('Data', detail=True, refresh=True)

[{'Key': 'Data/C_GlobalLandCover_20150101_20190101_Troms-Finnmark.nc',
  'LastModified': datetime.datetime(2023, 2, 1, 12, 44, 42, 95000, tzinfo=tzlocal()),
  'ETag': '"70e848cfeba6b4e2db997b6efb0ad947-8"',
  'Size': 397191332,
  'StorageClass': 'STANDARD',
  'type': 'file',
  'size': 397191332,
  'name': 'Data/C_GlobalLandCover_20150101_20190101_Troms-Finnmark.nc'},
 {'Key': 'Data/reanalysis-era5-land_hourly_2015-01-01_2019-12-31_Troms-Finnmark_T2m-SD-TP.nc',
  'LastModified': datetime.datetime(2023, 2, 1, 9, 53, 15, 635000, tzinfo=tzlocal()),
  'ETag': '"e48be2b71e08b38d296a0ea6db979b09-23"',
  'Size': 1182124070,
  'StorageClass': 'STANDARD',
  'type': 'file',
  'size': 1182124070,
  'name': 'Data/reanalysis-era5-land_hourly_2015-01-01_2019-12-31_Troms-Finnmark_T2m-SD-TP.nc'},
 {'Key': 'Data/reanalysis-era5-single-levels_2015-2019.nc',
  'LastModified': datetime.datetime(2023, 1, 6, 7, 24, 40, 847000, tzinfo=tzlocal()),
  'ETag': '"2582b929cffaa1770daa45a508dc689a-8"',
  'Size': 391

### Define s3 store for the **netCDF file**

In [5]:
s3path = 'Data/C_GlobalLandCover_20150101_20190101_Troms-Finnmark.nc'

In [6]:
GLC_AOI = xr.open_dataset(store.open(s3path))

In [7]:
GLC_AOI

In [8]:
GLC_AOI = GLC_AOI.rename(x='lon', y='lat', t='time')

In [9]:
# Drop variables not directly of interest here
GLC_AOI = GLC_AOI.drop_vars(['crs',
                             'Crops_CoverFraction_layer',
                             'Discrete_Classification_map', 
                             'Discrete_Classification_proba',
                             'Forest_Type_layer',
                             'Snow_CoverFraction_layer',
                             'BuiltUp_CoverFraction_layer',
                             'PermanentWater_CoverFraction_layer',
                             'SeasonalWater_CoverFraction_layer',
                             'DataDensityIndicator',
                             'Change_Confidence_layer',
                             'dataMask'])

In [10]:
GLC_AOI = GLC_AOI.rename(Bare_CoverFraction_layer = 'Bare',
                         Grass_CoverFraction_layer = 'Grass',
                         MossLichen_CoverFraction_layer = 'Lichen',
                         Shrub_CoverFraction_layer = 'Shrub',
                         Tree_CoverFraction_layer = 'Tree')

In [11]:
GLC_AOI

In [12]:
# Troms & Finnmark Global Land Cover area
GLC_AOI_min_lon = GLC_AOI.lon.min()
GLC_AOI_max_lon = GLC_AOI.lon.max()
GLC_AOI_min_lat = GLC_AOI.lat.min()
GLC_AOI_max_lat = GLC_AOI.lat.max()
print(GLC_AOI_min_lon, GLC_AOI_max_lon, GLC_AOI_min_lat, GLC_AOI_max_lat)

<xarray.DataArray 'lon' ()>
array(15.5953135) <xarray.DataArray 'lon' ()>
array(31.06568387) <xarray.DataArray 'lat' ()>
array(68.35153628) <xarray.DataArray 'lat' ()>
array(71.18764739)


### The cell below redefine a very small region for testing purposes only - skip them to keep the whole Troms-finnmark area

In [13]:
# Small region 
Small_AOI_min_lon = 19.65
Small_AOI_max_lon = 19.7
Small_AOI_min_lat = 69.05
Small_AOI_max_lat = 69.1

In [14]:
from h3 import h3
import folium

def visualize_hexagons(hexagons, color="red", folium_map=None):
    """
    hexagons is a list of hexcluster. Each hexcluster is a list of hexagons. 
    eg. [[hex1, hex2], [hex3, hex4]]
    """
    polylines = []
    lat = []
    lng = []
    for hex in hexagons:
        polygons = h3.h3_set_to_multi_polygon([hex], geo_json=False)
        # flatten polygons into loops.
        outlines = [loop for polygon in polygons for loop in polygon]
        polyline = [outline + [outline[0]] for outline in outlines][0]
        lat.extend(map(lambda v:v[0],polyline))
        lng.extend(map(lambda v:v[1],polyline))
        polylines.append(polyline)
    
    if folium_map is None:
        m = folium.Map(location=[sum(lat)/len(lat), sum(lng)/len(lng)], zoom_start=13, tiles='cartodbpositron')
    else:
        m = folium_map
    for polyline in polylines:
        my_PolyLine=folium.PolyLine(locations=polyline,weight=8,color=color)
        m.add_child(my_PolyLine)
    return m
    

def visualize_polygon(polyline, color):
    polyline.append(polyline[0])
    lat = [p[0] for p in polyline]
    lng = [p[1] for p in polyline]
    m = folium.Map(location=[sum(lat)/len(lat), sum(lng)/len(lng)], zoom_start=13, tiles='cartodbpositron')
    my_PolyLine=folium.PolyLine(locations=polyline,weight=8,color=color)
    m.add_child(my_PolyLine)
    return m

In [15]:
Small_AOI_center = h3.geo_to_h3((Small_AOI_min_lat + Small_AOI_max_lat)/2, (Small_AOI_min_lon +Small_AOI_max_lon)/2, 9) # lat, lng, hex resolution    
m = visualize_hexagons([h3.geo_to_h3(GLC_AOI_min_lat, GLC_AOI_min_lon, 9)], color="red")                                                                                        
m = visualize_hexagons([h3.geo_to_h3(GLC_AOI_max_lat, GLC_AOI_min_lon, 9)], color="red", folium_map=m)                                                                                   
m = visualize_hexagons([h3.geo_to_h3(GLC_AOI_max_lat, GLC_AOI_max_lon, 9)], color="red", folium_map=m)                                                                                
m = visualize_hexagons([h3.geo_to_h3(GLC_AOI_min_lat, GLC_AOI_max_lon, 9)], color="red", folium_map=m)
m = visualize_hexagons([Small_AOI_center], color="green", folium_map=m) 
display(m)

## Year 2015

In [16]:
GLC_AOI

In [17]:
#df = GLC_AOI.isel(time = 0).sel(lat=slice(Small_AOI_max_lat, Small_AOI_min_lat), lon=slice(Small_AOI_min_lon, Small_AOI_max_lon)).to_dataframe()

In [18]:
df = GLC_AOI.isel(time = 0).to_dataframe()

In [19]:
df = df.reset_index()

In [20]:
df

Unnamed: 0,lon,lat,time,Bare,Grass,Lichen,Shrub,Tree
0,15.595314,71.187647,2015-01-01,255.0,255.0,255.0,255.0,255.0
1,15.595314,71.186721,2015-01-01,255.0,255.0,255.0,255.0,255.0
2,15.595314,71.185796,2015-01-01,255.0,255.0,255.0,255.0,255.0
3,15.595314,71.184870,2015-01-01,255.0,255.0,255.0,255.0,255.0
4,15.595314,71.183944,2015-01-01,255.0,255.0,255.0,255.0,255.0
...,...,...,...,...,...,...,...,...
51196371,31.065684,68.355240,2015-01-01,,,,,
51196372,31.065684,68.354314,2015-01-01,,,,,
51196373,31.065684,68.353388,2015-01-01,,,,,
51196374,31.065684,68.352462,2015-01-01,,,,,


In [21]:
# Only keep locations where there is some moss & lichen
df = df.loc[(df['Lichen'] > 0) & (df['Lichen'] <= 100)]

In [22]:
# Replace NaNs by 0
for col in ["Bare", "Grass", "Lichen", "Shrub", "Tree"]:
    print(col)
    df[col] = df[col].fillna(0)

Bare
Grass
Lichen
Shrub
Tree


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[col] = df[col].fillna(0)


In [23]:
# Calculate total fractional coverage of bare, grass, lichen, shrub and tree (should be 100)
df["tot"]  = (df['Bare'] + df['Grass'] + df['Lichen'] + df['Shrub'] + df['Tree'])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["tot"]  = (df['Bare'] + df['Grass'] + df['Lichen'] + df['Shrub'] + df['Tree'])


In [24]:
df

Unnamed: 0,lon,lat,time,Bare,Grass,Lichen,Shrub,Tree,tot
2520,15.595314,68.854314,2015-01-01,0.0,41.0,1.0,29.0,29.0,100.0
2823,15.595314,68.573759,2015-01-01,20.0,48.0,24.0,8.0,0.0,100.0
2826,15.595314,68.570981,2015-01-01,23.0,58.0,19.0,0.0,0.0,100.0
2840,15.595314,68.558018,2015-01-01,7.0,58.0,25.0,10.0,0.0,100.0
2885,15.595314,68.516351,2015-01-01,35.0,34.0,31.0,0.0,0.0,100.0
...,...,...,...,...,...,...,...,...,...
51191825,31.064758,69.727462,2015-01-01,0.0,62.0,23.0,15.0,0.0,100.0
51191827,31.064758,69.725610,2015-01-01,0.0,78.0,4.0,18.0,0.0,100.0
51191828,31.064758,69.724684,2015-01-01,0.0,82.0,3.0,15.0,0.0,100.0
51191829,31.064758,69.723759,2015-01-01,0.0,97.0,3.0,0.0,0.0,100.0


In [25]:
# Normalize the fractional cover
for col in ["Bare", "Grass", "Lichen", "Shrub", "Tree"]:
    print(col)
    df[col] = df[col] / df["tot"]

Bare
Grass
Lichen
Shrub
Tree


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[col] = df[col] / df["tot"]


In [26]:
# Drop the *tot* column
df = df.drop(["tot"], axis=1)

In [27]:
df

Unnamed: 0,lon,lat,time,Bare,Grass,Lichen,Shrub,Tree
2520,15.595314,68.854314,2015-01-01,0.00,0.41,0.01,0.29,0.29
2823,15.595314,68.573759,2015-01-01,0.20,0.48,0.24,0.08,0.00
2826,15.595314,68.570981,2015-01-01,0.23,0.58,0.19,0.00,0.00
2840,15.595314,68.558018,2015-01-01,0.07,0.58,0.25,0.10,0.00
2885,15.595314,68.516351,2015-01-01,0.35,0.34,0.31,0.00,0.00
...,...,...,...,...,...,...,...,...
51191825,31.064758,69.727462,2015-01-01,0.00,0.62,0.23,0.15,0.00
51191827,31.064758,69.725610,2015-01-01,0.00,0.78,0.04,0.18,0.00
51191828,31.064758,69.724684,2015-01-01,0.00,0.82,0.03,0.15,0.00
51191829,31.064758,69.723759,2015-01-01,0.00,0.97,0.03,0.00,0.00


In [28]:
# Convert to VAEX
dv = vaex.from_pandas(df)

In [29]:
dv

#,lon,lat,time,Bare,Grass,Lichen,Shrub,Tree
0,15.595313502963002,68.8543140557036,2015-01-01 00:00:00.000000000,0.0,0.41,0.01,0.29,0.29
1,15.595313502963002,68.57375850014802,2015-01-01 00:00:00.000000000,0.2,0.48,0.24,0.08,0.0
2,15.595313502963002,68.57098072237025,2015-01-01 00:00:00.000000000,0.23,0.58,0.19,0.0,0.0
3,15.595313502963002,68.55801775940728,2015-01-01 00:00:00.000000000,0.07,0.58,0.25,0.1,0.0
4,15.595313502963002,68.5163510927406,2015-01-01 00:00:00.000000000,0.35,0.34,0.31,0.0,0.0
...,...,...,...,...,...,...,...,...
335764,31.064757947408683,69.72746220385181,2015-01-01 00:00:00.000000000,0.0,0.62,0.23,0.15,0.0
335765,31.064757947408683,69.72561035199996,2015-01-01 00:00:00.000000000,0.0,0.78,0.04,0.18,0.0
335766,31.064757947408683,69.72468442607405,2015-01-01 00:00:00.000000000,0.0,0.82,0.03,0.15,0.0
335767,31.064757947408683,69.72375850014812,2015-01-01 00:00:00.000000000,0.0,0.97,0.03,0.0,0.0


In [30]:
# Find the correspondind ERA5-land lat-lon
# Careful with the latitude, in reverse order
ERA5_lon = ((dv.lon - 15.59) / 0.1).astype('int').values
ERA5_lat = 28 - ((dv.lat - 68.35) / 0.1).astype('int').values

In [50]:
ERA5_lat.max()

28

In [31]:
dv['ERA5_lon_index'] = ERA5_lon
dv['ERA5_lat_index'] = ERA5_lat

In [32]:
dv

#,lon,lat,time,Bare,Grass,Lichen,Shrub,Tree,ERA5_lon_index,ERA5_lat_index
0,15.595313502963002,68.8543140557036,2015-01-01 00:00:00.000000000,0.0,0.41,0.01,0.29,0.29,0,23
1,15.595313502963002,68.57375850014802,2015-01-01 00:00:00.000000000,0.2,0.48,0.24,0.08,0.0,0,26
2,15.595313502963002,68.57098072237025,2015-01-01 00:00:00.000000000,0.23,0.58,0.19,0.0,0.0,0,26
3,15.595313502963002,68.55801775940728,2015-01-01 00:00:00.000000000,0.07,0.58,0.25,0.1,0.0,0,26
4,15.595313502963002,68.5163510927406,2015-01-01 00:00:00.000000000,0.35,0.34,0.31,0.0,0.0,0,27
...,...,...,...,...,...,...,...,...,...,...
335764,31.064757947408683,69.72746220385181,2015-01-01 00:00:00.000000000,0.0,0.62,0.23,0.15,0.0,154,15
335765,31.064757947408683,69.72561035199996,2015-01-01 00:00:00.000000000,0.0,0.78,0.04,0.18,0.0,154,15
335766,31.064757947408683,69.72468442607405,2015-01-01 00:00:00.000000000,0.0,0.82,0.03,0.15,0.0,154,15
335767,31.064757947408683,69.72375850014812,2015-01-01 00:00:00.000000000,0.0,0.97,0.03,0.0,0.0,154,15


# ERA5-land data from 2015-01-01 to 2019-12-31 1 already available as a netCDF file stored on EOSC (CESNET)
## 2m Temperature, Snow depth, Total precipitation

In [33]:
dv

#,lon,lat,time,Bare,Grass,Lichen,Shrub,Tree,ERA5_lon_index,ERA5_lat_index
0,15.595313502963002,68.8543140557036,2015-01-01 00:00:00.000000000,0.0,0.41,0.01,0.29,0.29,0,23
1,15.595313502963002,68.57375850014802,2015-01-01 00:00:00.000000000,0.2,0.48,0.24,0.08,0.0,0,26
2,15.595313502963002,68.57098072237025,2015-01-01 00:00:00.000000000,0.23,0.58,0.19,0.0,0.0,0,26
3,15.595313502963002,68.55801775940728,2015-01-01 00:00:00.000000000,0.07,0.58,0.25,0.1,0.0,0,26
4,15.595313502963002,68.5163510927406,2015-01-01 00:00:00.000000000,0.35,0.34,0.31,0.0,0.0,0,27
...,...,...,...,...,...,...,...,...,...,...
335764,31.064757947408683,69.72746220385181,2015-01-01 00:00:00.000000000,0.0,0.62,0.23,0.15,0.0,154,15
335765,31.064757947408683,69.72561035199996,2015-01-01 00:00:00.000000000,0.0,0.78,0.04,0.18,0.0,154,15
335766,31.064757947408683,69.72468442607405,2015-01-01 00:00:00.000000000,0.0,0.82,0.03,0.15,0.0,154,15
335767,31.064757947408683,69.72375850014812,2015-01-01 00:00:00.000000000,0.0,0.97,0.03,0.0,0.0,154,15


In [34]:
s3path = 'Data/reanalysis-era5-land_hourly_2015-01-01_2019-12-31_Troms-Finnmark_T2m-SD-TP.nc'

In [35]:
ERA5land = xr.open_dataset(store.open(s3path))

In [36]:
ERA5land

# Adding columns with the ERA5-land longitude and latitude to dv

In [37]:
dv['ERA5_lon'] = ERA5land.sel(time="2015-01-01").longitude[dv['ERA5_lon_index'].values].values

In [51]:
dv['ERA5_lat'] = ERA5land.sel(time="2015-01-01").latitude[dv['ERA5_lat_index'].values].values

In [67]:
dv

#,lon,lat,time,Bare,Grass,Lichen,Shrub,Tree,ERA5_lon_index,ERA5_lat_index,ERA5_lon,ERA5_lat,Error_lon,Error_lat
0,15.595313502963002,68.8543140557036,2015-01-01 00:00:00.000000000,0.0,0.41,0.01,0.29,0.29,0,23,15.59,68.85,0.00531335037511127,0.004315581582503114
1,15.595313502963002,68.57375850014802,2015-01-01 00:00:00.000000000,0.2,0.48,0.24,0.08,0.0,0,26,15.59,68.55,0.00531335037511127,0.023755448390204492
2,15.595313502963002,68.57098072237025,2015-01-01 00:00:00.000000000,0.23,0.58,0.19,0.0,0.0,0,26,15.59,68.55,0.00531335037511127,0.020977670612438715
3,15.595313502963002,68.55801775940728,2015-01-01 00:00:00.000000000,0.07,0.58,0.25,0.1,0.0,0,26,15.59,68.55,0.00531335037511127,0.008014707649465436
4,15.595313502963002,68.5163510927406,2015-01-01 00:00:00.000000000,0.35,0.34,0.31,0.0,0.0,0,27,15.59,68.45,0.00531335037511127,0.06635414449841903
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
335764,31.064757947408683,69.72746220385181,2015-01-01 00:00:00.000000000,0.0,0.62,0.23,0.15,0.0,154,15,30.99,69.65,0.07475817629051917,0.07746067797290834
335765,31.064757947408683,69.72561035199996,2015-01-01 00:00:00.000000000,0.0,0.78,0.04,0.18,0.0,154,15,30.99,69.65,0.07475817629051917,0.07560882612105502
335766,31.064757947408683,69.72468442607405,2015-01-01 00:00:00.000000000,0.0,0.82,0.03,0.15,0.0,154,15,30.99,69.65,0.07475817629051917,0.07468290019514257
335767,31.064757947408683,69.72375850014812,2015-01-01 00:00:00.000000000,0.0,0.97,0.03,0.0,0.0,154,15,30.99,69.65,0.07475817629051917,0.0737569742692159


In [60]:
dv['Error_lon'] = dv['lon'] - dv['ERA5_lon']
dv['Error_lat'] = dv['lat'] - dv['ERA5_lat']

In [68]:
dv

#,lon,lat,time,Bare,Grass,Lichen,Shrub,Tree,ERA5_lon_index,ERA5_lat_index,ERA5_lon,ERA5_lat,Error_lon,Error_lat
0,15.595313502963002,68.8543140557036,2015-01-01 00:00:00.000000000,0.0,0.41,0.01,0.29,0.29,0,23,15.59,68.85,0.00531335037511127,0.004315581582503114
1,15.595313502963002,68.57375850014802,2015-01-01 00:00:00.000000000,0.2,0.48,0.24,0.08,0.0,0,26,15.59,68.55,0.00531335037511127,0.023755448390204492
2,15.595313502963002,68.57098072237025,2015-01-01 00:00:00.000000000,0.23,0.58,0.19,0.0,0.0,0,26,15.59,68.55,0.00531335037511127,0.020977670612438715
3,15.595313502963002,68.55801775940728,2015-01-01 00:00:00.000000000,0.07,0.58,0.25,0.1,0.0,0,26,15.59,68.55,0.00531335037511127,0.008014707649465436
4,15.595313502963002,68.5163510927406,2015-01-01 00:00:00.000000000,0.35,0.34,0.31,0.0,0.0,0,27,15.59,68.45,0.00531335037511127,0.06635414449841903
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
335764,31.064757947408683,69.72746220385181,2015-01-01 00:00:00.000000000,0.0,0.62,0.23,0.15,0.0,154,15,30.99,69.65,0.07475817629051917,0.07746067797290834
335765,31.064757947408683,69.72561035199996,2015-01-01 00:00:00.000000000,0.0,0.78,0.04,0.18,0.0,154,15,30.99,69.65,0.07475817629051917,0.07560882612105502
335766,31.064757947408683,69.72468442607405,2015-01-01 00:00:00.000000000,0.0,0.82,0.03,0.15,0.0,154,15,30.99,69.65,0.07475817629051917,0.07468290019514257
335767,31.064757947408683,69.72375850014812,2015-01-01 00:00:00.000000000,0.0,0.97,0.03,0.0,0.0,154,15,30.99,69.65,0.07475817629051917,0.0737569742692159


In [53]:
vars = list(ERA5land.keys())
vars

['sd', 't2m', 'tp']

In [54]:
vars = ["t2m"]

In [55]:
import numpy as np

In [57]:
ERA5land["latitude"].min(), ERA5land["latitude"].max()

(<xarray.DataArray 'latitude' ()>
 array(68.34999847),
 <xarray.DataArray 'latitude' ()>
 array(71.15000153))

In [58]:
ERA5land.sel(time="2015-01-01").where(ERA5land["latitude"].isin(dv['ERA5_lat'].values) & ERA5land["longitude"].isin(dv['ERA5_lon'].values))["t2m"].values

array([[[      nan,       nan,       nan, ...,       nan,       nan,
               nan],
        [      nan,       nan,       nan, ...,       nan,       nan,
               nan],
        [      nan,       nan,       nan, ...,       nan,       nan,
               nan],
        ...,
        [277.99997, 277.72177, 277.61148, ..., 271.2707 , 270.7507 ,
         270.17188],
        [278.4624 , 278.28302, 277.9945 , ..., 270.70523, 270.10645,
         269.74158],
        [      nan, 278.76605,       nan, ..., 270.38705, 269.9325 ,
         269.7846 ]],

       [[      nan,       nan,       nan, ...,       nan,       nan,
               nan],
        [      nan,       nan,       nan, ...,       nan,       nan,
               nan],
        [      nan,       nan,       nan, ...,       nan,       nan,
               nan],
        ...,
        [277.7436 , 277.42966, 277.27936, ..., 271.23373, 270.75797,
         270.2228 ],
        [278.17453, 277.96967, 277.6636 , ..., 270.67252, 270.09976,
   

In [98]:
#for l in range(len(dv)):
for l in range(10):
    print(dv['lon', 'lat', 'Bare', 'Grass', 'Lichen', 'Shrub', 'Tree'].values[l])

[1.55953135e+01 6.88543141e+01 0.00000000e+00 4.09999996e-01
 9.99999978e-03 2.89999992e-01 2.89999992e-01]
[15.5953135  68.5737585   0.2         0.47999999  0.23999999  0.08
  0.        ]
[15.5953135  68.57098072  0.23        0.57999998  0.19        0.
  0.        ]
[15.5953135  68.55801776  0.07        0.57999998  0.25        0.1
  0.        ]
[15.5953135  68.51635109  0.34999999  0.34        0.31        0.
  0.        ]
[1.55953135e+01 6.85043141e+01 0.00000000e+00 3.10000002e-01
 9.99999978e-03 2.09999993e-01 4.69999999e-01]
[1.55962394e+01 6.90524622e+01 0.00000000e+00 2.70000011e-01
 5.99999987e-02 2.19999999e-01 4.49999988e-01]
[1.55962394e+01 6.88533881e+01 0.00000000e+00 5.60000002e-01
 9.99999978e-03 2.50000000e-01 1.80000007e-01]
[15.59623943 68.56357331  0.15000001  0.54000002  0.31        0.
  0.        ]
[15.59623943 68.55801776  0.07        0.57999998  0.25        0.1
  0.        ]


In [70]:
for var in vars:
    v = ERA5land.sel(time="2015-01-01")[var][dv['ERA5_lon_index'].values].values.flatten()
    print(v)
    for t in range(len(v)):
        dv["ERA5_" + var + "_" + str(t)] = ""
        #dv["ERA5_" + var + "_" + str(t)][dv['ERA5_lon_index'].values] = np.nan
    #print("ERA5_" + var)
    #print(len(ERA5land[var][dv['ERA5_lon_index'].values].values.flatten().transpose()))

IndexError: index 24 is out of bounds for axis 0 with size 24

In [None]:
list(ERA5land.coords.keys())

In [None]:
dv

In [None]:
dv['ERA5_lat'] = ERA5land.latitude[dv['ERA5_lat_index'].values].values

In [None]:
dv

In [None]:
dv['ERA5_index'] = (dv['ERA5_lon'] * 100).astype('int') + ((dv['ERA5_lat'] * 100).astype('int') / 10000)

In [None]:
dv

In [None]:
# Year 2015 - Only keeping here the first 2 days
saison = pd.date_range("2015-01-01", periods=2*24, freq="1H")

In [None]:
saison

In [None]:
# Create labels for ERA5-land variables to be added to dv
label = list()
for i in range(len(saison)):
#    label.append('sd_'+ str(i))
    label.append('t2m_'+ str(i))
#    label.append('tp_'+ str(i))

In [None]:
label

In [None]:
# Keep only data corresponding to a "season"
ERA5land = ERA5land.sel(time = saison)

In [None]:
ERA5land

In [None]:
# Keep only data corresponding to a "season"
dg = ERA5land.to_dataframe()

In [None]:
dg = dg.reset_index()

In [None]:
dg

In [None]:
dg['ERA5_index'] = (dg['longitude'] * 100).astype('int') + ((dg['latitude'] * 100).astype('int') / 10000)

In [None]:
dg

In [None]:
dw = vaex.from_pandas(dg)

In [None]:
dw = dw.drop('longitude').drop('latitude').drop('time')

In [None]:
dw

In [None]:
dv = dv.drop('time').drop('ERA5_lon_index').drop('ERA5_lat_index').drop('ERA5_lon').drop('ERA5_lat')

In [None]:
dv

In [None]:
label

In [None]:
dv[label] = ???