# Prepare labelled input for the NN
# (i.e. locations where moss&lichen fractional cover changed and related meteorological parameters from ERA5-Land)

# Copernicus Global Land Cover data  from 2015-01-01 to 2019-12-31 already available as a netCDF file stored on EOSC (CESNET)
## Troms og Finnmark
### Mosses and lichens, bare, grass, shrubs and trees

In [1]:
!date

Thu Feb  9 13:22:00 UTC 2023


### Define s3 storage parameters

In [2]:
import s3fs
import xarray as xr
import pandas as pd
import h3
import vaex

In [3]:
client_kwargs={'endpoint_url': 'https://object-store.cloud.muni.cz'}
store = s3fs.S3FileSystem(anon=False, client_kwargs=client_kwargs)
store.ls('Data', detail=True, refresh=True)

[{'Key': 'Data/C_GlobalLandCover_20150101_20190101_Troms-Finnmark.nc',
  'LastModified': datetime.datetime(2023, 2, 1, 12, 44, 42, 95000, tzinfo=tzlocal()),
  'ETag': '"70e848cfeba6b4e2db997b6efb0ad947-8"',
  'Size': 397191332,
  'StorageClass': 'STANDARD',
  'type': 'file',
  'size': 397191332,
  'name': 'Data/C_GlobalLandCover_20150101_20190101_Troms-Finnmark.nc'},
 {'Key': 'Data/reanalysis-era5-land_hourly_2015-01-01_2019-12-31_Troms-Finnmark_T2m-SD-TP.nc',
  'LastModified': datetime.datetime(2023, 2, 1, 9, 53, 15, 635000, tzinfo=tzlocal()),
  'ETag': '"e48be2b71e08b38d296a0ea6db979b09-23"',
  'Size': 1182124070,
  'StorageClass': 'STANDARD',
  'type': 'file',
  'size': 1182124070,
  'name': 'Data/reanalysis-era5-land_hourly_2015-01-01_2019-12-31_Troms-Finnmark_T2m-SD-TP.nc'},
 {'Key': 'Data/reanalysis-era5-single-levels_2015-2019.nc',
  'LastModified': datetime.datetime(2023, 1, 6, 7, 24, 40, 847000, tzinfo=tzlocal()),
  'ETag': '"2582b929cffaa1770daa45a508dc689a-8"',
  'Size': 391

### Define s3 store for the **netCDF file**

In [4]:
s3path = 'Data/C_GlobalLandCover_20150101_20190101_Troms-Finnmark.nc'

In [5]:
GLC_AOI = xr.open_dataset(store.open(s3path))

In [6]:
GLC_AOI

In [7]:
GLC_AOI = GLC_AOI.rename(x='lon', y='lat', t='time')

In [8]:
# Drop variables not directly of interest here
GLC_AOI = GLC_AOI.drop_vars(['crs',
                             'Crops_CoverFraction_layer',
                             'Discrete_Classification_map', 
                             'Discrete_Classification_proba',
                             'Forest_Type_layer',
                             'Snow_CoverFraction_layer',
                             'BuiltUp_CoverFraction_layer',
                             'PermanentWater_CoverFraction_layer',
                             'SeasonalWater_CoverFraction_layer',
                             'DataDensityIndicator',
                             'Change_Confidence_layer',
                             'dataMask'])

In [9]:
GLC_AOI = GLC_AOI.rename(Bare_CoverFraction_layer = 'Bare',
                         Grass_CoverFraction_layer = 'Grass',
                         MossLichen_CoverFraction_layer = 'Lichen',
                         Shrub_CoverFraction_layer = 'Shrub',
                         Tree_CoverFraction_layer = 'Tree')

In [10]:
GLC_AOI

In [11]:
# Troms & Finnmark Global Land Cover area
GLC_AOI_min_lon = GLC_AOI.lon.min()
GLC_AOI_max_lon = GLC_AOI.lon.max()
GLC_AOI_min_lat = GLC_AOI.lat.min()
GLC_AOI_max_lat = GLC_AOI.lat.max()
print(GLC_AOI_min_lon, GLC_AOI_max_lon, GLC_AOI_min_lat, GLC_AOI_max_lat)

<xarray.DataArray 'lon' ()>
array(15.5953135) <xarray.DataArray 'lon' ()>
array(31.06568387) <xarray.DataArray 'lat' ()>
array(68.35153628) <xarray.DataArray 'lat' ()>
array(71.18764739)


### The cell below redefine a very small region for testing purposes only - skip them to keep the whole Troms-finnmark area

In [12]:
# Small region 
Small_AOI_min_lon = 19.6
Small_AOI_max_lon = 19.7
Small_AOI_min_lat = 69
Small_AOI_max_lat = 69.1

In [13]:
from h3 import h3
import folium

def visualize_hexagons(hexagons, color="red", folium_map=None):
    """
    hexagons is a list of hexcluster. Each hexcluster is a list of hexagons. 
    eg. [[hex1, hex2], [hex3, hex4]]
    """
    polylines = []
    lat = []
    lng = []
    for hex in hexagons:
        polygons = h3.h3_set_to_multi_polygon([hex], geo_json=False)
        # flatten polygons into loops.
        outlines = [loop for polygon in polygons for loop in polygon]
        polyline = [outline + [outline[0]] for outline in outlines][0]
        lat.extend(map(lambda v:v[0],polyline))
        lng.extend(map(lambda v:v[1],polyline))
        polylines.append(polyline)
    
    if folium_map is None:
        m = folium.Map(location=[sum(lat)/len(lat), sum(lng)/len(lng)], zoom_start=13, tiles='cartodbpositron')
    else:
        m = folium_map
    for polyline in polylines:
        my_PolyLine=folium.PolyLine(locations=polyline,weight=8,color=color)
        m.add_child(my_PolyLine)
    return m
    

def visualize_polygon(polyline, color):
    polyline.append(polyline[0])
    lat = [p[0] for p in polyline]
    lng = [p[1] for p in polyline]
    m = folium.Map(location=[sum(lat)/len(lat), sum(lng)/len(lng)], zoom_start=13, tiles='cartodbpositron')
    my_PolyLine=folium.PolyLine(locations=polyline,weight=8,color=color)
    m.add_child(my_PolyLine)
    return m

In [14]:
Small_AOI_center = h3.geo_to_h3((Small_AOI_min_lat + Small_AOI_max_lat)/2, (Small_AOI_min_lon +Small_AOI_max_lon)/2, 9) # lat, lng, hex resolution    
m = visualize_hexagons([h3.geo_to_h3(GLC_AOI_min_lat, GLC_AOI_min_lon, 9)], color="red")                                                                                        
m = visualize_hexagons([h3.geo_to_h3(GLC_AOI_max_lat, GLC_AOI_min_lon, 9)], color="red", folium_map=m)                                                                                   
m = visualize_hexagons([h3.geo_to_h3(GLC_AOI_max_lat, GLC_AOI_max_lon, 9)], color="red", folium_map=m)                                                                                
m = visualize_hexagons([h3.geo_to_h3(GLC_AOI_min_lat, GLC_AOI_max_lon, 9)], color="red", folium_map=m)
m = visualize_hexagons([Small_AOI_center], color="green", folium_map=m) 
display(m)

## Year 2015

In [15]:
df = GLC_AOI.isel(time = 0).to_dataframe()

In [16]:
df

Unnamed: 0_level_0,Unnamed: 1_level_0,time,Bare,Grass,Lichen,Shrub,Tree
lon,lat,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
15.595314,71.187647,2015-01-01,255.0,255.0,255.0,255.0,255.0
15.595314,71.186721,2015-01-01,255.0,255.0,255.0,255.0,255.0
15.595314,71.185796,2015-01-01,255.0,255.0,255.0,255.0,255.0
15.595314,71.184870,2015-01-01,255.0,255.0,255.0,255.0,255.0
15.595314,71.183944,2015-01-01,255.0,255.0,255.0,255.0,255.0
...,...,...,...,...,...,...,...
31.065684,68.355240,2015-01-01,,,,,
31.065684,68.354314,2015-01-01,,,,,
31.065684,68.353388,2015-01-01,,,,,
31.065684,68.352462,2015-01-01,,,,,


In [17]:
df = df.reset_index()

In [18]:
df

Unnamed: 0,lon,lat,time,Bare,Grass,Lichen,Shrub,Tree
0,15.595314,71.187647,2015-01-01,255.0,255.0,255.0,255.0,255.0
1,15.595314,71.186721,2015-01-01,255.0,255.0,255.0,255.0,255.0
2,15.595314,71.185796,2015-01-01,255.0,255.0,255.0,255.0,255.0
3,15.595314,71.184870,2015-01-01,255.0,255.0,255.0,255.0,255.0
4,15.595314,71.183944,2015-01-01,255.0,255.0,255.0,255.0,255.0
...,...,...,...,...,...,...,...,...
51196371,31.065684,68.355240,2015-01-01,,,,,
51196372,31.065684,68.354314,2015-01-01,,,,,
51196373,31.065684,68.353388,2015-01-01,,,,,
51196374,31.065684,68.352462,2015-01-01,,,,,


In [19]:
# Only keep locations where there is some moss & lichen
df = df.loc[(df['Lichen'] > 0) & (df['Lichen'] <= 100)]

In [20]:
# Replace NaNs by 0
for col in ["Bare", "Grass", "Lichen", "Shrub", "Tree"]:
    print(col)
    df[col] = df[col].fillna(0)

Bare
Grass
Lichen
Shrub
Tree


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[col] = df[col].fillna(0)


In [21]:
# Calculate total fractional coverage of bare, grass, lichen, shrub and tree (should be 100)
df["tot"]  = (df['Bare'] + df['Grass'] + df['Lichen'] + df['Shrub'] + df['Tree'])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["tot"]  = (df['Bare'] + df['Grass'] + df['Lichen'] + df['Shrub'] + df['Tree'])


In [22]:
df

Unnamed: 0,lon,lat,time,Bare,Grass,Lichen,Shrub,Tree,tot
2520,15.595314,68.854314,2015-01-01,0.0,41.0,1.0,29.0,29.0,100.0
2823,15.595314,68.573759,2015-01-01,20.0,48.0,24.0,8.0,0.0,100.0
2826,15.595314,68.570981,2015-01-01,23.0,58.0,19.0,0.0,0.0,100.0
2840,15.595314,68.558018,2015-01-01,7.0,58.0,25.0,10.0,0.0,100.0
2885,15.595314,68.516351,2015-01-01,35.0,34.0,31.0,0.0,0.0,100.0
...,...,...,...,...,...,...,...,...,...
51191825,31.064758,69.727462,2015-01-01,0.0,62.0,23.0,15.0,0.0,100.0
51191827,31.064758,69.725610,2015-01-01,0.0,78.0,4.0,18.0,0.0,100.0
51191828,31.064758,69.724684,2015-01-01,0.0,82.0,3.0,15.0,0.0,100.0
51191829,31.064758,69.723759,2015-01-01,0.0,97.0,3.0,0.0,0.0,100.0


In [23]:
# Normalize the fractional cover
for col in ["Bare", "Grass", "Lichen", "Shrub", "Tree"]:
    print(col)
    df[col] = df[col] / df["tot"]

Bare
Grass
Lichen
Shrub
Tree


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[col] = df[col] / df["tot"]


In [24]:
# Drop the *tot* column
df = df.drop(["tot"], axis=1)

In [25]:
df

Unnamed: 0,lon,lat,time,Bare,Grass,Lichen,Shrub,Tree
2520,15.595314,68.854314,2015-01-01,0.00,0.41,0.01,0.29,0.29
2823,15.595314,68.573759,2015-01-01,0.20,0.48,0.24,0.08,0.00
2826,15.595314,68.570981,2015-01-01,0.23,0.58,0.19,0.00,0.00
2840,15.595314,68.558018,2015-01-01,0.07,0.58,0.25,0.10,0.00
2885,15.595314,68.516351,2015-01-01,0.35,0.34,0.31,0.00,0.00
...,...,...,...,...,...,...,...,...
51191825,31.064758,69.727462,2015-01-01,0.00,0.62,0.23,0.15,0.00
51191827,31.064758,69.725610,2015-01-01,0.00,0.78,0.04,0.18,0.00
51191828,31.064758,69.724684,2015-01-01,0.00,0.82,0.03,0.15,0.00
51191829,31.064758,69.723759,2015-01-01,0.00,0.97,0.03,0.00,0.00


In [26]:
# Convert to VAEX
dv = vaex.from_pandas(df)

In [27]:
dv

#,lon,lat,time,Bare,Grass,Lichen,Shrub,Tree
0,15.595313502963002,68.8543140557036,2015-01-01 00:00:00.000000000,0.0,0.41,0.01,0.29,0.29
1,15.595313502963002,68.57375850014802,2015-01-01 00:00:00.000000000,0.2,0.48,0.24,0.08,0.0
2,15.595313502963002,68.57098072237025,2015-01-01 00:00:00.000000000,0.23,0.58,0.19,0.0,0.0
3,15.595313502963002,68.55801775940728,2015-01-01 00:00:00.000000000,0.07,0.58,0.25,0.1,0.0
4,15.595313502963002,68.5163510927406,2015-01-01 00:00:00.000000000,0.35,0.34,0.31,0.0,0.0
...,...,...,...,...,...,...,...,...
335764,31.064757947408683,69.72746220385181,2015-01-01 00:00:00.000000000,0.0,0.62,0.23,0.15,0.0
335765,31.064757947408683,69.72561035199996,2015-01-01 00:00:00.000000000,0.0,0.78,0.04,0.18,0.0
335766,31.064757947408683,69.72468442607405,2015-01-01 00:00:00.000000000,0.0,0.82,0.03,0.15,0.0
335767,31.064757947408683,69.72375850014812,2015-01-01 00:00:00.000000000,0.0,0.97,0.03,0.0,0.0


In [28]:
# Find the correspondind ERA5-land lat-lon
# Careful with the latitude, in reverse order
ERA5_lon = ((dv.lon - 15.59) / 0.1).astype('int').values
ERA5_lat = 28 - ((dv.lat - 68.35) / 0.1).astype('int').values

In [29]:
dv['ERA5_lon_index'] = ERA5_lon
dv['ERA5_lat_index'] = ERA5_lat

In [30]:
dv

#,lon,lat,time,Bare,Grass,Lichen,Shrub,Tree,ERA5_lon_index,ERA5_lat_index
0,15.595313502963002,68.8543140557036,2015-01-01 00:00:00.000000000,0.0,0.41,0.01,0.29,0.29,0,23
1,15.595313502963002,68.57375850014802,2015-01-01 00:00:00.000000000,0.2,0.48,0.24,0.08,0.0,0,26
2,15.595313502963002,68.57098072237025,2015-01-01 00:00:00.000000000,0.23,0.58,0.19,0.0,0.0,0,26
3,15.595313502963002,68.55801775940728,2015-01-01 00:00:00.000000000,0.07,0.58,0.25,0.1,0.0,0,26
4,15.595313502963002,68.5163510927406,2015-01-01 00:00:00.000000000,0.35,0.34,0.31,0.0,0.0,0,27
...,...,...,...,...,...,...,...,...,...,...
335764,31.064757947408683,69.72746220385181,2015-01-01 00:00:00.000000000,0.0,0.62,0.23,0.15,0.0,154,15
335765,31.064757947408683,69.72561035199996,2015-01-01 00:00:00.000000000,0.0,0.78,0.04,0.18,0.0,154,15
335766,31.064757947408683,69.72468442607405,2015-01-01 00:00:00.000000000,0.0,0.82,0.03,0.15,0.0,154,15
335767,31.064757947408683,69.72375850014812,2015-01-01 00:00:00.000000000,0.0,0.97,0.03,0.0,0.0,154,15


# ERA5-land data from 2015-01-01 to 2019-12-31 1 already available as a netCDF file stored on EOSC (CESNET)
## 2m Temperature, Snow depth, Total precipitation

In [31]:
s3path = 'Data/reanalysis-era5-land_hourly_2015-01-01_2019-12-31_Troms-Finnmark_T2m-SD-TP.nc'

In [32]:
ERA5land = xr.open_dataset(store.open(s3path))

In [33]:
ERA5land

# Adding columns with the ERA5-land longitude and latitude to dv

In [34]:
dv['ERA5_lon'] = ERA5land.longitude[dv['ERA5_lon_index'].values].values

In [35]:
dv

#,lon,lat,time,Bare,Grass,Lichen,Shrub,Tree,ERA5_lon_index,ERA5_lat_index,ERA5_lon
0,15.595313502963002,68.8543140557036,2015-01-01 00:00:00.000000000,0.0,0.41,0.01,0.29,0.29,0,23,15.59
1,15.595313502963002,68.57375850014802,2015-01-01 00:00:00.000000000,0.2,0.48,0.24,0.08,0.0,0,26,15.59
2,15.595313502963002,68.57098072237025,2015-01-01 00:00:00.000000000,0.23,0.58,0.19,0.0,0.0,0,26,15.59
3,15.595313502963002,68.55801775940728,2015-01-01 00:00:00.000000000,0.07,0.58,0.25,0.1,0.0,0,26,15.59
4,15.595313502963002,68.5163510927406,2015-01-01 00:00:00.000000000,0.35,0.34,0.31,0.0,0.0,0,27,15.59
...,...,...,...,...,...,...,...,...,...,...,...
335764,31.064757947408683,69.72746220385181,2015-01-01 00:00:00.000000000,0.0,0.62,0.23,0.15,0.0,154,15,30.99
335765,31.064757947408683,69.72561035199996,2015-01-01 00:00:00.000000000,0.0,0.78,0.04,0.18,0.0,154,15,30.99
335766,31.064757947408683,69.72468442607405,2015-01-01 00:00:00.000000000,0.0,0.82,0.03,0.15,0.0,154,15,30.99
335767,31.064757947408683,69.72375850014812,2015-01-01 00:00:00.000000000,0.0,0.97,0.03,0.0,0.0,154,15,30.99


In [36]:
dv['ERA5_lat'] = ERA5land.latitude[dv['ERA5_lat_index'].values].values

In [37]:
dv

#,lon,lat,time,Bare,Grass,Lichen,Shrub,Tree,ERA5_lon_index,ERA5_lat_index,ERA5_lon,ERA5_lat
0,15.595313502963002,68.8543140557036,2015-01-01 00:00:00.000000000,0.0,0.41,0.01,0.29,0.29,0,23,15.59,68.85
1,15.595313502963002,68.57375850014802,2015-01-01 00:00:00.000000000,0.2,0.48,0.24,0.08,0.0,0,26,15.59,68.55
2,15.595313502963002,68.57098072237025,2015-01-01 00:00:00.000000000,0.23,0.58,0.19,0.0,0.0,0,26,15.59,68.55
3,15.595313502963002,68.55801775940728,2015-01-01 00:00:00.000000000,0.07,0.58,0.25,0.1,0.0,0,26,15.59,68.55
4,15.595313502963002,68.5163510927406,2015-01-01 00:00:00.000000000,0.35,0.34,0.31,0.0,0.0,0,27,15.59,68.45
...,...,...,...,...,...,...,...,...,...,...,...,...
335764,31.064757947408683,69.72746220385181,2015-01-01 00:00:00.000000000,0.0,0.62,0.23,0.15,0.0,154,15,30.99,69.65
335765,31.064757947408683,69.72561035199996,2015-01-01 00:00:00.000000000,0.0,0.78,0.04,0.18,0.0,154,15,30.99,69.65
335766,31.064757947408683,69.72468442607405,2015-01-01 00:00:00.000000000,0.0,0.82,0.03,0.15,0.0,154,15,30.99,69.65
335767,31.064757947408683,69.72375850014812,2015-01-01 00:00:00.000000000,0.0,0.97,0.03,0.0,0.0,154,15,30.99,69.65


In [38]:
dv['ERA5_index'] = (dv['ERA5_lon'] * 100).astype('int') + ((dv['ERA5_lat'] * 100).astype('int') / 10000)

In [39]:
dv

#,lon,lat,time,Bare,Grass,Lichen,Shrub,Tree,ERA5_lon_index,ERA5_lat_index,ERA5_lon,ERA5_lat,ERA5_index
0,15.595313502963002,68.8543140557036,2015-01-01 00:00:00.000000000,0.0,0.41,0.01,0.29,0.29,0,23,15.59,68.85,1559.6885
1,15.595313502963002,68.57375850014802,2015-01-01 00:00:00.000000000,0.2,0.48,0.24,0.08,0.0,0,26,15.59,68.55,1559.6855
2,15.595313502963002,68.57098072237025,2015-01-01 00:00:00.000000000,0.23,0.58,0.19,0.0,0.0,0,26,15.59,68.55,1559.6855
3,15.595313502963002,68.55801775940728,2015-01-01 00:00:00.000000000,0.07,0.58,0.25,0.1,0.0,0,26,15.59,68.55,1559.6855
4,15.595313502963002,68.5163510927406,2015-01-01 00:00:00.000000000,0.35,0.34,0.31,0.0,0.0,0,27,15.59,68.45,1559.6844
...,...,...,...,...,...,...,...,...,...,...,...,...,...
335764,31.064757947408683,69.72746220385181,2015-01-01 00:00:00.000000000,0.0,0.62,0.23,0.15,0.0,154,15,30.99,69.65,3099.6965
335765,31.064757947408683,69.72561035199996,2015-01-01 00:00:00.000000000,0.0,0.78,0.04,0.18,0.0,154,15,30.99,69.65,3099.6965
335766,31.064757947408683,69.72468442607405,2015-01-01 00:00:00.000000000,0.0,0.82,0.03,0.15,0.0,154,15,30.99,69.65,3099.6965
335767,31.064757947408683,69.72375850014812,2015-01-01 00:00:00.000000000,0.0,0.97,0.03,0.0,0.0,154,15,30.99,69.65,3099.6965


In [40]:
# Year 2015 - Only keeping here the first 2 days
saison = pd.date_range("2015-01-01", periods=2*24, freq="1H")

In [41]:
saison

DatetimeIndex(['2015-01-01 00:00:00', '2015-01-01 01:00:00',
               '2015-01-01 02:00:00', '2015-01-01 03:00:00',
               '2015-01-01 04:00:00', '2015-01-01 05:00:00',
               '2015-01-01 06:00:00', '2015-01-01 07:00:00',
               '2015-01-01 08:00:00', '2015-01-01 09:00:00',
               '2015-01-01 10:00:00', '2015-01-01 11:00:00',
               '2015-01-01 12:00:00', '2015-01-01 13:00:00',
               '2015-01-01 14:00:00', '2015-01-01 15:00:00',
               '2015-01-01 16:00:00', '2015-01-01 17:00:00',
               '2015-01-01 18:00:00', '2015-01-01 19:00:00',
               '2015-01-01 20:00:00', '2015-01-01 21:00:00',
               '2015-01-01 22:00:00', '2015-01-01 23:00:00',
               '2015-01-02 00:00:00', '2015-01-02 01:00:00',
               '2015-01-02 02:00:00', '2015-01-02 03:00:00',
               '2015-01-02 04:00:00', '2015-01-02 05:00:00',
               '2015-01-02 06:00:00', '2015-01-02 07:00:00',
               '2015-01-

In [42]:
# Create labels for ERA5-land variables to be added to dv
label = list()
for i in range(len(saison)):
#    label.append('sd_'+ str(i))
    label.append('t2m_'+ str(i))
#    label.append('tp_'+ str(i))

In [43]:
label

['t2m_0',
 't2m_1',
 't2m_2',
 't2m_3',
 't2m_4',
 't2m_5',
 't2m_6',
 't2m_7',
 't2m_8',
 't2m_9',
 't2m_10',
 't2m_11',
 't2m_12',
 't2m_13',
 't2m_14',
 't2m_15',
 't2m_16',
 't2m_17',
 't2m_18',
 't2m_19',
 't2m_20',
 't2m_21',
 't2m_22',
 't2m_23',
 't2m_24',
 't2m_25',
 't2m_26',
 't2m_27',
 't2m_28',
 't2m_29',
 't2m_30',
 't2m_31',
 't2m_32',
 't2m_33',
 't2m_34',
 't2m_35',
 't2m_36',
 't2m_37',
 't2m_38',
 't2m_39',
 't2m_40',
 't2m_41',
 't2m_42',
 't2m_43',
 't2m_44',
 't2m_45',
 't2m_46',
 't2m_47']

In [44]:
# Keep only data corresponding to a "season"
ERA5land = ERA5land.sel(time = saison)

In [45]:
ERA5land

In [46]:
# Keep only data corresponding to a "season"
dg = ERA5land.to_dataframe()

In [47]:
dg = dg.reset_index()

In [48]:
dg

Unnamed: 0,longitude,latitude,time,sd,t2m,tp
0,15.59,71.150002,2015-01-01 00:00:00,,,
1,15.59,71.150002,2015-01-01 01:00:00,,,
2,15.59,71.150002,2015-01-01 02:00:00,,,
3,15.59,71.150002,2015-01-01 03:00:00,,,
4,15.59,71.150002,2015-01-01 04:00:00,,,
...,...,...,...,...,...,...
215755,30.99,68.349998,2015-01-02 19:00:00,0.060239,270.659790,0.000318
215756,30.99,68.349998,2015-01-02 20:00:00,0.060292,270.559784,0.000349
215757,30.99,68.349998,2015-01-02 21:00:00,0.060292,270.438568,0.000409
215758,30.99,68.349998,2015-01-02 22:00:00,0.060450,270.272491,0.000576


In [49]:
dg['ERA5_index'] = (dg['longitude'] * 100).astype('int') + ((dg['latitude'] * 100).astype('int') / 10000)

In [50]:
dg

Unnamed: 0,longitude,latitude,time,sd,t2m,tp,ERA5_index
0,15.59,71.150002,2015-01-01 00:00:00,,,,1559.7115
1,15.59,71.150002,2015-01-01 01:00:00,,,,1559.7115
2,15.59,71.150002,2015-01-01 02:00:00,,,,1559.7115
3,15.59,71.150002,2015-01-01 03:00:00,,,,1559.7115
4,15.59,71.150002,2015-01-01 04:00:00,,,,1559.7115
...,...,...,...,...,...,...,...
215755,30.99,68.349998,2015-01-02 19:00:00,0.060239,270.659790,0.000318,3098.6834
215756,30.99,68.349998,2015-01-02 20:00:00,0.060292,270.559784,0.000349,3098.6834
215757,30.99,68.349998,2015-01-02 21:00:00,0.060292,270.438568,0.000409,3098.6834
215758,30.99,68.349998,2015-01-02 22:00:00,0.060450,270.272491,0.000576,3098.6834


In [51]:
dw = vaex.from_pandas(dg)

In [52]:
dw = dw.drop('longitude').drop('latitude').drop('time')

In [53]:
dw

#,sd,t2m,tp,ERA5_index
0,,,,1559.7115
1,,,,1559.7115
2,,,,1559.7115
3,,,,1559.7115
4,,,,1559.7115
...,...,...,...,...
215755,0.060239315,270.6598,0.00031849742,3098.6834
215756,0.060292006,270.55978,0.0003487952,3098.6834
215757,0.060292006,270.43857,0.00040902104,3098.6834
215758,0.060449958,270.2725,0.00057602953,3098.6834


In [54]:
dv = dv.drop('time').drop('ERA5_lon_index').drop('ERA5_lat_index').drop('ERA5_lon').drop('ERA5_lat')

In [55]:
dv

#,lon,lat,Bare,Grass,Lichen,Shrub,Tree,ERA5_index
0,15.595313502963002,68.8543140557036,0.0,0.41,0.01,0.29,0.29,1559.6885
1,15.595313502963002,68.57375850014802,0.2,0.48,0.24,0.08,0.0,1559.6855
2,15.595313502963002,68.57098072237025,0.23,0.58,0.19,0.0,0.0,1559.6855
3,15.595313502963002,68.55801775940728,0.07,0.58,0.25,0.1,0.0,1559.6855
4,15.595313502963002,68.5163510927406,0.35,0.34,0.31,0.0,0.0,1559.6844
...,...,...,...,...,...,...,...,...
335764,31.064757947408683,69.72746220385181,0.0,0.62,0.23,0.15,0.0,3099.6965
335765,31.064757947408683,69.72561035199996,0.0,0.78,0.04,0.18,0.0,3099.6965
335766,31.064757947408683,69.72468442607405,0.0,0.82,0.03,0.15,0.0,3099.6965
335767,31.064757947408683,69.72375850014812,0.0,0.97,0.03,0.0,0.0,3099.6965


In [56]:
label

['t2m_0',
 't2m_1',
 't2m_2',
 't2m_3',
 't2m_4',
 't2m_5',
 't2m_6',
 't2m_7',
 't2m_8',
 't2m_9',
 't2m_10',
 't2m_11',
 't2m_12',
 't2m_13',
 't2m_14',
 't2m_15',
 't2m_16',
 't2m_17',
 't2m_18',
 't2m_19',
 't2m_20',
 't2m_21',
 't2m_22',
 't2m_23',
 't2m_24',
 't2m_25',
 't2m_26',
 't2m_27',
 't2m_28',
 't2m_29',
 't2m_30',
 't2m_31',
 't2m_32',
 't2m_33',
 't2m_34',
 't2m_35',
 't2m_36',
 't2m_37',
 't2m_38',
 't2m_39',
 't2m_40',
 't2m_41',
 't2m_42',
 't2m_43',
 't2m_44',
 't2m_45',
 't2m_46',
 't2m_47']

In [57]:
dv[label] = ???

SyntaxError: invalid syntax (2675570853.py, line 1)