# Prepare labelled input for the NN
# (i.e. locations where moss&lichen fractional cover changed and related meteorological parameters from ERA5-Land)

# Copernicus Global Land Cover data  from 2015-01-01 to 2019-12-31 already available as a netCDF file stored on EOSC (CESNET)
## Troms og Finnmark
### Mosses and lichens, grass, shrubs and trees

In [1]:
!date

Mon Feb  6 19:49:01 CET 2023


### Define s3 storage parameters

In [2]:
import s3fs
import xarray as xr
import h3
import vaex

In [3]:
client_kwargs={'endpoint_url': 'https://object-store.cloud.muni.cz'}
store = s3fs.S3FileSystem(anon=False, client_kwargs=client_kwargs)
store.ls('Data', detail=True, refresh=True)

[{'Key': 'Data/C_GlobalLandCover_20150101_20190101_Troms-Finnmark.nc',
  'LastModified': datetime.datetime(2023, 2, 1, 12, 44, 42, 95000, tzinfo=tzutc()),
  'ETag': '"70e848cfeba6b4e2db997b6efb0ad947-8"',
  'Size': 397191332,
  'StorageClass': 'STANDARD',
  'type': 'file',
  'size': 397191332,
  'name': 'Data/C_GlobalLandCover_20150101_20190101_Troms-Finnmark.nc'},
 {'Key': 'Data/reanalysis-era5-land_hourly_2015-01-01_2019-12-31_Troms-Finnmark_T2m-SD-TP.nc',
  'LastModified': datetime.datetime(2023, 2, 1, 9, 53, 15, 635000, tzinfo=tzutc()),
  'ETag': '"e48be2b71e08b38d296a0ea6db979b09-23"',
  'Size': 1182124070,
  'StorageClass': 'STANDARD',
  'type': 'file',
  'size': 1182124070,
  'name': 'Data/reanalysis-era5-land_hourly_2015-01-01_2019-12-31_Troms-Finnmark_T2m-SD-TP.nc'},
 {'Key': 'Data/reanalysis-era5-single-levels_2015-2019.nc',
  'LastModified': datetime.datetime(2023, 1, 6, 7, 24, 40, 847000, tzinfo=tzutc()),
  'ETag': '"2582b929cffaa1770daa45a508dc689a-8"',
  'Size': 391630441

### Define s3 store for the **netCDF file**

In [4]:
s3path = 'Data/C_GlobalLandCover_20150101_20190101_Troms-Finnmark.nc'

In [5]:
GLC_AOI = xr.open_dataset(store.open(s3path))

In [6]:
GLC_AOI

In [7]:
GLC_AOI = GLC_AOI.rename(x='lon', y='lat', t='time')

In [8]:
# Drop variables not directly of interest here
GLC_AOI = GLC_AOI.drop_vars(['crs',
                             'Crops_CoverFraction_layer',
                             'Discrete_Classification_map', 
                             'Discrete_Classification_proba',
                             'Forest_Type_layer',
                             'Snow_CoverFraction_layer',
                             'BuiltUp_CoverFraction_layer',
                             'PermanentWater_CoverFraction_layer',
                             'SeasonalWater_CoverFraction_layer',
                             'DataDensityIndicator',
                             'Change_Confidence_layer',
                             'dataMask'])

In [9]:
GLC_AOI = GLC_AOI.rename(Bare_CoverFraction_layer = 'Bare',
                         Grass_CoverFraction_layer = 'Grass',
                         MossLichen_CoverFraction_layer = 'Lichen',
                         Shrub_CoverFraction_layer = 'Shrub',
                         Tree_CoverFraction_layer = 'Tree')

In [10]:
GLC_AOI

### The two cells below redefine a very small region for testing purposes only - skip them to keep the whole Troms-finnmark area

In [11]:
# Small region 
AOI_min_lon = 19.9955
AOI_max_lon = 20
AOI_min_lat = 69
AOI_max_lat = 69.005

In [12]:
GLC_AOI = GLC_AOI.sel(lat=slice(AOI_max_lat, AOI_min_lat), lon=slice(AOI_min_lon, AOI_max_lon))

In [13]:
GLC_AOI

In [50]:
%%time
h3_level = 12
def lat_lon_to_h3(lat, lon):
    return h3.geo_to_h3(lat, lon, h3_level)

CPU times: user 12 µs, sys: 4 µs, total: 16 µs
Wall time: 25.5 µs


In [51]:
df = GLC_AOI.isel(time = 2).to_dataframe()

In [52]:
df

Unnamed: 0_level_0,Unnamed: 1_level_0,time,Bare,Grass,Lichen,Shrub,Tree
lon,lat,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
19.996239,69.004314,2017-01-01,48.0,18.0,22.0,12.0,
19.996239,69.003388,2017-01-01,56.0,25.0,8.0,11.0,
19.996239,69.002462,2017-01-01,52.0,23.0,9.0,16.0,
19.996239,69.001536,2017-01-01,49.0,30.0,,7.0,14.0
19.996239,69.00061,2017-01-01,78.0,22.0,,,
19.997165,69.004314,2017-01-01,61.0,20.0,14.0,5.0,
19.997165,69.003388,2017-01-01,54.0,18.0,15.0,13.0,
19.997165,69.002462,2017-01-01,47.0,28.0,8.0,14.0,3.0
19.997165,69.001536,2017-01-01,52.0,20.0,10.0,18.0,
19.997165,69.00061,2017-01-01,67.0,28.0,,5.0,


In [53]:
df = df.reset_index()

In [54]:
df

Unnamed: 0,lon,lat,time,Bare,Grass,Lichen,Shrub,Tree
0,19.996239,69.004314,2017-01-01,48.0,18.0,22.0,12.0,
1,19.996239,69.003388,2017-01-01,56.0,25.0,8.0,11.0,
2,19.996239,69.002462,2017-01-01,52.0,23.0,9.0,16.0,
3,19.996239,69.001536,2017-01-01,49.0,30.0,,7.0,14.0
4,19.996239,69.00061,2017-01-01,78.0,22.0,,,
5,19.997165,69.004314,2017-01-01,61.0,20.0,14.0,5.0,
6,19.997165,69.003388,2017-01-01,54.0,18.0,15.0,13.0,
7,19.997165,69.002462,2017-01-01,47.0,28.0,8.0,14.0,3.0
8,19.997165,69.001536,2017-01-01,52.0,20.0,10.0,18.0,
9,19.997165,69.00061,2017-01-01,67.0,28.0,,5.0,


In [55]:
df = df.loc[(df['Lichen'] > 0) & (df['Lichen'] <= 100)]

In [56]:
for col in ["Bare", "Grass", "Lichen", "Shrub", "Tree"]:
    print(col)
    df[col] = df[col].fillna(0)

Bare
Grass
Lichen
Shrub
Tree


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[col] = df[col].fillna(0)


In [57]:
df["tot"]  = (df['Bare'] + df['Grass'] + df['Lichen'] + df['Shrub'] + df['Tree'])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["tot"]  = (df['Bare'] + df['Grass'] + df['Lichen'] + df['Shrub'] + df['Tree'])


In [58]:
for col in ["Bare", "Grass", "Lichen", "Shrub", "Tree"]:
    print(col)
    df[col] = df[col] / df["tot"]

Bare
Grass
Lichen
Shrub
Tree


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[col] = df[col] / df["tot"]


In [59]:
df = df.drop(["tot"], axis=1)
df

Unnamed: 0,lon,lat,time,Bare,Grass,Lichen,Shrub,Tree
0,19.996239,69.004314,2017-01-01,0.48,0.18,0.22,0.12,0.0
1,19.996239,69.003388,2017-01-01,0.56,0.25,0.08,0.11,0.0
2,19.996239,69.002462,2017-01-01,0.52,0.23,0.09,0.16,0.0
5,19.997165,69.004314,2017-01-01,0.61,0.2,0.14,0.05,0.0
6,19.997165,69.003388,2017-01-01,0.54,0.18,0.15,0.13,0.0
7,19.997165,69.002462,2017-01-01,0.47,0.28,0.08,0.14,0.03
8,19.997165,69.001536,2017-01-01,0.52,0.2,0.1,0.18,0.0
10,19.998091,69.004314,2017-01-01,0.61,0.2,0.14,0.05,0.0
11,19.998091,69.003388,2017-01-01,0.54,0.18,0.15,0.13,0.0
12,19.998091,69.002462,2017-01-01,0.47,0.28,0.08,0.14,0.03


In [60]:
df

Unnamed: 0,lon,lat,time,Bare,Grass,Lichen,Shrub,Tree
0,19.996239,69.004314,2017-01-01,0.48,0.18,0.22,0.12,0.0
1,19.996239,69.003388,2017-01-01,0.56,0.25,0.08,0.11,0.0
2,19.996239,69.002462,2017-01-01,0.52,0.23,0.09,0.16,0.0
5,19.997165,69.004314,2017-01-01,0.61,0.2,0.14,0.05,0.0
6,19.997165,69.003388,2017-01-01,0.54,0.18,0.15,0.13,0.0
7,19.997165,69.002462,2017-01-01,0.47,0.28,0.08,0.14,0.03
8,19.997165,69.001536,2017-01-01,0.52,0.2,0.1,0.18,0.0
10,19.998091,69.004314,2017-01-01,0.61,0.2,0.14,0.05,0.0
11,19.998091,69.003388,2017-01-01,0.54,0.18,0.15,0.13,0.0
12,19.998091,69.002462,2017-01-01,0.47,0.28,0.08,0.14,0.03


In [61]:
dv = vaex.from_pandas(df)

In [62]:
dv

#,lon,lat,time,Bare,Grass,Lichen,Shrub,Tree
0,19.99623942888928,69.00431405570362,2017-01-01 00:00:00.000000000,0.48,0.18,0.22,0.12,0.0
1,19.99623942888928,69.00338812977769,2017-01-01 00:00:00.000000000,0.56,0.25,0.08,0.11,0.0
2,19.99623942888928,69.00246220385176,2017-01-01 00:00:00.000000000,0.52,0.23,0.09,0.16,0.0
3,19.997165354815206,69.00431405570362,2017-01-01 00:00:00.000000000,0.61,0.2,0.14,0.05,0.0
4,19.997165354815206,69.00338812977769,2017-01-01 00:00:00.000000000,0.54,0.18,0.15,0.13,0.0
...,...,...,...,...,...,...,...,...
14,19.99901720666706,69.00153627792584,2017-01-01 00:00:00.000000000,0.52,0.2,0.1,0.18,0.0
15,19.999943132592982,69.00431405570362,2017-01-01 00:00:00.000000000,0.61,0.2,0.14,0.05,0.0
16,19.999943132592982,69.00338812977769,2017-01-01 00:00:00.000000000,0.54,0.18,0.15,0.13,0.0
17,19.999943132592982,69.00246220385176,2017-01-01 00:00:00.000000000,0.47,0.28,0.08,0.14,0.03


In [63]:
dv["h3"] = dv.apply(lat_lon_to_h3, [dv.lon, dv.lat])

In [64]:
dv.apply(lat_lon_to_h3, [dv.lon, dv.lat])

Expression = lambda_function_1(lon, lat)
Length: 19 dtype: string (expression)
-------------------------------------
 0  8c42d0509ad97ff
 1  8c42d0509adc5ff
 2  8c42d0509ad55ff
 3  8c42d0509ac81ff
 4  8c42d0509ac11ff
        ...        
14  8c42d0509a0d3ff
15  8c42d0509a481ff
16  8c42d0509a41dff
17  8c42d0509a44dff
18  8c42d0509a74bff

In [65]:
dv

#,lon,lat,time,Bare,Grass,Lichen,Shrub,Tree,h3
0,19.99623942888928,69.00431405570362,2017-01-01 00:00:00.000000000,0.48,0.18,0.22,0.12,0.0,8c42d0509ad97ff
1,19.99623942888928,69.00338812977769,2017-01-01 00:00:00.000000000,0.56,0.25,0.08,0.11,0.0,8c42d0509adc5ff
2,19.99623942888928,69.00246220385176,2017-01-01 00:00:00.000000000,0.52,0.23,0.09,0.16,0.0,8c42d0509ad55ff
3,19.997165354815206,69.00431405570362,2017-01-01 00:00:00.000000000,0.61,0.2,0.14,0.05,0.0,8c42d0509ac81ff
4,19.997165354815206,69.00338812977769,2017-01-01 00:00:00.000000000,0.54,0.18,0.15,0.13,0.0,8c42d0509ac11ff
...,...,...,...,...,...,...,...,...,...
14,19.99901720666706,69.00153627792584,2017-01-01 00:00:00.000000000,0.52,0.2,0.1,0.18,0.0,8c42d0509a0d3ff
15,19.999943132592982,69.00431405570362,2017-01-01 00:00:00.000000000,0.61,0.2,0.14,0.05,0.0,8c42d0509a481ff
16,19.999943132592982,69.00338812977769,2017-01-01 00:00:00.000000000,0.54,0.18,0.15,0.13,0.0,8c42d0509a41dff
17,19.999943132592982,69.00246220385176,2017-01-01 00:00:00.000000000,0.47,0.28,0.08,0.14,0.03,8c42d0509a44dff


# ERA5-land data from 2015-01-01 to 2019-12-31 1 already available as a netCDF file stored on EOSC (CESNET)
## 2m Temperature, Snow depth, Total precipitation

In [66]:
s3path = 'Data/reanalysis-era5-land_hourly_2015-01-01_2019-12-31_Troms-Finnmark_T2m-SD-TP.nc'

In [67]:
ERA5land = xr.open_dataset(store.open(s3path))

In [68]:
ERA5land

In [69]:
# ERA5land_AOI = ERA5land.sel(latitude=slice(AOI_max_lat, AOI_min_lat), longitude=slice(AOI_min_lon, AOI_max_lon))

In [70]:
ERA5land

In [71]:
dg = ERA5land.isel(time = 1).to_dataframe()

In [72]:
dg = dg.reset_index()

In [73]:
dg

Unnamed: 0,longitude,latitude,time,sd,t2m,tp
0,15.59,71.150002,2015-01-01 01:00:00,,,
1,15.59,71.050003,2015-01-01 01:00:00,,,
2,15.59,70.949997,2015-01-01 01:00:00,,,
3,15.59,70.849998,2015-01-01 01:00:00,,,
4,15.59,70.750000,2015-01-01 01:00:00,,,
...,...,...,...,...,...,...
4490,30.99,68.750000,2015-01-01 01:00:00,0.061398,271.424042,0.000006
4491,30.99,68.650002,2015-01-01 01:00:00,0.040282,271.004028,0.000004
4492,30.99,68.550003,2015-01-01 01:00:00,0.021063,270.222809,0.000003
4493,30.99,68.449997,2015-01-01 01:00:00,0.025749,269.753693,0.000002


In [74]:
dg = ERA5land[['longitude','latitude']].to_dataframe()

In [75]:
dg = dg.reset_index()

In [76]:
dw = vaex.from_pandas(dg)

In [77]:
dw["h3"] = dw.apply(lat_lon_to_h3, [dw.latitude, dw.longitude])

In [78]:
dw

#,longitude,latitude,h3
0,15.59000015258789,71.1500015258789,8c09489607895ff
1,15.59000015258789,71.05000305175781,8c0948825322dff
2,15.59000015258789,70.94999694824219,8c09488146129ff
3,15.59000015258789,70.8499984741211,8c09488b33005ff
4,15.59000015258789,70.75,8c0948c626aa9ff
...,...,...,...
4490,30.989999771118164,68.75,8c08cb3a06a39ff
4491,30.989999771118164,68.6500015258789,8c08cb0c89a6dff
4492,30.989999771118164,68.55000305175781,8c08cb72c95ebff
4493,30.989999771118164,68.44999694824219,8c08cb7304d0bff


In [80]:
dv

#,lon,lat,time,Bare,Grass,Lichen,Shrub,Tree,h3
0,19.99623942888928,69.00431405570362,2017-01-01 00:00:00.000000000,0.48,0.18,0.22,0.12,0.0,8c42d0509ad97ff
1,19.99623942888928,69.00338812977769,2017-01-01 00:00:00.000000000,0.56,0.25,0.08,0.11,0.0,8c42d0509adc5ff
2,19.99623942888928,69.00246220385176,2017-01-01 00:00:00.000000000,0.52,0.23,0.09,0.16,0.0,8c42d0509ad55ff
3,19.997165354815206,69.00431405570362,2017-01-01 00:00:00.000000000,0.61,0.2,0.14,0.05,0.0,8c42d0509ac81ff
4,19.997165354815206,69.00338812977769,2017-01-01 00:00:00.000000000,0.54,0.18,0.15,0.13,0.0,8c42d0509ac11ff
...,...,...,...,...,...,...,...,...,...
14,19.99901720666706,69.00153627792584,2017-01-01 00:00:00.000000000,0.52,0.2,0.1,0.18,0.0,8c42d0509a0d3ff
15,19.999943132592982,69.00431405570362,2017-01-01 00:00:00.000000000,0.61,0.2,0.14,0.05,0.0,8c42d0509a481ff
16,19.999943132592982,69.00338812977769,2017-01-01 00:00:00.000000000,0.54,0.18,0.15,0.13,0.0,8c42d0509a41dff
17,19.999943132592982,69.00246220385176,2017-01-01 00:00:00.000000000,0.47,0.28,0.08,0.14,0.03,8c42d0509a44dff


In [84]:
dw[(dw['h3'] == "8c42d0509a44dff")]

#,longitude,latitude,h3
,,,


In [48]:
dfall = dg.join(dw, how='right',left_on='h3',right_on='h3', allow_duplication=True)

TypeError: join() got an unexpected keyword argument 'left_on'

In [None]:
dfall

In [None]:
import pys2index