# Prepare labelled input for the NN
# (i.e. locations where the moss & lichen fractional cover changed, and the related meteorological parameters from ERA5-Land)

# Copernicus Global Land Cover data from 2015-01-01 to 2019-12-31, already available as a netCDF file stored on EOSC (CESNET)
## Troms og Finnmark
### Mosses and lichens, grass, shrubs and trees

In [None]:
!date

### Define s3 storage parameters

In [None]:
import s3fs
import xarray as xr
import h3
import vaex

In [None]:
# Non-anonymous (anon=False) S3 filesystem pointed at the custom object-store
# endpoint; the final call lists what is available under 'Data'.
client_kwargs = dict(endpoint_url='https://object-store.cloud.muni.cz')
store = s3fs.S3FileSystem(client_kwargs=client_kwargs, anon=False)
store.ls('Data', refresh=True, detail=True)

### Define s3 store for the **netCDF file**

In [None]:
s3path = 'Data/C_GlobalLandCover_20150101_20190101_Troms-Finnmark.nc'

In [None]:
GLC_AOI = xr.open_dataset(store.open(s3path))

In [None]:
GLC_AOI

In [None]:
# Rename x, y, t to the more descriptive lon, lat, time
GLC_AOI = GLC_AOI.rename({'x': 'lon', 'y': 'lat', 't': 'time'})

In [None]:
# Variables that are not directly of interest here and are removed
unused_vars = [
    'crs',
    'Crops_CoverFraction_layer',
    'Discrete_Classification_map',
    'Discrete_Classification_proba',
    'Forest_Type_layer',
    'Snow_CoverFraction_layer',
    'BuiltUp_CoverFraction_layer',
    'PermanentWater_CoverFraction_layer',
    'SeasonalWater_CoverFraction_layer',
    'DataDensityIndicator',
    'Change_Confidence_layer',
    'dataMask',
]
GLC_AOI = GLC_AOI.drop_vars(unused_vars)

In [None]:
# Shorten the names of the remaining cover-fraction variables
short_names = {
    'Bare_CoverFraction_layer': 'Bare',
    'Grass_CoverFraction_layer': 'Grass',
    'MossLichen_CoverFraction_layer': 'Lichen',
    'Shrub_CoverFraction_layer': 'Shrub',
    'Tree_CoverFraction_layer': 'Tree',
}
GLC_AOI = GLC_AOI.rename(short_names)

### The two cells below restrict the data to a very small region, for testing purposes only. Skip them to keep the whole Troms-Finnmark area.

In [None]:
# Small region: longitude/latitude bounds of the test area of interest
AOI_min_lon, AOI_max_lon = 19.9955, 20
AOI_min_lat, AOI_max_lat = 69, 69.005

In [None]:
# Crop the dataset to the AOI bounds. The latitude slice runs from max to min
# (presumably because the lat coordinate is stored in descending order -
# TODO confirm against the dataset).
lat_window = slice(AOI_max_lat, AOI_min_lat)
lon_window = slice(AOI_min_lon, AOI_max_lon)
GLC_AOI = GLC_AOI.sel(lat=lat_window, lon=lon_window)

In [None]:
GLC_AOI

In [None]:
%%time
h3_level = 11  # H3 resolution used for every index computed below


def lat_lon_to_h3(lat, lon):
    """Return the H3 index of the point (lat, lon) at resolution ``h3_level``.

    Note the argument order: latitude first, then longitude.
    """
    cell = h3.geo_to_h3(lat, lon, h3_level)
    return cell

In [None]:
# Pick the time step at index 2 and flatten it into a pandas DataFrame
snapshot = GLC_AOI.isel(time=2)
df = snapshot.to_dataframe()

In [None]:
df

In [None]:
df = df.reset_index()

In [None]:
df

In [None]:
# Drop rows where the moss & lichen cover fraction is NaN.
# The variable 'MossLichen_CoverFraction_layer' was renamed to 'Lichen' when
# the cover-fraction layers were given short names, so the DataFrame column is
# 'Lichen'; using the old name here raises a KeyError.
df = df.dropna(subset=['Lichen'])

In [None]:
df

In [None]:
dv = vaex.from_pandas(df)

In [None]:
dv

In [None]:
# lat_lon_to_h3 expects (lat, lon), so the latitude column must come first.
# Passing the columns as (lon, lat) would index a transposed coordinate.
dv["h3"] = dv.apply(lat_lon_to_h3, [dv.lat, dv.lon])

In [None]:
# Preview of the H3 expression; arguments are in (lat, lon) order to match
# the signature of lat_lon_to_h3.
dv.apply(lat_lon_to_h3, [dv.lat, dv.lon])

In [None]:
dfm = dv.materialize('h3')

In [None]:
dfm

# ERA5-Land data from 2015-01-01 to 2019-12-31, already available as a netCDF file stored on EOSC (CESNET)
## 2m Temperature, Snow depth, Total precipitation

In [None]:
s3path = 'Data/reanalysis-era5-land_hourly_2015-01-01_2019-12-31_Troms-Finnmark_T2m-SD-TP.nc'

In [None]:
ERA5land = xr.open_dataset(store.open(s3path))

In [None]:
ERA5land

In [None]:
# ERA5land_AOI = ERA5land.sel(latitude=slice(AOI_max_lat, AOI_min_lat), longitude=slice(AOI_min_lon, AOI_max_lon))

In [None]:
ERA5land

In [None]:
dg = ERA5land.isel(time = 1).to_dataframe()

In [None]:
dg = dg.reset_index()

In [None]:
dg

In [None]:
# Rebuild dg from the longitude/latitude coordinates only; this replaces the
# dg that was created from a full time step earlier.
coord_names = ['longitude', 'latitude']
dg = ERA5land[coord_names].to_dataframe()

In [None]:
dg = dg.reset_index()

In [None]:
dw = vaex.from_pandas(dg)

In [None]:
# Compute the H3 index of every ERA5-Land grid point; the columns are passed
# in (latitude, longitude) order, matching lat_lon_to_h3(lat, lon).
era5_coords = [dw.latitude, dw.longitude]
dw["h3"] = dw.apply(lat_lon_to_h3, era5_coords)

In [None]:
dfn = dw.materialize('h3')

In [None]:
dfn

In [None]:
# Right-join the land-cover table (dfm) onto the ERA5-Land grid table (dfn)
# using the 'h3' column of each as the key; allow_duplication=True lets one
# key match several rows.
# NOTE(review): both frames carry a column named 'h3' and no lsuffix/rsuffix
# is given - verify that vaex accepts this without a name-collision error.
# NOTE(review): rows only match when both grids yield the *same* H3 index at
# resolution h3_level; confirm that the two grids actually share cells at
# that resolution, otherwise the land-cover columns will be empty.
dfall = dfm.join(dfn, how='right',left_on='h3',right_on='h3', allow_duplication=True)

In [None]:
dfall

In [None]:
import pys2index