# Prepare labelled input for the NN
# (i.e. locations where moss&lichen fractional cover changed and related meteorological parameters from ERA5-Land)

# Copernicus Global Land Cover data from 2015-01-01 to 2019-12-31 already available as a netCDF file stored on EOSC (CESNET)
## Troms og Finnmark
### Mosses and lichens, bare, grass, shrubs and trees

In [None]:
# Run the shell `date` command so the notebook output records when it was executed
!date

### Define s3 storage parameters

In [None]:
import s3fs
import xarray as xr
import h3
import vaex

In [None]:
# Connect to the S3-compatible object store at the endpoint below.
# anon=False requests authenticated (non-anonymous) access.
client_kwargs={'endpoint_url': 'https://object-store.cloud.muni.cz'}
store = s3fs.S3FileSystem(anon=False, client_kwargs=client_kwargs)
# List the content of 'Data' with details; refresh=True asks for a fresh listing
store.ls('Data', detail=True, refresh=True)

### Define s3 store for the **netCDF file**

In [None]:
# Path (inside the object store) of the Global Land Cover netCDF file
s3path = 'Data/C_GlobalLandCover_20150101_20190101_Troms-Finnmark.nc'

In [None]:
# Open the netCDF file through the s3 store as an xarray Dataset
GLC_AOI = xr.open_dataset(store.open(s3path))

In [None]:
GLC_AOI

In [None]:
# Give the x, y and t names the more explicit names lon, lat and time
GLC_AOI = GLC_AOI.rename({'x': 'lon', 'y': 'lat', 't': 'time'})

In [None]:
# Drop variables not directly of interest here
# (the bare, grass, moss & lichen, shrub and tree cover fractions are not in
# this list and are therefore kept)
GLC_AOI = GLC_AOI.drop_vars(['crs',
                             'Crops_CoverFraction_layer',
                             'Discrete_Classification_map', 
                             'Discrete_Classification_proba',
                             'Forest_Type_layer',
                             'Snow_CoverFraction_layer',
                             'BuiltUp_CoverFraction_layer',
                             'PermanentWater_CoverFraction_layer',
                             'SeasonalWater_CoverFraction_layer',
                             'DataDensityIndicator',
                             'Change_Confidence_layer',
                             'dataMask'])

In [None]:
# Shorten the names of the cover-fraction layers that are kept
GLC_AOI = GLC_AOI.rename({
    'Bare_CoverFraction_layer': 'Bare',
    'Grass_CoverFraction_layer': 'Grass',
    'MossLichen_CoverFraction_layer': 'Lichen',
    'Shrub_CoverFraction_layer': 'Shrub',
    'Tree_CoverFraction_layer': 'Tree',
})

In [None]:
GLC_AOI

In [None]:
# Troms & Finnmark Global Land Cover area
# Bounding box taken from the dataset's lon/lat values.
# NOTE(review): .min()/.max() are called on xarray objects, so these are
# presumably 0-d xarray values rather than plain Python floats - confirm
# before using them where a float is required.
GLC_AOI_min_lon = GLC_AOI.lon.min()
GLC_AOI_max_lon = GLC_AOI.lon.max()
GLC_AOI_min_lat = GLC_AOI.lat.min()
GLC_AOI_max_lat = GLC_AOI.lat.max()
print(GLC_AOI_min_lon, GLC_AOI_max_lon, GLC_AOI_min_lat, GLC_AOI_max_lat)

### The cell below defines a very small region for testing purposes only - skip it to keep the whole Troms-Finnmark area

In [None]:
# Small region (0.1 x 0.1 in lon/lat) used for testing.
# The bounds are stored under separate Small_AOI_* names, so the GLC_AOI_*
# bounds computed from the dataset are not overwritten by this cell.
Small_AOI_min_lon = 19.6
Small_AOI_max_lon = 19.7
Small_AOI_min_lat = 69
Small_AOI_max_lat = 69.1

In [None]:
from h3 import h3
import folium

def visualize_hexagons(hexagons, color="red", folium_map=None):
    """
    Draw the outline of each H3 cell in hexagons on a folium map.

    hexagons is a flat list of H3 cell indexes, eg. [hex1, hex2, hex3].
    color is the line color given to each folium.PolyLine.
    folium_map is an existing map to draw on. When it is None a new map is
    created, centred on the mean of all outline vertices; hexagons must then
    contain at least one cell, otherwise the mean cannot be computed.

    Returns the map with one PolyLine added per cell.
    """
    polylines = []
    lat = []
    lng = []
    for cell in hexagons:
        polygons = h3.h3_set_to_multi_polygon([cell], geo_json=False)
        # flatten polygons into loops.
        outlines = [loop for polygon in polygons for loop in polygon]
        # Only the first loop is drawn; repeat its first vertex to close it.
        first_outline = outlines[0]
        polyline = first_outline + [first_outline[0]]
        lat.extend(vertex[0] for vertex in polyline)
        lng.extend(vertex[1] for vertex in polyline)
        polylines.append(polyline)

    if folium_map is None:
        m = folium.Map(location=[sum(lat)/len(lat), sum(lng)/len(lng)], zoom_start=13, tiles='cartodbpositron')
    else:
        m = folium_map
    for polyline in polylines:
        my_PolyLine=folium.PolyLine(locations=polyline,weight=8,color=color)
        m.add_child(my_PolyLine)
    return m
    

def visualize_polygon(polyline, color):
    """
    Draw a closed polygon outline on a new folium map.

    polyline is a non-empty list of vertices, each indexable as
    (p[0], p[1]) = (lat, lng). The list is not modified.
    color is the line color given to folium.PolyLine.

    Returns a new folium map centred on the mean of the closed ring's
    vertices (the repeated first vertex is included in that mean).
    """
    # Close the ring on a copy so the caller's list is left untouched.
    closed = list(polyline) + [polyline[0]]
    lat = [p[0] for p in closed]
    lng = [p[1] for p in closed]
    m = folium.Map(location=[sum(lat)/len(lat), sum(lng)/len(lng)], zoom_start=13, tiles='cartodbpositron')
    my_PolyLine=folium.PolyLine(locations=closed,weight=8,color=color)
    m.add_child(my_PolyLine)
    return m

In [None]:
# Resolution-9 H3 cell containing the centre of the small test region
Small_AOI_center = h3.geo_to_h3(
    (Small_AOI_min_lat + Small_AOI_max_lat)/2,
    (Small_AOI_min_lon + Small_AOI_max_lon)/2,
    9,
)  # lat, lng, hex resolution

# Red hexagons at the four corners of the land-cover bounding box; the first
# call receives folium_map=None and therefore creates the map
m = None
for corner_lat, corner_lon in (
    (GLC_AOI_min_lat, GLC_AOI_min_lon),
    (GLC_AOI_max_lat, GLC_AOI_min_lon),
    (GLC_AOI_max_lat, GLC_AOI_max_lon),
    (GLC_AOI_min_lat, GLC_AOI_max_lon),
):
    m = visualize_hexagons([h3.geo_to_h3(corner_lat, corner_lon, 9)], color="red", folium_map=m)

# Green hexagon at the centre of the small test region
m = visualize_hexagons([Small_AOI_center], color="green", folium_map=m)
display(m)

## Year 2015

In [None]:
# Select the first time step (2015, per the section heading) and convert it to a pandas DataFrame
df = GLC_AOI.isel(time = 0).to_dataframe()

In [None]:
df

In [None]:
# Turn the index produced by to_dataframe() back into ordinary columns
df = df.reset_index()

In [None]:
df

In [None]:
# Only keep locations where there is some moss & lichen (0 < Lichen <= 100).
# .copy() makes the result an independent DataFrame, so that assigning to its
# columns afterwards does not write into a slice of the unfiltered frame
# (which pandas reports as SettingWithCopyWarning).
df = df.loc[(df['Lichen'] > 0) & (df['Lichen'] <= 100)].copy()

In [None]:
# Replace NaNs by 0 in each of the five cover-fraction columns
for col in ["Bare", "Grass", "Lichen", "Shrub", "Tree"]:
    print(col)
    df[col] = df[col].fillna(0)

In [None]:
# Calculate total fractional coverage of bare, grass, lichen, shrub and tree (should be 100)
# NOTE(review): classes dropped earlier (e.g. crops, snow, water, built-up) are
# not part of this sum, so the total may be below 100 - confirm
df["tot"]  = (df['Bare'] + df['Grass'] + df['Lichen'] + df['Shrub'] + df['Tree'])

In [None]:
df

In [None]:
# Normalize the fractional cover: divide each class by the row total, so the
# five columns become shares of that total
for col in ["Bare", "Grass", "Lichen", "Shrub", "Tree"]:
    print(col)
    df[col] = df[col] / df["tot"]

In [None]:
# Drop the *tot* column, which was only needed for the normalization
df = df.drop(["tot"], axis=1)

In [None]:
df

In [None]:
# Convert the pandas DataFrame to a vaex DataFrame
dv = vaex.from_pandas(df)

In [None]:
dv

In [None]:
# Find the corresponding ERA5-land lat-lon grid index for each location
# NOTE(review): 15.59 / 68.35 are presumably the first longitude / latitude of
# the ERA5-land grid and 0.1 its spacing - TODO confirm against
# ERA5land.longitude / ERA5land.latitude instead of hard-coding them.
# astype('int') truncates, so this does not round to the nearest grid point.
ERA5_lon = ((dv.lon - 15.59) / 0.1).astype('int').values
ERA5_lat = ((dv.lat - 68.35) / 0.1).astype('int').values

In [None]:
# Store the grid indices as new columns of dv
dv['ERA5_lon_index'] = ERA5_lon
dv['ERA5_lat_index'] = ERA5_lat

In [None]:
dv

# ERA5-land data from 2015-01-01 to 2019-12-31 already available as a netCDF file stored on EOSC (CESNET)
## 2m Temperature, Snow depth, Total precipitation

In [None]:
# Path (inside the object store) of the ERA5-land netCDF file; this reuses
# the s3path name that previously held the land-cover path
s3path = 'Data/reanalysis-era5-land_hourly_2015-01-01_2019-12-31_Troms-Finnmark_T2m-SD-TP.nc'

In [None]:
# Open the netCDF file through the s3 store as an xarray Dataset
ERA5land = xr.open_dataset(store.open(s3path))

In [None]:
ERA5land

# Adding columns with the ERA5-land longitude and latitude to dv

In [None]:
# Look up the ERA5-land longitude at each precomputed grid index
dv['ERA5_lon'] = ERA5land.longitude[dv['ERA5_lon_index'].values].values

In [None]:
# Look up the ERA5-land latitude at each precomputed grid index
# NOTE(review): the index was computed as an offset from 68.35, which assumes
# ERA5land.latitude is stored in ascending order starting there - confirm
dv['ERA5_lat'] = ERA5land.latitude[dv['ERA5_lat_index'].values].values

In [None]:
dv

In [None]:
# Pack lon and lat into one float key: the integer part is lon*100 truncated,
# the fractional part is (lat*100 truncated) / 10000,
# e.g. lon 19.6, lat 69.0 -> 1960 + 0.69 = 1960.69
# NOTE(review): truncating lon*100 / lat*100 is sensitive to floating-point
# rounding, and the key is later matched by float equality - verify
dv['ERA5_index'] = (dv['ERA5_lon'] * 100).astype('int') + ((dv['ERA5_lat'] * 100).astype('int') / 10000)

In [None]:
dv

In [None]:
# Year 2015 - first 10 days
import pandas as pd
# 10*24 = 240 hourly timestamps starting at 2015-01-01
saison = pd.date_range("2015-01-01", periods=10*24, freq="1H")

In [None]:
saison

In [None]:
# Select the hourly time steps listed in saison and convert them to a pandas DataFrame
dg = ERA5land.sel(time = saison).to_dataframe()

In [None]:
# Turn the index produced by to_dataframe() back into ordinary columns
dg = dg.reset_index()

In [None]:
dg

In [None]:
# Build the same lon/lat float key as the ERA5_index column of dv
dg['ERA5_index'] = (dg['longitude'] * 100).astype('int') + ((dg['latitude'] * 100).astype('int') / 10000)

In [None]:
dg

In [None]:
# Convert the pandas DataFrame to a vaex DataFrame
dw = vaex.from_pandas(dg)

In [None]:
# Drop longitude, latitude and time; ERA5_index stays as the location key
dw = dw.drop('longitude').drop('latitude').drop('time')

In [None]:
dw

In [None]:
# Drop the helper columns of dv; ERA5_index stays as the location key
dv = dv.drop('ERA5_lon_index').drop('ERA5_lat_index').drop('ERA5_lon').drop('ERA5_lat')

In [None]:
dv

# Merge the related ERA5-land data (for a predefined period of time) into dv for each lichen location

## That is not right

In [None]:
# Join the ERA5-land rows onto the lichen locations via the ERA5_index key.
# dw was built from several hourly time steps, so a key can occur in many
# rows of dw, hence allow_duplication=True.
# The result is only displayed here; it is not assigned to a name.
dv.join(dw, left_on='ERA5_index', right_on='ERA5_index', allow_duplication=True)

In [None]:
# The groupby result is neither aggregated nor assigned here
dw.groupby(by='ERA5_index')

In [None]:
# Rebuild dg from the longitude / latitude selection of ERA5land
# (this replaces the hourly dg built earlier)
dg = ERA5land[['longitude','latitude']].to_dataframe()

In [None]:
# Turn the index produced by to_dataframe() back into ordinary columns
dg = dg.reset_index()

In [None]:
# Convert to a vaex DataFrame (this replaces the earlier dw)
dw = vaex.from_pandas(dg)

In [None]:
# NOTE(review): lat_lon_to_h3 is not defined or imported in any cell visible
# above - this cell fails with NameError unless it is defined elsewhere.
# Presumably it maps (latitude, longitude) to an H3 cell index - confirm.
dw["h3"] = dw.apply(lat_lon_to_h3, [dw.latitude, dw.longitude])

In [None]:
dw

In [None]:
dv

In [None]:
# Show the rows of dw whose h3 value equals this specific index
dw[(dw['h3'] == "8c42d0509a44dff")]

In [None]:
# NOTE(review): dg is the pandas DataFrame created from ERA5land, but
# left_on / right_on / allow_duplication are the arguments used with the vaex
# join on dv earlier in this file, and no visible cell adds an 'h3' column
# to dg - presumably dv (with an 'h3' column) was intended; confirm.
dfall = dg.join(dw, how='right',left_on='h3',right_on='h3', allow_duplication=True)

In [None]:
dfall

In [None]:
import pys2index