# Lichen fractional cover statistics 
# Prepares labelled input for the Machine Learning algorithm
# (i.e. locations where moss&lichen fractional cover changes can be related to meteorological parameters from ERA5-Land)

# Copernicus Global Land Cover
## Percentage of 100m pixel that is covered by a specific class of land cover
## Valid values 0-100, 200 = masked sea, 255 = missing

### This notebook uses TensorFlow NGC Container Release 23.03-tf2-py3
### (https://catalog.ngc.nvidia.com/orgs/nvidia/containers/tensorflow)
### and runs on a machine with 4x ARM CPUs (Neoverse N1) and 24GB RAM

In [1]:
!date

Fri Apr 14 13:36:50 UTC 2023


In [2]:
pip install xarray h5netcdf s3fs tables

Collecting xarray
  Using cached xarray-2023.1.0-py3-none-any.whl (973 kB)
Collecting h5netcdf
  Using cached h5netcdf-1.1.0-py2.py3-none-any.whl (26 kB)
Collecting s3fs
  Using cached s3fs-2023.4.0-py3-none-any.whl (28 kB)
Collecting fsspec==2023.4.0
  Using cached fsspec-2023.4.0-py3-none-any.whl (153 kB)
Collecting aiohttp!=4.0.0a0,!=4.0.0a1
  Using cached aiohttp-3.8.4-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl (1.0 MB)
Collecting aiobotocore~=2.5.0
  Using cached aiobotocore-2.5.0-py3-none-any.whl (72 kB)
Collecting aioitertools>=0.5.1
  Using cached aioitertools-0.11.0-py3-none-any.whl (23 kB)
Collecting botocore<1.29.77,>=1.29.76
  Using cached botocore-1.29.76-py3-none-any.whl (10.4 MB)
Collecting frozenlist>=1.1.1
  Using cached frozenlist-1.3.3-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl (162 kB)
Collecting yarl<2.0,>=1.0
  Using cached yarl-1.8.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl (258 kB)
Collecting async-timeout<5.0,>=

In [3]:
import numpy as np
import os
import pandas as pd
#import s3fs
import xarray as xr
#import vaex

# Input datasets, either from s3 storage or local files

# If data is available locally then jump the following cells

### Define s3 storage parameters

In [4]:
#client_kwargs={'endpoint_url': 'https://object-store.cloud.muni.cz'}
#store = s3fs.S3FileSystem(anon=False, client_kwargs=client_kwargs)
#store.ls('Data', detail=True, refresh=True)

## Copernicus Global Land Cover data  from 2015-01-01 to 2019-12-31 already available as a netCDF file stored on EOSC (CESNET)
## Troms og Finnmark
### Mosses and lichens, bare, grass, shrubs and trees

In [5]:
#s3path = 'Data/C_GlobalLandCover_20150101_20190101_Troms-Finnmark.nc'

In [6]:
# GLC_AOI = xr.open_dataset(store.open(s3path))

## ERA5-land data from 2015-01-01 to 2019-12-31 - already available as a netCDF file stored on EOSC (CESNET)
## 2m Temperature, Snow depth, Total precipitation

In [7]:
#s3path = 'Data/reanalysis-era5-land_hourly_2015-01-01_2022-12-31_Troms-Finnmark_T2m-SD-TP.nc'

In [8]:
# ERA5land = xr.open_dataset(store.open(s3path))

# Datasets from **local** files
## when running pangeo/ml-notebook with apptainer/singularity the path for the data folder is /home/ubuntu/data

In [9]:
path = '/home/jeani/data/'

In [10]:
# World Land cover data from 2015-01-01 to 2019-12-31- already available as a netCDF file stored locally
GLC_filename = os.path.join(path, 'C_GlobalLandCover_20150101_20190101_Troms-Finnmark.nc')
GLC_AOI = xr.open_dataset(GLC_filename)

In [11]:
# ERA5-land data already available as a netCDF file stored locally
ERA5_filename = os.path.join(path, 'reanalysis-era5-land_hourly_2015-01-01_2019-12-31_Troms-Finnmark_T2m-SD-TP.nc')
ERA5land = xr.open_dataset(ERA5_filename)

In [12]:
ERA5land

In [13]:
Latitudes = ERA5land.latitude.to_index()
Longitudes = ERA5land.longitude.to_index()

In [14]:
Latitudes

Float64Index([ 71.1500015258789, 71.05000305175781, 70.94999694824219,
               70.8499984741211,             70.75,  70.6500015258789,
              70.55000305175781, 70.44999694824219,  70.3499984741211,
                          70.25,  70.1500015258789, 70.05000305175781,
              69.94999694824219,  69.8499984741211,             69.75,
               69.6500015258789, 69.55000305175781, 69.44999694824219,
               69.3499984741211,             69.25,  69.1500015258789,
              69.05000305175781, 68.94999694824219,  68.8499984741211,
                          68.75,  68.6500015258789, 68.55000305175781,
              68.44999694824219,  68.3499984741211],
             dtype='float64', name='latitude')

In [15]:
GLC_AOI = GLC_AOI.rename(x='lon', y='lat', t='time')

In [16]:
GLC_AOI

In [17]:
# Drop variables not directly of interest here
GLC_AOI = GLC_AOI.drop_vars(['crs',
                             'Bare_CoverFraction_layer',
                             'Crops_CoverFraction_layer',
                             'Grass_CoverFraction_layer',
                             'Discrete_Classification_map', 
                             'Discrete_Classification_proba',
                             'Forest_Type_layer',
                             'Shrub_CoverFraction_layer',
                             'Snow_CoverFraction_layer',
                             'Tree_CoverFraction_layer',
                             'BuiltUp_CoverFraction_layer',
                             'PermanentWater_CoverFraction_layer',
                             'SeasonalWater_CoverFraction_layer',
                             'DataDensityIndicator',
                             'Change_Confidence_layer',
                             'dataMask'])

In [18]:
GLC_AOI = GLC_AOI.rename(MossLichen_CoverFraction_layer = 'Lichen')

In [19]:
GLC_AOI

In [20]:
# Troms & Finnmark Global Land Cover area
GLC_AOI_min_lon = GLC_AOI.lon.min()
GLC_AOI_max_lon = GLC_AOI.lon.max()
GLC_AOI_min_lat = GLC_AOI.lat.min()
GLC_AOI_max_lat = GLC_AOI.lat.max()
print(GLC_AOI_min_lon.values, GLC_AOI_max_lon.values, GLC_AOI_min_lat.values, GLC_AOI_max_lat.values)

15.595313502963002 31.06568387333461 68.35153627792579 71.18764738903712


## Use the mask to only keep pixels with lichen **every year**

In [21]:
mask = GLC_AOI['Lichen'].where((GLC_AOI['Lichen'] > 0) & (GLC_AOI['Lichen'] <= 100))

In [22]:
mask = xr.where(mask > 0, 1, 0)

In [23]:
mask = mask.sum(dim = 'time', min_count = 5, skipna=True)

In [24]:
mask = xr.where(mask >= 1, 1, 0)

In [25]:
mask

In [26]:
mask.sum()

In [27]:
de = GLC_AOI.where(mask == 1)

In [28]:
de

In [29]:
de = de.to_dataframe()

In [30]:
de = de.dropna()

In [31]:
de = de.reset_index()

In [32]:
de

Unnamed: 0,time,lat,lon,Lichen
0,2015-01-01,71.167277,25.807351,3.0
1,2015-01-01,71.167277,25.808276,3.0
2,2015-01-01,71.167277,25.809202,3.0
3,2015-01-01,71.166351,25.745314,10.0
4,2015-01-01,71.166351,25.746239,10.0
...,...,...,...,...
2497146,2019-01-01,68.352462,19.333276,11.0
2497147,2019-01-01,68.352462,19.338832,5.0
2497148,2019-01-01,68.352462,19.339758,5.0
2497149,2019-01-01,68.352462,19.987906,4.0


# Run from here until the end for each year (2015, 2016, 2017 and 2018)

In [33]:
# Each year in a separate dataset and keep only the first 365 days
Year = 2015
Number_of_days = 365
print('x = WLC(' + str(Year)+ ') joined with ERA5land(' + str(Year + 1) + ')')
print('y = WLC(' + str(Year + 1) + ')')

x = WLC(2015) joined with ERA5land(2016)
y = WLC(2016)


In [34]:
# Only keep locations with lichen for the current year
df = de.loc[de['time'] == str(Year) + '-01-01']
dg = de.loc[de['time'] == str(Year + 1) + '-01-01']

In [35]:
df

Unnamed: 0,time,lat,lon,Lichen
0,2015-01-01,71.167277,25.807351,3.0
1,2015-01-01,71.167277,25.808276,3.0
2,2015-01-01,71.167277,25.809202,3.0
3,2015-01-01,71.166351,25.745314,10.0
4,2015-01-01,71.166351,25.746239,10.0
...,...,...,...,...
335764,2015-01-01,68.352462,18.714758,1.0
335765,2015-01-01,68.352462,18.715684,1.0
335766,2015-01-01,68.352462,19.331425,1.0
335767,2015-01-01,68.352462,19.332351,1.0


In [36]:
dg

Unnamed: 0,time,lat,lon,Lichen
335769,2016-01-01,71.166351,25.745314,6.0
335770,2016-01-01,71.166351,25.746239,6.0
335771,2016-01-01,71.166351,25.747165,6.0
335772,2016-01-01,71.166351,25.765684,6.0
335773,2016-01-01,71.166351,25.766610,6.0
...,...,...,...,...
840025,2016-01-01,68.352462,19.331425,3.0
840026,2016-01-01,68.352462,19.332351,3.0
840027,2016-01-01,68.352462,19.333276,3.0
840028,2016-01-01,68.352462,19.338832,1.0


In [37]:
df.loc[:, 'Lichen'] 

0          3.0
1          3.0
2          3.0
3         10.0
4         10.0
          ... 
335764     1.0
335765     1.0
335766     1.0
335767     1.0
335768     1.0
Name: Lichen, Length: 335769, dtype: float32

In [38]:
# Normalize the fractional cover
x = df.loc[:, 'Lichen'].div(100)
y = dg['Lichen'].div(100)
df.loc[:, 'Lichen'] = x
dg.loc[:, 'Lichen'] = y

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.loc[:, 'Lichen'] = x
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dg.loc[:, 'Lichen'] = y


In [39]:
df

Unnamed: 0,time,lat,lon,Lichen
0,2015-01-01,71.167277,25.807351,0.03
1,2015-01-01,71.167277,25.808276,0.03
2,2015-01-01,71.167277,25.809202,0.03
3,2015-01-01,71.166351,25.745314,0.10
4,2015-01-01,71.166351,25.746239,0.10
...,...,...,...,...
335764,2015-01-01,68.352462,18.714758,0.01
335765,2015-01-01,68.352462,18.715684,0.01
335766,2015-01-01,68.352462,19.331425,0.01
335767,2015-01-01,68.352462,19.332351,0.01


In [40]:
dg

Unnamed: 0,time,lat,lon,Lichen
335769,2016-01-01,71.166351,25.745314,0.06
335770,2016-01-01,71.166351,25.746239,0.06
335771,2016-01-01,71.166351,25.747165,0.06
335772,2016-01-01,71.166351,25.765684,0.06
335773,2016-01-01,71.166351,25.766610,0.06
...,...,...,...,...
840025,2016-01-01,68.352462,19.331425,0.03
840026,2016-01-01,68.352462,19.332351,0.03
840027,2016-01-01,68.352462,19.333276,0.03
840028,2016-01-01,68.352462,19.338832,0.01


In [41]:
# Find the correspondind ERA5-land lat-lon
# Careful with the latitude, in reverse order
df['ERA5_lon_index'] = ((df.lon - 15.59) / 0.1).astype('int').values
df['ERA5_lat_index'] = 28 - ((df.lat - 68.35) / 0.1).astype('int').values
dg['ERA5_lon_index'] = ((dg.lon - 15.59) / 0.1).astype('int').values
dg['ERA5_lat_index'] = 28 - ((dg.lat - 68.35) / 0.1).astype('int').values

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['ERA5_lon_index'] = ((df.lon - 15.59) / 0.1).astype('int').values
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['ERA5_lat_index'] = 28 - ((df.lat - 68.35) / 0.1).astype('int').values
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dg['ERA5_lon_index'] = ((dg.lon - 15.59) / 0.1).astype('int').

In [42]:
df

Unnamed: 0,time,lat,lon,Lichen,ERA5_lon_index,ERA5_lat_index
0,2015-01-01,71.167277,25.807351,0.03,102,0
1,2015-01-01,71.167277,25.808276,0.03,102,0
2,2015-01-01,71.167277,25.809202,0.03,102,0
3,2015-01-01,71.166351,25.745314,0.10,101,0
4,2015-01-01,71.166351,25.746239,0.10,101,0
...,...,...,...,...,...,...
335764,2015-01-01,68.352462,18.714758,0.01,31,28
335765,2015-01-01,68.352462,18.715684,0.01,31,28
335766,2015-01-01,68.352462,19.331425,0.01,37,28
335767,2015-01-01,68.352462,19.332351,0.01,37,28


# Adding columns with the ERA5-land longitude and latitude to dv

In [43]:
df['ERA5_lon'] = ERA5land.sel(time="2015-01-01").longitude[df['ERA5_lon_index'].values].values
df['ERA5_lat'] = ERA5land.sel(time="2015-01-01").latitude[df['ERA5_lat_index'].values].values
dg['ERA5_lon'] = ERA5land.sel(time="2015-01-01").longitude[dg['ERA5_lon_index'].values].values
dg['ERA5_lat'] = ERA5land.sel(time="2015-01-01").latitude[dg['ERA5_lat_index'].values].values

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['ERA5_lon'] = ERA5land.sel(time="2015-01-01").longitude[df['ERA5_lon_index'].values].values
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['ERA5_lat'] = ERA5land.sel(time="2015-01-01").latitude[df['ERA5_lat_index'].values].values
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dg['ERA5_lon'] =

In [44]:
# Add combined lon_lat column to dv x & y
df['lon_lat'] = (df['ERA5_lon'] * 100).astype('int') + (df['ERA5_lat'] * 100).astype('int') / 100000
dg['lon_lat'] = (dg['ERA5_lon'] * 100).astype('int') + (dg['ERA5_lat'] * 100).astype('int') / 100000

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['lon_lat'] = (df['ERA5_lon'] * 100).astype('int') + (df['ERA5_lat'] * 100).astype('int') / 100000
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dg['lon_lat'] = (dg['ERA5_lon'] * 100).astype('int') + (dg['ERA5_lat'] * 100).astype('int') / 100000


In [45]:
df

Unnamed: 0,time,lat,lon,Lichen,ERA5_lon_index,ERA5_lat_index,ERA5_lon,ERA5_lat,lon_lat
0,2015-01-01,71.167277,25.807351,0.03,102,0,25.790001,71.150002,2579.07115
1,2015-01-01,71.167277,25.808276,0.03,102,0,25.790001,71.150002,2579.07115
2,2015-01-01,71.167277,25.809202,0.03,102,0,25.790001,71.150002,2579.07115
3,2015-01-01,71.166351,25.745314,0.10,101,0,25.690001,71.150002,2569.07115
4,2015-01-01,71.166351,25.746239,0.10,101,0,25.690001,71.150002,2569.07115
...,...,...,...,...,...,...,...,...,...
335764,2015-01-01,68.352462,18.714758,0.01,31,28,18.690001,68.349998,1869.06835
335765,2015-01-01,68.352462,18.715684,0.01,31,28,18.690001,68.349998,1869.06835
335766,2015-01-01,68.352462,19.331425,0.01,37,28,19.290001,68.349998,1929.06835
335767,2015-01-01,68.352462,19.332351,0.01,37,28,19.290001,68.349998,1929.06835


In [46]:
df = df.drop(columns=['time', 'lat', 'lon', 'ERA5_lon', 'ERA5_lat'])
dg = dg.drop(columns=['time', 'lat', 'lon', 'ERA5_lon', 'ERA5_lat'])

In [47]:
df

Unnamed: 0,Lichen,ERA5_lon_index,ERA5_lat_index,lon_lat
0,0.03,102,0,2579.07115
1,0.03,102,0,2579.07115
2,0.03,102,0,2579.07115
3,0.10,101,0,2569.07115
4,0.10,101,0,2569.07115
...,...,...,...,...
335764,0.01,31,28,1869.06835
335765,0.01,31,28,1869.06835
335766,0.01,37,28,1929.06835
335767,0.01,37,28,1929.06835


In [48]:
# Count the number of non-null lichen pixels per ERA5 grid cell
Nx = df.groupby(['lon_lat']).count()
Ny = dg.groupby(['lon_lat']).count()

In [49]:
Nx

Unnamed: 0_level_0,Lichen,ERA5_lon_index,ERA5_lat_index
lon_lat,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1559.06835,92,92,92
1559.06844,447,447,447
1559.06855,440,440,440
1559.06865,267,267,267
1559.06875,184,184,184
...,...,...,...
3099.06944,30,30,30
3099.06955,2,2,2
3099.06965,175,175,175
3099.07025,8,8,8


In [50]:
Nx = Nx['Lichen'].values
Ny = Ny['Lichen'].values

In [51]:
Nx

array([ 92, 447, 440, ..., 175,   8,   5])

## Calculate mean fractional cover for each ERA5-land grid cell

In [52]:
dmx = df.groupby(by='lon_lat').agg('mean')
dmy = dg.groupby(by='lon_lat').agg('mean')

In [53]:
dmx

Unnamed: 0_level_0,Lichen,ERA5_lon_index,ERA5_lat_index
lon_lat,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1559.06835,0.137065,0.0,28.0
1559.06844,0.211879,0.0,27.0
1559.06855,0.211091,0.0,26.0
1559.06865,0.146966,0.0,25.0
1559.06875,0.101087,0.0,24.0
...,...,...,...
3099.06944,0.266667,154.0,17.0
3099.06955,0.210000,154.0,16.0
3099.06965,0.227543,154.0,15.0
3099.07025,0.010000,154.0,9.0


In [54]:
dmx['N'] = Nx.astype('int')
dmy['N'] = Ny.astype('int')

In [55]:
dmx

Unnamed: 0_level_0,Lichen,ERA5_lon_index,ERA5_lat_index,N
lon_lat,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1559.06835,0.137065,0.0,28.0,92
1559.06844,0.211879,0.0,27.0,447
1559.06855,0.211091,0.0,26.0,440
1559.06865,0.146966,0.0,25.0,267
1559.06875,0.101087,0.0,24.0,184
...,...,...,...,...
3099.06944,0.266667,154.0,17.0,30
3099.06955,0.210000,154.0,16.0,2
3099.06965,0.227543,154.0,15.0,175
3099.07025,0.010000,154.0,9.0,8


In [56]:
dmy

Unnamed: 0_level_0,Lichen,ERA5_lon_index,ERA5_lat_index,N
lon_lat,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1559.06835,0.078117,0.0,28.0,308
1559.06844,0.136917,0.0,27.0,652
1559.06855,0.159610,0.0,26.0,616
1559.06865,0.113471,0.0,25.0,291
1559.06875,0.054727,0.0,24.0,366
...,...,...,...,...
3099.06944,0.191600,154.0,17.0,25
3099.06955,0.046667,154.0,16.0,21
3099.06965,0.223909,154.0,15.0,307
3099.07025,0.141250,154.0,9.0,16


In [57]:
dmx['ERA5_lon_index'] = dmx['ERA5_lon_index'].astype('int')
dmx['ERA5_lat_index'] = dmx['ERA5_lat_index'].astype('int')

In [58]:
dmy = dmy.drop(columns=['ERA5_lon_index', 'ERA5_lat_index'])

In [59]:
dmx

Unnamed: 0_level_0,Lichen,ERA5_lon_index,ERA5_lat_index,N
lon_lat,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1559.06835,0.137065,0,28,92
1559.06844,0.211879,0,27,447
1559.06855,0.211091,0,26,440
1559.06865,0.146966,0,25,267
1559.06875,0.101087,0,24,184
...,...,...,...,...
3099.06944,0.266667,154,17,30
3099.06955,0.210000,154,16,2
3099.06965,0.227543,154,15,175
3099.07025,0.010000,154,9,8


In [60]:
dmy

Unnamed: 0_level_0,Lichen,N
lon_lat,Unnamed: 1_level_1,Unnamed: 2_level_1
1559.06835,0.078117,308
1559.06844,0.136917,652
1559.06855,0.159610,616
1559.06865,0.113471,291
1559.06875,0.054727,366
...,...,...
3099.06944,0.191600,25
3099.06955,0.046667,21
3099.06965,0.223909,307
3099.07025,0.141250,16


In [61]:
dmx['ERA5_lon'] = Longitudes[dmx['ERA5_lon_index'].values].values
dmx['ERA5_lat'] = Latitudes[dmx['ERA5_lat_index'].values].values

In [62]:
dmx = dmx.drop(columns=['ERA5_lon_index', 'ERA5_lat_index'])

In [63]:
dmx

Unnamed: 0_level_0,Lichen,N,ERA5_lon,ERA5_lat
lon_lat,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1559.06835,0.137065,92,15.59,68.349998
1559.06844,0.211879,447,15.59,68.449997
1559.06855,0.211091,440,15.59,68.550003
1559.06865,0.146966,267,15.59,68.650002
1559.06875,0.101087,184,15.59,68.750000
...,...,...,...,...
3099.06944,0.266667,30,30.99,69.449997
3099.06955,0.210000,2,30.99,69.550003
3099.06965,0.227543,175,30.99,69.650002
3099.07025,0.010000,8,30.99,70.250000


## Extract ERA5 data for  the selected period of the year (when RoS events mostly occur)

In [64]:
ERA5 = ERA5land.sel(time=slice(str(Year + 1) + '-01-01', str(Year + 1)  + '-12-31'))

In [65]:
ERA5 = ERA5.isel(time=range(Number_of_days * 24))

In [66]:
# When using ERA%-land for recent years there is an additional expver - Not used until 2019
# ERA5 = ERA5.isel(expver = 0)

In [67]:
ERA5

In [68]:
# Extract ERA5 t2m, tp and sd fields 
ERA5_t2m = ERA5.where(ERA5['latitude'].isin(dmx['ERA5_lat'].values) & ERA5['longitude'].isin(dmx['ERA5_lon'].values))['t2m']
ERA5_tp = ERA5.where(ERA5['latitude'].isin(dmx['ERA5_lat'].values) & ERA5['longitude'].isin(dmx['ERA5_lon'].values))['tp']
#ERA5_sd = ERA5.where(ERA5['latitude'].isin(dmx['ERA5_lat'].values) & ERA5['longitude'].isin(dmx['ERA5_lon'].values))['sd']

In [69]:
ERA5_t2m

## Rain on Snow criteria (according to https://www.hydrol-earth-syst-sci.net/23/2983/2019/hess-23-2983-2019.pdf)
 * total rainfall volume of at least 20 mm within 12 h
### or 
 * air temperatures above 0C (273.15K)
 * and initial snowpack depth of at least 10 cm

In [70]:
# Normalizing temperature, total precipitation and snow depth values accordidng to these criteria
ERA5_t2m = ERA5_t2m / 273.15
ERA5_tp = ERA5_tp / 0.02 * 12.
#ERA5_sd = ERA5_sd / 0.1

In [71]:
dh_t2m = ERA5_t2m.stack(z=['latitude', 'longitude']).to_pandas().transpose().reset_index()
dh_tp = ERA5_tp.stack(z=['latitude', 'longitude']).to_pandas().transpose().reset_index()
#dh_sd = ERA5_sd.stack(z=['latitude', 'longitude']).to_pandas().transpose().reset_index()

In [72]:
dh_tp = dh_tp.drop(columns=['latitude', 'longitude'])
#dh_sd = dh_sd.drop(columns=['latitude', 'longitude'])

In [73]:
# Create labels for ERA5-land variables to replace the dates
label_t2m = ['latitude', 'longitude']
label_tp = list()
#label_sd = list()
for i in range(Number_of_days * 24):
    label_t2m.append('t2m_'+ str(i))
    label_tp.append('tp_'+ str(i))
#    label_sd.append('sd_'+ str(i))

In [74]:
dh_t2m.set_axis(label_t2m, axis="columns", inplace=True)
dh_tp.set_axis(label_tp, axis="columns", inplace=True)
#dh_sd.set_axis(label_sd, axis="columns", inplace=True)

  dh_t2m.set_axis(label_t2m, axis="columns", inplace=True)
  dh_tp.set_axis(label_tp, axis="columns", inplace=True)


In [75]:
dh_t2m

Unnamed: 0,latitude,longitude,t2m_0,t2m_1,t2m_2,t2m_3,t2m_4,t2m_5,t2m_6,t2m_7,...,t2m_8750,t2m_8751,t2m_8752,t2m_8753,t2m_8754,t2m_8755,t2m_8756,t2m_8757,t2m_8758,t2m_8759
0,71.150002,15.590000,,,,,,,,,...,,,,,,,,,,
1,71.150002,15.690000,,,,,,,,,...,,,,,,,,,,
2,71.150002,15.790000,,,,,,,,,...,,,,,,,,,,
3,71.150002,15.890000,,,,,,,,,...,,,,,,,,,,
4,71.150002,15.990000,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4490,68.349998,30.590000,0.986783,0.985853,0.984817,0.983796,0.982589,0.981640,0.980055,0.980477,...,0.988387,0.990788,0.996695,1.001827,1.003111,1.004301,1.002994,1.001283,0.999472,0.997520
4491,68.349998,30.690001,0.986696,0.985762,0.984746,0.983745,0.982549,0.981626,0.980062,0.980548,...,0.987828,0.990140,0.995612,1.001088,1.002614,1.003781,1.002586,1.000915,0.999191,0.997283
4492,68.349998,30.790001,0.986581,0.985651,0.984662,0.983674,0.982492,0.981604,0.980067,0.980610,...,0.986778,0.988991,0.993976,0.999723,1.001476,1.002661,1.001618,1.000040,0.998441,0.996623
4493,68.349998,30.889999,0.986530,0.985611,0.984657,0.983692,0.982534,0.981686,0.980202,0.980768,...,0.984513,0.986632,0.990903,0.996581,0.998494,0.999819,0.999149,0.997853,0.996515,0.994986


In [76]:
dh_tp

Unnamed: 0,tp_0,tp_1,tp_2,tp_3,tp_4,tp_5,tp_6,tp_7,tp_8,tp_9,...,tp_8750,tp_8751,tp_8752,tp_8753,tp_8754,tp_8755,tp_8756,tp_8757,tp_8758,tp_8759
0,,,,,,,,,,,...,,,,,,,,,,
1,,,,,,,,,,,...,,,,,,,,,,
2,,,,,,,,,,,...,,,,,,,,,,
3,,,,,,,,,,,...,,,,,,,,,,
4,,,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4490,0.126586,0.001552,0.002660,0.004433,0.006872,0.009089,0.011306,0.013966,0.016183,0.018179,...,2.484946,3.507168,3.872517,3.915968,3.917298,3.927718,3.989570,4.041003,4.046102,4.046767
4491,0.115280,0.001552,0.002660,0.004212,0.006207,0.008424,0.010419,0.013080,0.015075,0.016848,...,2.428414,3.477461,3.878946,3.929270,3.931043,3.942128,4.009079,4.072483,4.078469,4.079355
4492,0.104860,0.001552,0.002660,0.004212,0.005985,0.007759,0.009754,0.012193,0.013966,0.016183,...,2.372770,3.449085,3.886261,3.943902,3.946118,3.957868,4.029696,4.104629,4.112166,4.112831
4493,0.095327,0.001552,0.002660,0.004212,0.005764,0.007316,0.008867,0.011528,0.013523,0.015296,...,2.310696,3.408737,3.880276,3.946340,3.949001,3.961194,4.037456,4.125467,4.135000,4.136108


In [77]:
#  Glue together dh_t2m and dh_tp  <- not dh_sd
dh = pd.concat([dh_t2m, dh_tp], axis = 1)

In [78]:
dh

Unnamed: 0,latitude,longitude,t2m_0,t2m_1,t2m_2,t2m_3,t2m_4,t2m_5,t2m_6,t2m_7,...,tp_8750,tp_8751,tp_8752,tp_8753,tp_8754,tp_8755,tp_8756,tp_8757,tp_8758,tp_8759
0,71.150002,15.590000,,,,,,,,,...,,,,,,,,,,
1,71.150002,15.690000,,,,,,,,,...,,,,,,,,,,
2,71.150002,15.790000,,,,,,,,,...,,,,,,,,,,
3,71.150002,15.890000,,,,,,,,,...,,,,,,,,,,
4,71.150002,15.990000,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4490,68.349998,30.590000,0.986783,0.985853,0.984817,0.983796,0.982589,0.981640,0.980055,0.980477,...,2.484946,3.507168,3.872517,3.915968,3.917298,3.927718,3.989570,4.041003,4.046102,4.046767
4491,68.349998,30.690001,0.986696,0.985762,0.984746,0.983745,0.982549,0.981626,0.980062,0.980548,...,2.428414,3.477461,3.878946,3.929270,3.931043,3.942128,4.009079,4.072483,4.078469,4.079355
4492,68.349998,30.790001,0.986581,0.985651,0.984662,0.983674,0.982492,0.981604,0.980067,0.980610,...,2.372770,3.449085,3.886261,3.943902,3.946118,3.957868,4.029696,4.104629,4.112166,4.112831
4493,68.349998,30.889999,0.986530,0.985611,0.984657,0.983692,0.982534,0.981686,0.980202,0.980768,...,2.310696,3.408737,3.880276,3.946340,3.949001,3.961194,4.037456,4.125467,4.135000,4.136108


In [79]:
# Add combined lon_lat column to dh
dh['ERA5_lon_lat'] = (dh['longitude'] * 100).astype('int') + (dh['latitude'] * 100).astype('int') / 100000

In [80]:
# Drop latitude and longitude columns which are not used anymore in dh
dh = dh.drop(columns=['latitude', 'longitude'])

In [81]:
dh

Unnamed: 0,t2m_0,t2m_1,t2m_2,t2m_3,t2m_4,t2m_5,t2m_6,t2m_7,t2m_8,t2m_9,...,tp_8751,tp_8752,tp_8753,tp_8754,tp_8755,tp_8756,tp_8757,tp_8758,tp_8759,ERA5_lon_lat
0,,,,,,,,,,,...,,,,,,,,,,1559.07115
1,,,,,,,,,,,...,,,,,,,,,,1568.07115
2,,,,,,,,,,,...,,,,,,,,,,1578.07115
3,,,,,,,,,,,...,,,,,,,,,,1589.07115
4,,,,,,,,,,,...,,,,,,,,,,1598.07115
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4490,0.986783,0.985853,0.984817,0.983796,0.982589,0.981640,0.980055,0.980477,0.979683,0.979024,...,3.507168,3.872517,3.915968,3.917298,3.927718,3.989570,4.041003,4.046102,4.046767,3059.06834
4491,0.986696,0.985762,0.984746,0.983745,0.982549,0.981626,0.980062,0.980548,0.979769,0.979130,...,3.477461,3.878946,3.929270,3.931043,3.942128,4.009079,4.072483,4.078469,4.079355,3069.06834
4492,0.986581,0.985651,0.984662,0.983674,0.982492,0.981604,0.980067,0.980610,0.979840,0.979221,...,3.449085,3.886261,3.943902,3.946118,3.957868,4.029696,4.104629,4.112166,4.112831,3079.06834
4493,0.986530,0.985611,0.984657,0.983692,0.982534,0.981686,0.980202,0.980768,0.980007,0.979407,...,3.408737,3.880276,3.946340,3.949001,3.961194,4.037456,4.125467,4.135000,4.136108,3088.06834


In [82]:
dmx

Unnamed: 0_level_0,Lichen,N,ERA5_lon,ERA5_lat
lon_lat,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1559.06835,0.137065,92,15.59,68.349998
1559.06844,0.211879,447,15.59,68.449997
1559.06855,0.211091,440,15.59,68.550003
1559.06865,0.146966,267,15.59,68.650002
1559.06875,0.101087,184,15.59,68.750000
...,...,...,...,...
3099.06944,0.266667,30,30.99,69.449997
3099.06955,0.210000,2,30.99,69.550003
3099.06965,0.227543,175,30.99,69.650002
3099.07025,0.010000,8,30.99,70.250000


In [83]:
dmx = dmx.drop(columns=['ERA5_lon', 'ERA5_lat'])

## Join dwx (WLC) with dh (ERA5 t2m-tp-sd)

In [84]:
dmx

Unnamed: 0_level_0,Lichen,N
lon_lat,Unnamed: 1_level_1,Unnamed: 2_level_1
1559.06835,0.137065,92
1559.06844,0.211879,447
1559.06855,0.211091,440
1559.06865,0.146966,267
1559.06875,0.101087,184
...,...,...
3099.06944,0.266667,30
3099.06955,0.210000,2
3099.06965,0.227543,175
3099.07025,0.010000,8


In [85]:
# Join dwx (WLC) with df (ERA5 t2m-tp)
dx = dmx.join(dh.set_index('ERA5_lon_lat'), on='lon_lat')

In [86]:
dx

Unnamed: 0_level_0,Lichen,N,t2m_0,t2m_1,t2m_2,t2m_3,t2m_4,t2m_5,t2m_6,t2m_7,...,tp_8750,tp_8751,tp_8752,tp_8753,tp_8754,tp_8755,tp_8756,tp_8757,tp_8758,tp_8759
lon_lat,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1559.06835,0.137065,92,,,,,,,,,...,,,,,,,,,,
1559.06844,0.211879,447,0.996191,0.994671,0.995452,0.996635,0.998929,1.000566,1.001505,1.000109,...,10.005406,10.254145,10.396692,10.574267,10.729009,10.885302,11.148229,11.369034,11.526213,11.649918
1559.06855,0.211091,440,0.989355,0.988203,0.990508,0.992543,0.995687,0.997859,0.999191,0.997569,...,9.926263,10.141747,10.222220,10.329963,10.445686,10.567839,10.807487,10.978191,11.081942,11.157539
1559.06865,0.146966,267,0.986124,0.986423,0.989102,0.991085,0.994673,0.997041,0.998751,0.997278,...,9.926263,10.130219,10.189189,10.251927,10.335948,10.472290,10.726792,10.901264,10.994152,11.046915
1559.06875,0.101087,184,0.990460,0.993171,0.994899,0.995616,0.997606,0.999077,1.000438,0.999122,...,9.752678,9.927814,9.978580,10.030680,10.094970,10.233084,10.463865,10.620380,10.697972,10.733222
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3099.06944,0.266667,30,,,,,,,,,...,,,,,,,,,,
3099.06955,0.210000,2,,,,,,,,,...,,,,,,,,,,
3099.06965,0.227543,175,,,,,,,,,...,,,,,,,,,,
3099.07025,0.010000,8,,,,,,,,,...,,,,,,,,,,


In [87]:
# Drop the Rows with NaN Values
dx = dx.dropna()

In [88]:
dx = dx.reset_index()

In [89]:
dx

Unnamed: 0,lon_lat,Lichen,N,t2m_0,t2m_1,t2m_2,t2m_3,t2m_4,t2m_5,t2m_6,...,tp_8750,tp_8751,tp_8752,tp_8753,tp_8754,tp_8755,tp_8756,tp_8757,tp_8758,tp_8759
0,1559.06844,0.211879,447,0.996191,0.994671,0.995452,0.996635,0.998929,1.000566,1.001505,...,10.005406,10.254145,10.396692,10.574267,10.729009,10.885302,11.148229,11.369034,11.526213,11.649918
1,1559.06855,0.211091,440,0.989355,0.988203,0.990508,0.992543,0.995687,0.997859,0.999191,...,9.926263,10.141747,10.222220,10.329963,10.445686,10.567839,10.807487,10.978191,11.081942,11.157539
2,1559.06865,0.146966,267,0.986124,0.986423,0.989102,0.991085,0.994673,0.997041,0.998751,...,9.926263,10.130219,10.189189,10.251927,10.335948,10.472290,10.726792,10.901264,10.994152,11.046915
3,1559.06894,0.066429,84,1.004145,1.005221,1.006582,1.006018,1.005386,1.005188,1.005679,...,8.793637,8.862141,8.899606,8.977199,9.030182,9.149010,9.216183,9.261408,9.292666,9.308185
4,1589.06844,0.135455,77,0.992889,0.989019,0.990273,0.992911,0.996197,0.998616,1.000043,...,9.740263,10.078786,10.229980,10.329298,10.505321,10.631908,10.913235,11.092362,11.228038,11.349302
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
734,3079.06944,0.190000,62,0.989532,0.989124,0.988256,0.987347,0.986037,0.985596,0.983969,...,1.255442,2.074594,2.639687,2.899732,2.986192,3.134947,3.333583,3.571680,3.744601,3.875177
735,3079.06955,0.165333,45,0.991210,0.990799,0.990056,0.989213,0.987997,0.987482,0.985902,...,1.097597,1.872854,2.419325,2.678262,2.775806,2.953382,3.158669,3.408958,3.597619,3.752138
736,3079.06965,0.248843,268,0.993679,0.993266,0.992661,0.991882,0.990746,0.990151,0.988636,...,0.958596,1.699048,2.255273,2.518865,2.632371,2.834776,3.047157,3.301660,3.501183,3.678979
737,3079.06975,0.276389,36,0.996222,0.995807,0.995357,0.994664,0.993628,0.992920,0.991478,...,0.839104,1.552953,2.131791,2.403363,2.532610,2.755189,2.973112,3.226506,3.433345,3.631981


In [90]:
## Save into **local** HDF5 file without index
x_filename = os.path.join(path, 'x_mean_tp1_' + str(Year) + '.hdf')
print(x_filename)
dx.to_hdf(x_filename, key='df', mode="w", index=False)

/home/jeani/data/x_mean_tp1_2015.hdf


## Find locations with lichen in the following year corresponding to those in current year

In [91]:
dmx = dx[['lon_lat']]

In [92]:
dmx

Unnamed: 0,lon_lat
0,1559.06844
1,1559.06855
2,1559.06865
3,1559.06894
4,1589.06844
...,...
734,3079.06944
735,3079.06955
736,3079.06965
737,3079.06975


In [93]:
dmy = dmy.reset_index()

In [94]:
dmy

Unnamed: 0,lon_lat,Lichen,N
0,1559.06835,0.078117,308
1,1559.06844,0.136917,652
2,1559.06855,0.159610,616
3,1559.06865,0.113471,291
4,1559.06875,0.054727,366
...,...,...,...
2477,3099.06944,0.191600,25
2478,3099.06955,0.046667,21
2479,3099.06965,0.223909,307
2480,3099.07025,0.141250,16


In [95]:
## Join dwx with dwy
dy = dmx.join(dmy.set_index('lon_lat'), on='lon_lat')

In [96]:
dy

Unnamed: 0,lon_lat,Lichen,N
0,1559.06844,0.136917,652.0
1,1559.06855,0.159610,616.0
2,1559.06865,0.113471,291.0
3,1559.06894,0.045974,154.0
4,1589.06844,0.117667,120.0
...,...,...,...
734,3079.06944,0.206731,104.0
735,3079.06955,0.106735,98.0
736,3079.06965,0.232222,207.0
737,3079.06975,0.221667,72.0


In [97]:
dy.fillna(0)

Unnamed: 0,lon_lat,Lichen,N
0,1559.06844,0.136917,652.0
1,1559.06855,0.159610,616.0
2,1559.06865,0.113471,291.0
3,1559.06894,0.045974,154.0
4,1589.06844,0.117667,120.0
...,...,...,...
734,3079.06944,0.206731,104.0
735,3079.06955,0.106735,98.0
736,3079.06965,0.232222,207.0
737,3079.06975,0.221667,72.0


In [98]:
dy.rename(columns = {'Lichen' : 'new_Lichen', 'N' : 'new_N'}, inplace = True)

In [99]:
dy

Unnamed: 0,lon_lat,new_Lichen,new_N
0,1559.06844,0.136917,652.0
1,1559.06855,0.159610,616.0
2,1559.06865,0.113471,291.0
3,1559.06894,0.045974,154.0
4,1589.06844,0.117667,120.0
...,...,...,...
734,3079.06944,0.206731,104.0
735,3079.06955,0.106735,98.0
736,3079.06965,0.232222,207.0
737,3079.06975,0.221667,72.0


In [100]:
## Save into **local** HDF5 file without index
y_filename = os.path.join(path, 'y_mean_tp1_' + str(Year) + '.hdf')
print(y_filename)
dy.to_hdf(y_filename, key='dg', mode="w", index=False)

/home/jeani/data/y_mean_tp1_2015.hdf


In [101]:
print('Finished!')

Finished!
