# Lichen fractional cover statistics 
# Prepares labelled input for the Machine Learning algorithm
# (i.e. locations where moss&lichen fractional cover changes can be related to meteorological parameters from ERA5-Land)

# Copernicus Global Land Cover
## Percentage of 100m pixel that is covered by a specific class of land cover
## Valid values 0-100, 200 = masked sea, 255 = missing

In [1]:
!date

Wed Apr 12 09:30:34 UTC 2023


In [2]:
pip install netcdf4 s3fs tables vaex

Defaulting to user installation because normal site-packages is not writeable
Note: you may need to restart the kernel to use updated packages.


In [3]:
import numpy as np
import os
import pandas as pd
import s3fs
import xarray as xr
import vaex

# Input datasets, either from s3 storage or local files

# If data is available locally then jump the following cells

### Define s3 storage parameters

In [4]:
client_kwargs={'endpoint_url': 'https://object-store.cloud.muni.cz'}
store = s3fs.S3FileSystem(anon=False, client_kwargs=client_kwargs)
store.ls('Data', detail=True, refresh=True)

[{'Key': 'Data/C_GlobalLandCover_20150101_20190101_Troms-Finnmark.nc',
  'LastModified': datetime.datetime(2023, 2, 1, 12, 44, 42, 95000, tzinfo=tzlocal()),
  'ETag': '"70e848cfeba6b4e2db997b6efb0ad947-8"',
  'Size': 397191332,
  'StorageClass': 'STANDARD',
  'type': 'file',
  'size': 397191332,
  'name': 'Data/C_GlobalLandCover_20150101_20190101_Troms-Finnmark.nc'},
 {'Key': 'Data/reanalysis-era5-land_hourly_2015-01-01_2019-12-31_Troms-Finnmark_T2m-SD-TP.nc',
  'LastModified': datetime.datetime(2023, 2, 1, 9, 53, 15, 635000, tzinfo=tzlocal()),
  'ETag': '"e48be2b71e08b38d296a0ea6db979b09-23"',
  'Size': 1182124070,
  'StorageClass': 'STANDARD',
  'type': 'file',
  'size': 1182124070,
  'name': 'Data/reanalysis-era5-land_hourly_2015-01-01_2019-12-31_Troms-Finnmark_T2m-SD-TP.nc'},
 {'Key': 'Data/reanalysis-era5-land_hourly_2015-01-01_2022-12-31_Troms-Finnmark_T2m-SD-TP.nc',
  'LastModified': datetime.datetime(2023, 3, 10, 11, 50, 32, 549000, tzinfo=tzlocal()),
  'ETag': '"d082ee07a1ab33

## Copernicus Global Land Cover data  from 2015-01-01 to 2019-12-31 already available as a netCDF file stored on EOSC (CESNET)
## Troms og Finnmark
### Mosses and lichens, bare, grass, shrubs and trees

In [5]:
s3path = 'Data/C_GlobalLandCover_20150101_20190101_Troms-Finnmark.nc'

In [6]:
# GLC_AOI = xr.open_dataset(store.open(s3path))

## ERA5-land data from 2015-01-01 to 2019-12-31 - already available as a netCDF file stored on EOSC (CESNET)
## 2m Temperature, Snow depth, Total precipitation

In [7]:
s3path = 'Data/reanalysis-era5-land_hourly_2015-01-01_2022-12-31_Troms-Finnmark_T2m-SD-TP.nc'

In [8]:
# ERA5land = xr.open_dataset(store.open(s3path))

# Datasets from **local** files
## when running pangeo/ml-notebook with apptainer/singularity the path for the data folder is /home/ubuntu/data

In [12]:
path = '/home/ubuntu/data/'

In [13]:
# World Land cover data from 2015-01-01 to 2019-12-31- already available as a netCDF file stored locally
GLC_filename = os.path.join(path, 'C_GlobalLandCover_20150101_20190101_Troms-Finnmark.nc')
GLC_AOI = xr.open_dataset(GLC_filename, engine = 'netcdf4')

In [14]:
# ERA5-land data already available as a netCDF file stored locally
ERA5_filename = os.path.join(path, 'reanalysis-era5-land_hourly_2015-01-01_2019-12-31_Troms-Finnmark_T2m-SD-TP.nc')
ERA5land = xr.open_dataset(ERA5_filename, engine = 'netcdf4')

In [15]:
ERA5land

In [16]:
Latitudes = ERA5land.latitude.to_index()
Longitudes = ERA5land.longitude.to_index()

In [17]:
Latitudes

Float64Index([ 71.1500015258789, 71.05000305175781, 70.94999694824219,
               70.8499984741211,             70.75,  70.6500015258789,
              70.55000305175781, 70.44999694824219,  70.3499984741211,
                          70.25,  70.1500015258789, 70.05000305175781,
              69.94999694824219,  69.8499984741211,             69.75,
               69.6500015258789, 69.55000305175781, 69.44999694824219,
               69.3499984741211,             69.25,  69.1500015258789,
              69.05000305175781, 68.94999694824219,  68.8499984741211,
                          68.75,  68.6500015258789, 68.55000305175781,
              68.44999694824219,  68.3499984741211],
             dtype='float64', name='latitude')

In [18]:
GLC_AOI = GLC_AOI.rename(x='lon', y='lat', t='time')

In [19]:
GLC_AOI

In [20]:
# Drop variables not directly of interest here
GLC_AOI = GLC_AOI.drop_vars(['crs',
                             'Bare_CoverFraction_layer',
                             'Crops_CoverFraction_layer',
                             'Grass_CoverFraction_layer',
                             'Discrete_Classification_map', 
                             'Discrete_Classification_proba',
                             'Forest_Type_layer',
                             'Shrub_CoverFraction_layer',
                             'Snow_CoverFraction_layer',
                             'Tree_CoverFraction_layer',
                             'BuiltUp_CoverFraction_layer',
                             'PermanentWater_CoverFraction_layer',
                             'SeasonalWater_CoverFraction_layer',
                             'DataDensityIndicator',
                             'Change_Confidence_layer',
                             'dataMask'])

In [21]:
GLC_AOI = GLC_AOI.rename(MossLichen_CoverFraction_layer = 'Lichen')

In [22]:
GLC_AOI

In [23]:
# Troms & Finnmark Global Land Cover area
GLC_AOI_min_lon = GLC_AOI.lon.min()
GLC_AOI_max_lon = GLC_AOI.lon.max()
GLC_AOI_min_lat = GLC_AOI.lat.min()
GLC_AOI_max_lat = GLC_AOI.lat.max()
print(GLC_AOI_min_lon.values, GLC_AOI_max_lon.values, GLC_AOI_min_lat.values, GLC_AOI_max_lat.values)

15.595313502963002 31.06568387333461 68.35153627792579 71.18764738903712


## Use the mask to only keep pixels with lichen **every year**

In [24]:
mask = GLC_AOI['Lichen'].where((GLC_AOI['Lichen'] > 0) & (GLC_AOI['Lichen'] <= 100))

In [25]:
mask = xr.where(mask > 0, 1, 0)

In [26]:
mask = mask.sum(dim = 'time', min_count = 5, skipna=True)

In [27]:
mask = xr.where(mask >= 1, 1, 0)

In [28]:
mask

In [29]:
mask.sum()

In [30]:
de = GLC_AOI.where(mask == 1)

In [31]:
de

In [32]:
de = de.to_dataframe()

In [33]:
de = de.dropna()

In [34]:
de = de.reset_index()

In [35]:
de

Unnamed: 0,time,lat,lon,Lichen
0,2015-01-01,71.167277,25.807351,3.0
1,2015-01-01,71.167277,25.808276,3.0
2,2015-01-01,71.167277,25.809202,3.0
3,2015-01-01,71.166351,25.745314,10.0
4,2015-01-01,71.166351,25.746239,10.0
...,...,...,...,...
2497146,2019-01-01,68.352462,19.333276,11.0
2497147,2019-01-01,68.352462,19.338832,5.0
2497148,2019-01-01,68.352462,19.339758,5.0
2497149,2019-01-01,68.352462,19.987906,4.0


# Run from here until the end for each year (2015, 2016, 2017 and 2018)

In [247]:
# Each year in a separate dataset and keep only the first 365 days
Year = 2018
Number_of_days = 365
print('x = WLC(' + str(Year)+ ') joined with ERA5land(' + str(Year + 1) + ')')
print('y = WLC(' + str(Year + 1) + ')')

x = WLC(2018) joined with ERA5land(2019)
y = WLC(2019)


In [248]:
# Only keep locations with lichen for the current year
df = de.loc[de['time'] == str(Year) + '-01-01']
dg = de.loc[de['time'] == str(Year + 1) + '-01-01']

In [249]:
df

Unnamed: 0,time,lat,lon,Lichen
1349521,2018-01-01,71.169129,25.779573,7.0
1349522,2018-01-01,71.169129,25.780499,7.0
1349523,2018-01-01,71.169129,25.781425,7.0
1349524,2018-01-01,71.166351,25.810128,3.0
1349525,2018-01-01,71.166351,25.811054,3.0
...,...,...,...,...
1830447,2018-01-01,68.352462,19.331425,8.0
1830448,2018-01-01,68.352462,19.332351,8.0
1830449,2018-01-01,68.352462,19.333276,8.0
1830450,2018-01-01,68.352462,19.338832,4.0


In [250]:
dg

Unnamed: 0,time,lat,lon,Lichen
1830452,2019-01-01,71.170055,25.782351,8.0
1830453,2019-01-01,71.170055,25.783276,8.0
1830454,2019-01-01,71.169129,25.779573,10.0
1830455,2019-01-01,71.169129,25.780499,10.0
1830456,2019-01-01,71.169129,25.781425,10.0
...,...,...,...,...
2497146,2019-01-01,68.352462,19.333276,11.0
2497147,2019-01-01,68.352462,19.338832,5.0
2497148,2019-01-01,68.352462,19.339758,5.0
2497149,2019-01-01,68.352462,19.987906,4.0


In [251]:
# Normalize the fractional cover
df['Lichen'] = df['Lichen'].div(100)
dg['Lichen'] = dg['Lichen'].div(100)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['Lichen'] = df['Lichen'].div(100)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dg['Lichen'] = dg['Lichen'].div(100)


In [252]:
df

Unnamed: 0,time,lat,lon,Lichen
1349521,2018-01-01,71.169129,25.779573,0.07
1349522,2018-01-01,71.169129,25.780499,0.07
1349523,2018-01-01,71.169129,25.781425,0.07
1349524,2018-01-01,71.166351,25.810128,0.03
1349525,2018-01-01,71.166351,25.811054,0.03
...,...,...,...,...
1830447,2018-01-01,68.352462,19.331425,0.08
1830448,2018-01-01,68.352462,19.332351,0.08
1830449,2018-01-01,68.352462,19.333276,0.08
1830450,2018-01-01,68.352462,19.338832,0.04


In [253]:
dg

Unnamed: 0,time,lat,lon,Lichen
1830452,2019-01-01,71.170055,25.782351,0.08
1830453,2019-01-01,71.170055,25.783276,0.08
1830454,2019-01-01,71.169129,25.779573,0.10
1830455,2019-01-01,71.169129,25.780499,0.10
1830456,2019-01-01,71.169129,25.781425,0.10
...,...,...,...,...
2497146,2019-01-01,68.352462,19.333276,0.11
2497147,2019-01-01,68.352462,19.338832,0.05
2497148,2019-01-01,68.352462,19.339758,0.05
2497149,2019-01-01,68.352462,19.987906,0.04


In [254]:
# Convert to VAEX
dvx = vaex.from_pandas(df)
dvy = vaex.from_pandas(dg)

In [255]:
dvx

#,time,lat,lon,Lichen
0,2018-01-01 00:00:00.000000000,71.1691288705186,25.779572762223076,0.07
1,2018-01-01 00:00:00.000000000,71.1691288705186,25.780498688149002,0.07
2,2018-01-01 00:00:00.000000000,71.1691288705186,25.78142461407493,0.07
3,2018-01-01 00:00:00.000000000,71.16635109274083,25.810128317778634,0.03
4,2018-01-01 00:00:00.000000000,71.16635109274083,25.81105424370456,0.03
...,...,...,...,...
480926,2018-01-01 00:00:00.000000000,68.35246220385172,19.33142461407441,0.08
480927,2018-01-01 00:00:00.000000000,68.35246220385172,19.332350540000338,0.08
480928,2018-01-01 00:00:00.000000000,68.35246220385172,19.333276465926264,0.08
480929,2018-01-01 00:00:00.000000000,68.35246220385172,19.33883202148182,0.04


In [256]:
dvy

#,time,lat,lon,Lichen
0,2019-01-01 00:00:00.000000000,71.17005479644453,25.782350540000856,0.08
1,2019-01-01 00:00:00.000000000,71.17005479644453,25.783276465926782,0.08
2,2019-01-01 00:00:00.000000000,71.1691288705186,25.779572762223076,0.1
3,2019-01-01 00:00:00.000000000,71.1691288705186,25.780498688149002,0.1
4,2019-01-01 00:00:00.000000000,71.1691288705186,25.78142461407493,0.1
...,...,...,...,...
666694,2019-01-01 00:00:00.000000000,68.35246220385172,19.333276465926264,0.11
666695,2019-01-01 00:00:00.000000000,68.35246220385172,19.33883202148182,0.05
666696,2019-01-01 00:00:00.000000000,68.35246220385172,19.339757947407747,0.05
666697,2019-01-01 00:00:00.000000000,68.35246220385172,19.987906095555946,0.04


In [257]:
# Find the correspondind ERA5-land lat-lon
# Careful with the latitude, in reverse order
dvx['ERA5_lon_index'] = ((dvx.lon - 15.59) / 0.1).astype('int').values
dvx['ERA5_lat_index'] = 28 - ((dvx.lat - 68.35) / 0.1).astype('int').values
dvy['ERA5_lon_index'] = ((dvy.lon - 15.59) / 0.1).astype('int').values
dvy['ERA5_lat_index'] = 28 - ((dvy.lat - 68.35) / 0.1).astype('int').values

In [258]:
dvx

#,time,lat,lon,Lichen,ERA5_lon_index,ERA5_lat_index
0,2018-01-01 00:00:00.000000000,71.1691288705186,25.779572762223076,0.07,101,0
1,2018-01-01 00:00:00.000000000,71.1691288705186,25.780498688149002,0.07,101,0
2,2018-01-01 00:00:00.000000000,71.1691288705186,25.78142461407493,0.07,101,0
3,2018-01-01 00:00:00.000000000,71.16635109274083,25.810128317778634,0.03,102,0
4,2018-01-01 00:00:00.000000000,71.16635109274083,25.81105424370456,0.03,102,0
...,...,...,...,...,...,...
480926,2018-01-01 00:00:00.000000000,68.35246220385172,19.33142461407441,0.08,37,28
480927,2018-01-01 00:00:00.000000000,68.35246220385172,19.332350540000338,0.08,37,28
480928,2018-01-01 00:00:00.000000000,68.35246220385172,19.333276465926264,0.08,37,28
480929,2018-01-01 00:00:00.000000000,68.35246220385172,19.33883202148182,0.04,37,28


# Adding columns with the ERA5-land longitude and latitude to dv

In [259]:
dvx['ERA5_lon'] = ERA5land.sel(time="2015-01-01").longitude[dvx['ERA5_lon_index'].values].values
dvx['ERA5_lat'] = ERA5land.sel(time="2015-01-01").latitude[dvx['ERA5_lat_index'].values].values
dvy['ERA5_lon'] = ERA5land.sel(time="2015-01-01").longitude[dvy['ERA5_lon_index'].values].values
dvy['ERA5_lat'] = ERA5land.sel(time="2015-01-01").latitude[dvy['ERA5_lat_index'].values].values

In [260]:
# Add combined lon_lat column to dv x & y
dvx['lon_lat'] = (dvx['ERA5_lon'] * 100).astype('int') + (dvx['ERA5_lat'] * 100).astype('int') / 100000
dvy['lon_lat'] = (dvy['ERA5_lon'] * 100).astype('int') + (dvy['ERA5_lat'] * 100).astype('int') / 100000

In [261]:
dvx

#,time,lat,lon,Lichen,ERA5_lon_index,ERA5_lat_index,ERA5_lon,ERA5_lat,lon_lat
0,2018-01-01 00:00:00.000000000,71.1691288705186,25.779572762223076,0.07,101,0,25.69,71.15,2569.07115
1,2018-01-01 00:00:00.000000000,71.1691288705186,25.780498688149002,0.07,101,0,25.69,71.15,2569.07115
2,2018-01-01 00:00:00.000000000,71.1691288705186,25.78142461407493,0.07,101,0,25.69,71.15,2569.07115
3,2018-01-01 00:00:00.000000000,71.16635109274083,25.810128317778634,0.03,102,0,25.79,71.15,2579.07115
4,2018-01-01 00:00:00.000000000,71.16635109274083,25.81105424370456,0.03,102,0,25.79,71.15,2579.07115
...,...,...,...,...,...,...,...,...,...
480926,2018-01-01 00:00:00.000000000,68.35246220385172,19.33142461407441,0.08,37,28,19.29,68.35,1929.06835
480927,2018-01-01 00:00:00.000000000,68.35246220385172,19.332350540000338,0.08,37,28,19.29,68.35,1929.06835
480928,2018-01-01 00:00:00.000000000,68.35246220385172,19.333276465926264,0.08,37,28,19.29,68.35,1929.06835
480929,2018-01-01 00:00:00.000000000,68.35246220385172,19.33883202148182,0.04,37,28,19.29,68.35,1929.06835


In [262]:
dvx = dvx.drop(columns=['time', 'lat', 'lon', 'ERA5_lon', 'ERA5_lat'])
dvy = dvy.drop(columns=['time', 'lat', 'lon', 'ERA5_lon', 'ERA5_lat'])

In [263]:
dvx

#,Lichen,ERA5_lon_index,ERA5_lat_index,lon_lat
0,0.07,101,0,2569.07115
1,0.07,101,0,2569.07115
2,0.07,101,0,2569.07115
3,0.03,102,0,2579.07115
4,0.03,102,0,2579.07115
...,...,...,...,...
480926,0.08,37,28,1929.06835
480927,0.08,37,28,1929.06835
480928,0.08,37,28,1929.06835
480929,0.04,37,28,1929.06835


In [264]:
# Count the number of non-null lichen pixels per ERA5 grid cell
Nx = dvx.groupby(['lon_lat']).agg('count')
Ny = dvy.groupby(['lon_lat']).agg('count')

In [265]:
Nx = Nx['count'].values
Ny = Ny['count'].values

In [266]:
Nx

array([ 12, 127,  58, ..., 140, 366,   6])

## Calculate mean fractional cover for each ERA5-land grid cell

In [267]:
dmx = dvx.groupby(by='lon_lat', agg='mean')
dmy = dvy.groupby(by='lon_lat', agg='mean')

In [268]:
dmx

#,lon_lat,Lichen_mean,ERA5_lon_index_mean,ERA5_lat_index_mean
0,2459.07055,0.18827586126481666,90.0,6.0
1,2239.07065,0.13291338627321983,68.0,5.0
2,2839.06994,0.15500000352039933,128.0,12.0
3,2479.06935,0.06000000052154064,92.0,18.0
4,2489.06925,0.029999999329447746,93.0,19.0
...,...,...,...,...
2446,3009.07044,0.10564285733604005,145.0,7.0
2447,2229.07055,0.12946180516155437,67.0,6.0
2448,2899.07085,0.1742857141154153,134.0,3.0
2449,2569.06925,0.07666666557391484,101.0,19.0


In [269]:
dmx['N'] = Nx.astype('int')
dmy['N'] = Ny.astype('int')

In [270]:
dmx

#,lon_lat,Lichen_mean,ERA5_lon_index_mean,ERA5_lat_index_mean,N
0,2459.07055,0.18827586126481666,90.0,6.0,12
1,2239.07065,0.13291338627321983,68.0,5.0,127
2,2839.06994,0.15500000352039933,128.0,12.0,58
3,2479.06935,0.06000000052154064,92.0,18.0,6
4,2489.06925,0.029999999329447746,93.0,19.0,423
...,...,...,...,...,...
2446,3009.07044,0.10564285733604005,145.0,7.0,7
2447,2229.07055,0.12946180516155437,67.0,6.0,576
2448,2899.07085,0.1742857141154153,134.0,3.0,140
2449,2569.06925,0.07666666557391484,101.0,19.0,366


In [271]:
dmy

#,lon_lat,Lichen_mean,ERA5_lon_index_mean,ERA5_lat_index_mean,N
0,2239.07065,0.06981481404768096,68.0,5.0,54
1,2839.06994,0.03999999910593033,128.0,12.0,10
2,2499.06965,0.029999999329447746,94.0,15.0,6
3,2459.07055,0.13599999770522117,90.0,6.0,3
4,2479.06935,0.019999999552965164,92.0,18.0,3
...,...,...,...,...,...
2429,3009.07044,0.1410638288832567,145.0,7.0,3
2430,2509.07065,0.20279069774776928,95.0,5.0,373
2431,2229.07055,0.08603217197087751,67.0,6.0,188
2432,1839.06885,0.11239436563615962,28.0,23.0,4


In [272]:
dmx['ERA5_lon_index'] = dmx['ERA5_lon_index_mean'].astype('int')
dmx['ERA5_lat_index'] = dmx['ERA5_lat_index_mean'].astype('int')

In [273]:
dmx = dmx.drop(columns=['ERA5_lon_index_mean', 'ERA5_lat_index_mean'])
dmy = dmy.drop(columns=['ERA5_lon_index_mean', 'ERA5_lat_index_mean'])

In [274]:
dmx

#,lon_lat,Lichen_mean,N,ERA5_lon_index,ERA5_lat_index
0,2459.07055,0.18827586126481666,12,90,6
1,2239.07065,0.13291338627321983,127,68,5
2,2839.06994,0.15500000352039933,58,128,12
3,2479.06935,0.06000000052154064,6,92,18
4,2489.06925,0.029999999329447746,423,93,19
...,...,...,...,...,...
2446,3009.07044,0.10564285733604005,7,145,7
2447,2229.07055,0.12946180516155437,576,67,6
2448,2899.07085,0.1742857141154153,140,134,3
2449,2569.06925,0.07666666557391484,366,101,19


In [275]:
dmy

#,lon_lat,Lichen_mean,N
0,2239.07065,0.06981481404768096,54
1,2839.06994,0.03999999910593033,10
2,2499.06965,0.029999999329447746,6
3,2459.07055,0.13599999770522117,3
4,2479.06935,0.019999999552965164,3
...,...,...,...
2429,3009.07044,0.1410638288832567,3
2430,2509.07065,0.20279069774776928,373
2431,2229.07055,0.08603217197087751,188
2432,1839.06885,0.11239436563615962,4


In [276]:
dmx['ERA5_lon'] = Longitudes[dmx['ERA5_lon_index'].values].values
dmx['ERA5_lat'] = Latitudes[dmx['ERA5_lat_index'].values].values

In [277]:
dmx = dmx.drop(columns=['ERA5_lon_index', 'ERA5_lat_index'])

In [278]:
dmx

#,lon_lat,Lichen_mean,N,ERA5_lon,ERA5_lat
0,2459.07055,0.18827586126481666,12,24.59000015258789,70.55000305175781
1,2239.07065,0.13291338627321983,127,22.389999389648438,70.6500015258789
2,2839.06994,0.15500000352039933,58,28.389999389648438,69.94999694824219
3,2479.06935,0.06000000052154064,6,24.790000915527344,69.3499984741211
4,2489.06925,0.029999999329447746,423,24.889999389648438,69.25
...,...,...,...,...,...
2446,3009.07044,0.10564285733604005,7,30.09000015258789,70.44999694824219
2447,2229.07055,0.12946180516155437,576,22.290000915527344,70.55000305175781
2448,2899.07085,0.1742857141154153,140,28.989999771118164,70.8499984741211
2449,2569.06925,0.07666666557391484,366,25.690000534057617,69.25


## Extract ERA5 data for  the selected period of the year (when RoS events mostly occur)

In [279]:
ERA5 = ERA5land.sel(time=slice(str(Year + 1) + '-01-01', str(Year + 1)  + '-12-31'))

In [280]:
ERA5 = ERA5.isel(time=range(Number_of_days * 24))

In [281]:
# When using ERA%-land for recent years there is an additional expver - Not used until 2019
# ERA5 = ERA5.isel(expver = 0)

In [282]:
ERA5

In [283]:
# Extract ERA5 t2m, tp and sd fields 
ERA5_t2m = ERA5.where(ERA5['latitude'].isin(dmx['ERA5_lat'].values) & ERA5['longitude'].isin(dmx['ERA5_lon'].values))['t2m']
ERA5_tp = ERA5.where(ERA5['latitude'].isin(dmx['ERA5_lat'].values) & ERA5['longitude'].isin(dmx['ERA5_lon'].values))['tp']
#ERA5_sd = ERA5.where(ERA5['latitude'].isin(dmx['ERA5_lat'].values) & ERA5['longitude'].isin(dmx['ERA5_lon'].values))['sd']

In [284]:
ERA5_t2m

## Rain on Snow criteria (according to https://www.hydrol-earth-syst-sci.net/23/2983/2019/hess-23-2983-2019.pdf)
 * total rainfall volume of at least 20 mm within 12 h
### or 
 * air temperatures above 0C (273.15K)
 * and initial snowpack depth of at least 10 cm

In [285]:
# Normalizing temperature, total precipitation and snow depth values accordidng to these criteria
ERA5_t2m = ERA5_t2m / 273.15
ERA5_tp = ERA5_tp / 0.02 * 12.
#ERA5_sd = ERA5_sd / 0.1

In [286]:
dh_t2m = ERA5_t2m.stack(z=['latitude', 'longitude']).to_pandas().transpose().reset_index()
dh_tp = ERA5_tp.stack(z=['latitude', 'longitude']).to_pandas().transpose().reset_index()
#dh_sd = ERA5_sd.stack(z=['latitude', 'longitude']).to_pandas().transpose().reset_index()

In [287]:
dh_tp = dh_tp.drop(columns=['latitude', 'longitude'])
#dh_sd = dh_sd.drop(columns=['latitude', 'longitude'])

In [288]:
# Create labels for ERA5-land variables to replace the dates
label_t2m = ['latitude', 'longitude']
label_tp = list()
#label_sd = list()
for i in range(Number_of_days * 24):
    label_t2m.append('t2m_'+ str(i))
    label_tp.append('tp_'+ str(i))
#    label_sd.append('sd_'+ str(i))

In [289]:
dh_t2m.set_axis(label_t2m, axis="columns", inplace=True)
dh_tp.set_axis(label_tp, axis="columns", inplace=True)
#dh_sd.set_axis(label_sd, axis="columns", inplace=True)

  dh_t2m.set_axis(label_t2m, axis="columns", inplace=True)
  dh_tp.set_axis(label_tp, axis="columns", inplace=True)


In [290]:
dh_t2m

Unnamed: 0,latitude,longitude,t2m_0,t2m_1,t2m_2,t2m_3,t2m_4,t2m_5,t2m_6,t2m_7,...,t2m_8750,t2m_8751,t2m_8752,t2m_8753,t2m_8754,t2m_8755,t2m_8756,t2m_8757,t2m_8758,t2m_8759
0,71.150002,15.590000,,,,,,,,,...,,,,,,,,,,
1,71.150002,15.690000,,,,,,,,,...,,,,,,,,,,
2,71.150002,15.790000,,,,,,,,,...,,,,,,,,,,
3,71.150002,15.890000,,,,,,,,,...,,,,,,,,,,
4,71.150002,15.990000,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4490,68.349998,30.590000,0.975647,0.975050,0.974510,0.973563,0.973013,0.971906,0.971098,0.970288,...,0.965198,0.966552,0.966809,0.967672,0.969347,0.970679,0.971948,0.971320,0.969094,0.968082
4491,68.349998,30.690001,0.975205,0.974517,0.974020,0.973095,0.972560,0.971477,0.970696,0.969904,...,0.964510,0.965888,0.966070,0.966751,0.968384,0.969789,0.971258,0.970914,0.968733,0.967827
4492,68.349998,30.790001,0.974448,0.973685,0.973292,0.972425,0.971910,0.970905,0.970170,0.969409,...,0.963629,0.964994,0.965116,0.965600,0.967142,0.968560,0.970159,0.970084,0.968012,0.967244
4493,68.349998,30.889999,0.972933,0.972321,0.972234,0.971566,0.971098,0.970339,0.969793,0.969128,...,0.962740,0.963989,0.964033,0.964268,0.965571,0.966838,0.968344,0.968442,0.966722,0.966192


In [291]:
dh_tp

Unnamed: 0,tp_0,tp_1,tp_2,tp_3,tp_4,tp_5,tp_6,tp_7,tp_8,tp_9,...,tp_8750,tp_8751,tp_8752,tp_8753,tp_8754,tp_8755,tp_8756,tp_8757,tp_8758,tp_8759
0,,,,,,,,,,,...,,,,,,,,,,
1,,,,,,,,,,,...,,,,,,,,,,
2,,,,,,,,,,,...,,,,,,,,,,
3,,,,,,,,,,,...,,,,,,,,,,
4,,,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4490,1.895245,0.163165,0.285317,0.371112,0.444270,0.490161,0.530730,0.563097,0.575956,0.580611,...,0.016626,0.023277,0.024164,0.025273,0.026159,0.029042,0.031923,0.033253,0.033919,0.034362
4491,1.872632,0.162500,0.284874,0.370447,0.444935,0.491713,0.532282,0.565758,0.578837,0.583715,...,0.016183,0.023277,0.024164,0.025273,0.026602,0.029485,0.032367,0.033919,0.034806,0.035692
4492,1.850463,0.161613,0.283765,0.369782,0.444714,0.491934,0.532726,0.567310,0.580611,0.585488,...,0.015296,0.023277,0.024386,0.025716,0.027046,0.030372,0.033253,0.034806,0.035692,0.036579
4493,1.824082,0.161170,0.283322,0.370225,0.446266,0.494373,0.534943,0.570857,0.584380,0.589035,...,0.014853,0.023277,0.024386,0.025716,0.027046,0.030593,0.033697,0.035692,0.036579,0.037244


In [292]:
#  Glue together dh_t2m and dh_tp  <- not dh_sd
dh = pd.concat([dh_t2m, dh_tp], axis = 1)

In [293]:
dh

Unnamed: 0,latitude,longitude,t2m_0,t2m_1,t2m_2,t2m_3,t2m_4,t2m_5,t2m_6,t2m_7,...,tp_8750,tp_8751,tp_8752,tp_8753,tp_8754,tp_8755,tp_8756,tp_8757,tp_8758,tp_8759
0,71.150002,15.590000,,,,,,,,,...,,,,,,,,,,
1,71.150002,15.690000,,,,,,,,,...,,,,,,,,,,
2,71.150002,15.790000,,,,,,,,,...,,,,,,,,,,
3,71.150002,15.890000,,,,,,,,,...,,,,,,,,,,
4,71.150002,15.990000,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4490,68.349998,30.590000,0.975647,0.975050,0.974510,0.973563,0.973013,0.971906,0.971098,0.970288,...,0.016626,0.023277,0.024164,0.025273,0.026159,0.029042,0.031923,0.033253,0.033919,0.034362
4491,68.349998,30.690001,0.975205,0.974517,0.974020,0.973095,0.972560,0.971477,0.970696,0.969904,...,0.016183,0.023277,0.024164,0.025273,0.026602,0.029485,0.032367,0.033919,0.034806,0.035692
4492,68.349998,30.790001,0.974448,0.973685,0.973292,0.972425,0.971910,0.970905,0.970170,0.969409,...,0.015296,0.023277,0.024386,0.025716,0.027046,0.030372,0.033253,0.034806,0.035692,0.036579
4493,68.349998,30.889999,0.972933,0.972321,0.972234,0.971566,0.971098,0.970339,0.969793,0.969128,...,0.014853,0.023277,0.024386,0.025716,0.027046,0.030593,0.033697,0.035692,0.036579,0.037244


In [294]:
# Add combined lon_lat column to dh
dh['ERA5_lon_lat'] = (dh['longitude'] * 100).astype('int') + (dh['latitude'] * 100).astype('int') / 100000

In [295]:
# Drop latitude and longitude columns which are not used anymore in dh
dh = dh.drop(columns=['latitude', 'longitude'])

In [296]:
dh

Unnamed: 0,t2m_0,t2m_1,t2m_2,t2m_3,t2m_4,t2m_5,t2m_6,t2m_7,t2m_8,t2m_9,...,tp_8751,tp_8752,tp_8753,tp_8754,tp_8755,tp_8756,tp_8757,tp_8758,tp_8759,ERA5_lon_lat
0,,,,,,,,,,,...,,,,,,,,,,1559.07115
1,,,,,,,,,,,...,,,,,,,,,,1568.07115
2,,,,,,,,,,,...,,,,,,,,,,1578.07115
3,,,,,,,,,,,...,,,,,,,,,,1589.07115
4,,,,,,,,,,,...,,,,,,,,,,1598.07115
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4490,0.975647,0.975050,0.974510,0.973563,0.973013,0.971906,0.971098,0.970288,0.968566,0.967563,...,0.023277,0.024164,0.025273,0.026159,0.029042,0.031923,0.033253,0.033919,0.034362,3059.06834
4491,0.975205,0.974517,0.974020,0.973095,0.972560,0.971477,0.970696,0.969904,0.968196,0.967159,...,0.023277,0.024164,0.025273,0.026602,0.029485,0.032367,0.033919,0.034806,0.035692,3069.06834
4492,0.974448,0.973685,0.973292,0.972425,0.971910,0.970905,0.970170,0.969409,0.967736,0.966671,...,0.023277,0.024386,0.025716,0.027046,0.030372,0.033253,0.034806,0.035692,0.036579,3079.06834
4493,0.972933,0.972321,0.972234,0.971566,0.971098,0.970339,0.969793,0.969128,0.967570,0.966505,...,0.023277,0.024386,0.025716,0.027046,0.030593,0.033697,0.035692,0.036579,0.037244,3088.06834


In [297]:
dmx

#,lon_lat,Lichen_mean,N,ERA5_lon,ERA5_lat
0,2459.07055,0.18827586126481666,12,24.59000015258789,70.55000305175781
1,2239.07065,0.13291338627321983,127,22.389999389648438,70.6500015258789
2,2839.06994,0.15500000352039933,58,28.389999389648438,69.94999694824219
3,2479.06935,0.06000000052154064,6,24.790000915527344,69.3499984741211
4,2489.06925,0.029999999329447746,423,24.889999389648438,69.25
...,...,...,...,...,...
2446,3009.07044,0.10564285733604005,7,30.09000015258789,70.44999694824219
2447,2229.07055,0.12946180516155437,576,22.290000915527344,70.55000305175781
2448,2899.07085,0.1742857141154153,140,28.989999771118164,70.8499984741211
2449,2569.06925,0.07666666557391484,366,25.690000534057617,69.25


In [298]:
dmx = dmx.drop(columns=['ERA5_lon', 'ERA5_lat'])

In [299]:
# Convert to panda dw x & y
dwx_pandas = dmx.to_pandas_df()
dwy_pandas = dmy.to_pandas_df()

## Join dwx (WLC) with dh (ERA5 t2m-tp-sd)

In [300]:
dwx_pandas

Unnamed: 0,lon_lat,Lichen_mean,N
0,2459.07055,0.188276,12
1,2239.07065,0.132913,127
2,2839.06994,0.155000,58
3,2479.06935,0.060000,6
4,2489.06925,0.030000,423
...,...,...,...
2446,3009.07044,0.105643,7
2447,2229.07055,0.129462,576
2448,2899.07085,0.174286,140
2449,2569.06925,0.076667,366


In [301]:
# Join dwx (WLC) with df (ERA5 t2m-tp)
dx = dwx_pandas.set_index('lon_lat').join(dh.set_index('ERA5_lon_lat'), on='lon_lat')

In [302]:
dx

Unnamed: 0_level_0,Lichen_mean,N,t2m_0,t2m_1,t2m_2,t2m_3,t2m_4,t2m_5,t2m_6,t2m_7,...,tp_8750,tp_8751,tp_8752,tp_8753,tp_8754,tp_8755,tp_8756,tp_8757,tp_8758,tp_8759
lon_lat,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2459.07055,0.188276,12,0.968329,0.968344,0.968841,0.969625,0.971999,0.975627,0.979889,0.981955,...,1.108238,1.416168,1.598178,1.760456,1.908325,2.139771,2.253056,2.359246,2.445485,2.505342
2239.07065,0.132913,127,,,,,,,,,...,,,,,,,,,,
2839.06994,0.155000,58,,,,,,,,,...,,,,,,,,,,
2479.06935,0.060000,6,,,,,,,,,...,,,,,,,,,,
2489.06925,0.030000,423,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3009.07044,0.105643,7,0.974646,0.976785,0.978981,0.980985,0.983348,0.985656,0.986721,0.985607,...,0.855731,1.125309,1.339685,1.468266,1.533887,1.554948,1.564259,1.576230,1.595739,1.619682
2229.07055,0.129462,576,0.985090,0.985188,0.985855,0.986812,0.988995,0.990974,0.992880,0.994278,...,6.038448,6.285635,6.421754,6.570288,6.889524,7.188587,7.548171,7.900883,8.194404,8.523838
2899.07085,0.174286,140,,,,,,,,,...,,,,,,,,,,
2569.06925,0.076667,366,0.970403,0.970059,0.969700,0.968675,0.966705,0.966589,0.967874,0.975749,...,0.139887,0.148533,0.153854,0.158066,0.161391,0.163830,0.168042,0.173806,0.180014,0.184891


In [303]:
# Drop the Rows with NaN Values
dx = dx.dropna()

In [304]:
dx = dx.reset_index()

In [305]:
dx

Unnamed: 0,lon_lat,Lichen_mean,N,t2m_0,t2m_1,t2m_2,t2m_3,t2m_4,t2m_5,t2m_6,...,tp_8750,tp_8751,tp_8752,tp_8753,tp_8754,tp_8755,tp_8756,tp_8757,tp_8758,tp_8759
0,1729.06844,0.023000,10,0.985793,0.985725,0.983523,0.982341,0.980661,0.979265,0.977570,...,1.850020,1.954437,2.081023,2.194308,2.298725,2.379199,2.466324,2.508224,2.571628,2.682917
1,2419.06875,0.077273,149,0.973603,0.973115,0.972207,0.970714,0.968611,0.967022,0.965296,...,0.042786,0.046777,0.049880,0.052319,0.054314,0.055645,0.057196,0.058970,0.060078,0.061409
2,2679.06975,0.161037,9,0.962970,0.963299,0.963567,0.963265,0.964566,0.967803,0.971715,...,0.242753,0.255611,0.266695,0.280884,0.295737,0.327660,0.400597,0.441389,0.463780,0.484397
3,2609.06994,0.114768,164,0.955466,0.955622,0.955690,0.955431,0.957508,0.961315,0.965759,...,0.307708,0.346726,0.387739,0.450478,0.540928,0.654878,0.813388,0.907607,0.998057,1.042839
4,2479.06975,0.104222,90,0.956112,0.955335,0.954141,0.953363,0.953724,0.954732,0.958189,...,0.221914,0.257162,0.281992,0.305269,0.332981,0.356703,0.402814,0.450478,0.487722,0.515212
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
849,2629.06915,0.015000,8,0.970166,0.969656,0.969405,0.968324,0.966665,0.966376,0.967062,...,0.119935,0.124812,0.127694,0.129911,0.131463,0.132793,0.135232,0.139000,0.144321,0.149420
850,1869.06855,0.137367,1238,0.967437,0.967310,0.966509,0.966239,0.965688,0.965218,0.964373,...,1.724764,1.948230,2.181006,2.412896,2.659639,2.767382,2.810612,2.840097,2.877784,2.949169
851,2579.07105,0.054444,812,0.990227,0.990582,0.991465,0.992337,0.991467,0.991347,0.990251,...,2.783122,2.906383,2.953603,2.972669,2.985527,3.016785,3.057798,3.098368,3.126301,3.150244
852,2609.06915,0.113125,48,0.970596,0.970117,0.969818,0.968757,0.966935,0.966605,0.967310,...,0.121044,0.127029,0.130133,0.132793,0.134345,0.136119,0.138557,0.142326,0.147203,0.150528


In [306]:
## Save into **local** HDF5 file without index
x_filename = os.path.join(path, 'x_mean_tp1_' + str(Year) + '.hdf')
print(x_filename)
dx.to_hdf(x_filename, key='df', mode="w", index=False)

/home/ubuntu/data/x_mean_tp1_2018.hdf


## Find locations with lichen in the following year corresponding to those in current year

In [307]:
dwx_pandas = dx[['lon_lat']]

In [308]:
dwx_pandas

Unnamed: 0,lon_lat
0,1729.06844
1,2419.06875
2,2679.06975
3,2609.06994
4,2479.06975
...,...
849,2629.06915
850,1869.06855
851,2579.07105
852,2609.06915


In [309]:
dwy_pandas

Unnamed: 0,lon_lat,Lichen_mean,N
0,2239.07065,0.069815,54
1,2839.06994,0.040000,10
2,2499.06965,0.030000,6
3,2459.07055,0.136000,3
4,2479.06935,0.020000,3
...,...,...,...
2429,3009.07044,0.141064,3
2430,2509.07065,0.202791,373
2431,2229.07055,0.086032,188
2432,1839.06885,0.112394,4


In [310]:
## Join dwx with dwy
dy = dwx_pandas.set_index('lon_lat').join(dwy_pandas.set_index('lon_lat'), on='lon_lat')

In [311]:
dy

Unnamed: 0_level_0,Lichen_mean,N
lon_lat,Unnamed: 1_level_1,Unnamed: 2_level_1
1729.06844,0.026154,13.0
2419.06875,0.038000,102.0
2679.06975,0.122778,3.0
2609.06994,0.123336,1067.0
2479.06975,0.096452,26.0
...,...,...
2629.06915,0.079091,11.0
1869.06855,0.182484,2234.0
2579.07105,0.069744,187.0
2609.06915,0.139351,77.0


In [312]:
dy.fillna(0)

Unnamed: 0_level_0,Lichen_mean,N
lon_lat,Unnamed: 1_level_1,Unnamed: 2_level_1
1729.06844,0.026154,13.0
2419.06875,0.038000,102.0
2679.06975,0.122778,3.0
2609.06994,0.123336,1067.0
2479.06975,0.096452,26.0
...,...,...
2629.06915,0.079091,11.0
1869.06855,0.182484,2234.0
2579.07105,0.069744,187.0
2609.06915,0.139351,77.0


In [313]:
dy.rename(columns = {'Lichen_mean' : 'new_Lichen_mean', 'N' : 'new_N'}, inplace = True)

In [314]:
dy

Unnamed: 0_level_0,new_Lichen_mean,new_N
lon_lat,Unnamed: 1_level_1,Unnamed: 2_level_1
1729.06844,0.026154,13.0
2419.06875,0.038000,102.0
2679.06975,0.122778,3.0
2609.06994,0.123336,1067.0
2479.06975,0.096452,26.0
...,...,...
2629.06915,0.079091,11.0
1869.06855,0.182484,2234.0
2579.07105,0.069744,187.0
2609.06915,0.139351,77.0


In [315]:
## Save into **local** HDF5 file without index
y_filename = os.path.join(path, 'y_mean_tp1_' + str(Year) + '.hdf')
print(y_filename)
dy.to_hdf(y_filename, key='dg', mode="w", index=False)

/home/ubuntu/data/y_mean_tp1_2018.hdf


In [316]:
print('Finished!')

Finished!
