# Lichen fractional cover statistics 
# Prepares labelled input for the Machine Learning algorithm
# (i.e. locations where moss&lichen fractional cover changes can be related to meteorological parameters from ERA5-Land)

# Copernicus Global Land Cover
## Percentage of 100m pixel that is covered by a specific class of land cover
## Valid values 0-100, 200 = masked sea, 255 = missing

In [1]:
!date

Wed Mar 22 09:33:01 UTC 2023


In [2]:
pip install vaex tables --quiet

Note: you may need to restart the kernel to use updated packages.


In [3]:
import numpy as np
import os
import pandas as pd
import s3fs
import xarray as xr
import vaex

# Input datasets, either from s3 storage or local files

# If data is available locally then jump the following cells

### Define s3 storage parameters

In [4]:
client_kwargs={'endpoint_url': 'https://object-store.cloud.muni.cz'}
store = s3fs.S3FileSystem(anon=False, client_kwargs=client_kwargs)
store.ls('Data', detail=True, refresh=True)

[{'Key': 'Data/C_GlobalLandCover_20150101_20190101_Troms-Finnmark.nc',
  'LastModified': datetime.datetime(2023, 2, 1, 12, 44, 42, 95000, tzinfo=tzlocal()),
  'ETag': '"70e848cfeba6b4e2db997b6efb0ad947-8"',
  'Size': 397191332,
  'StorageClass': 'STANDARD',
  'type': 'file',
  'size': 397191332,
  'name': 'Data/C_GlobalLandCover_20150101_20190101_Troms-Finnmark.nc'},
 {'Key': 'Data/reanalysis-era5-land_hourly_2015-01-01_2019-12-31_Troms-Finnmark_T2m-SD-TP.nc',
  'LastModified': datetime.datetime(2023, 2, 1, 9, 53, 15, 635000, tzinfo=tzlocal()),
  'ETag': '"e48be2b71e08b38d296a0ea6db979b09-23"',
  'Size': 1182124070,
  'StorageClass': 'STANDARD',
  'type': 'file',
  'size': 1182124070,
  'name': 'Data/reanalysis-era5-land_hourly_2015-01-01_2019-12-31_Troms-Finnmark_T2m-SD-TP.nc'},
 {'Key': 'Data/reanalysis-era5-land_hourly_2015-01-01_2022-12-31_Troms-Finnmark_T2m-SD-TP.nc',
  'LastModified': datetime.datetime(2023, 3, 10, 11, 50, 32, 549000, tzinfo=tzlocal()),
  'ETag': '"d082ee07a1ab33

## Copernicus Global Land Cover data  from 2015-01-01 to 2019-12-31 already available as a netCDF file stored on EOSC (CESNET)
## Troms og Finnmark
### Mosses and lichens, bare, grass, shrubs and trees

In [5]:
s3path = 'Data/C_GlobalLandCover_20150101_20190101_Troms-Finnmark.nc'

In [6]:
GLC_AOI = xr.open_dataset(store.open(s3path))

## ERA5-land data from 2015-01-01 to 2019-12-31 - already available as a netCDF file stored on EOSC (CESNET)
## 2m Temperature, Snow depth, Total precipitation

In [7]:
s3path = 'Data/reanalysis-era5-land_hourly_2015-01-01_2022-12-31_Troms-Finnmark_T2m-SD-TP.nc'

In [8]:
ERA5land = xr.open_dataset(store.open(s3path))

In [9]:
print(ERA5land.mean(), ERA5land.std())

<xarray.Dataset>
Dimensions:  ()
Data variables:
    sd       float32 0.1085
    t2m      float32 263.7
    tp       float32 0.001208 <xarray.Dataset>
Dimensions:  ()
Data variables:
    sd       float64 0.1709
    t2m      float64 72.7
    tp       float64 0.002128


# Datasets from **local** files

In [10]:
path = '/home/jovyan/Arctic/Vegetation_in_Troms_and_Finnmark/data/'

In [11]:
# World Land cover data from 2015-01-01 to 2019-12-31- already available as a netCDF file stored locally
GLC_filename = os.path.join(path, 'C_GlobalLandCover_20150101_20190101_Troms-Finnmark.nc')
GLC_AOI = xr.open_dataset(GLC_filename, engine = 'netcdf4')

In [12]:
# ERA5-land data already available as a netCDF file stored locally
ERA5_filename = os.path.join(path, 'reanalysis-era5-land_hourly_2015-01-01_2022-12-31_Troms-Finnmark_T2m-SD-TP.nc')
ERA5land = xr.open_dataset(ERA5_filename, engine = 'netcdf4')

In [13]:
ERA5land

In [14]:
Latitudes = ERA5land.latitude.to_index()
Longitudes = ERA5land.longitude.to_index()

In [15]:
Latitudes

Float64Index([ 71.1500015258789, 71.05000305175781, 70.94999694824219,
               70.8499984741211,             70.75,  70.6500015258789,
              70.55000305175781, 70.44999694824219,  70.3499984741211,
                          70.25,  70.1500015258789, 70.05000305175781,
              69.94999694824219,  69.8499984741211,             69.75,
               69.6500015258789, 69.55000305175781, 69.44999694824219,
               69.3499984741211,             69.25,  69.1500015258789,
              69.05000305175781, 68.94999694824219,  68.8499984741211,
                          68.75,  68.6500015258789, 68.55000305175781,
              68.44999694824219,  68.3499984741211],
             dtype='float64', name='latitude')

In [16]:
GLC_AOI = GLC_AOI.rename(x='lon', y='lat', t='time')

In [17]:
GLC_AOI

In [18]:
# Drop variables not directly of interest here
GLC_AOI = GLC_AOI.drop_vars(['crs',
                             'Bare_CoverFraction_layer',
                             'Crops_CoverFraction_layer',
                             'Grass_CoverFraction_layer',
                             'Discrete_Classification_map', 
                             'Discrete_Classification_proba',
                             'Forest_Type_layer',
                             'Shrub_CoverFraction_layer',
                             'Snow_CoverFraction_layer',
                             'Tree_CoverFraction_layer',
                             'BuiltUp_CoverFraction_layer',
                             'PermanentWater_CoverFraction_layer',
                             'SeasonalWater_CoverFraction_layer',
                             'DataDensityIndicator',
                             'Change_Confidence_layer',
                             'dataMask'])

In [19]:
GLC_AOI = GLC_AOI.rename(MossLichen_CoverFraction_layer = 'Lichen')

In [20]:
GLC_AOI

In [21]:
# Troms & Finnmark Global Land Cover area
GLC_AOI_min_lon = GLC_AOI.lon.min()
GLC_AOI_max_lon = GLC_AOI.lon.max()
GLC_AOI_min_lat = GLC_AOI.lat.min()
GLC_AOI_max_lat = GLC_AOI.lat.max()
print(GLC_AOI_min_lon.values, GLC_AOI_max_lon.values, GLC_AOI_min_lat.values, GLC_AOI_max_lat.values)

15.595313502963002 31.06568387333461 68.35153627792579 71.18764738903712


## Use the mask to only keep pixels with lichen **every year**

In [22]:
mask = GLC_AOI['Lichen'].where((GLC_AOI['Lichen'] > 0) & (GLC_AOI['Lichen'] <= 100))

In [23]:
mask = xr.where(mask > 0, 1, 0)

In [24]:
mask = mask.sum(dim = 'time', min_count = 5, skipna=True)

In [25]:
mask = xr.where(mask >= 3, 1, 0)

In [26]:
mask

In [27]:
mask.sum()

In [28]:
de = GLC_AOI.where(mask == 1)

In [29]:
de

In [30]:
de = de.to_dataframe()

In [31]:
de = de.dropna()

In [32]:
de = de.reset_index()

In [33]:
de

Unnamed: 0,time,lat,lon,Lichen
0,2015-01-01,71.166351,25.745314,10.0
1,2015-01-01,71.166351,25.746239,10.0
2,2015-01-01,71.166351,25.747165,10.0
3,2015-01-01,71.166351,25.765684,5.0
4,2015-01-01,71.166351,25.766610,5.0
...,...,...,...,...
1591023,2019-01-01,68.352462,19.331425,11.0
1591024,2019-01-01,68.352462,19.332351,11.0
1591025,2019-01-01,68.352462,19.333276,11.0
1591026,2019-01-01,68.352462,19.338832,5.0


## Each year in a separate dataset and keep only the first 183 days

In [306]:
Year = 2019
Number_of_days = 365
print('x = WLC(' + str(Year)+ ') joined with ERA5land(' + str(Year + 1) + ')')
print('y = WLC(' + str(Year + 1) + ')')

x = WLC(2019) joined with ERA5land(2020)
y = WLC(2020)


In [307]:
# Only keep locations with lichen for the current year
df = de.loc[de['time'] == str(Year) + '-01-01']
dg = de.loc[de['time'] == str(Year + 1) + '-01-01']

In [308]:
df

Unnamed: 0,time,lat,lon,Lichen
1241392,2019-01-01,71.166351,25.810128,3.0
1241393,2019-01-01,71.166351,25.811054,3.0
1241394,2019-01-01,71.166351,25.811980,3.0
1241395,2019-01-01,71.166351,25.812906,2.0
1241396,2019-01-01,71.166351,25.813832,2.0
...,...,...,...,...
1591023,2019-01-01,68.352462,19.331425,11.0
1591024,2019-01-01,68.352462,19.332351,11.0
1591025,2019-01-01,68.352462,19.333276,11.0
1591026,2019-01-01,68.352462,19.338832,5.0


In [309]:
dg

Unnamed: 0,time,lat,lon,Lichen


In [310]:
# Normalize the fractional cover
df['Lichen'] = df['Lichen'].div(100)
dg['Lichen'] = dg['Lichen'].div(100)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['Lichen'] = df['Lichen'].div(100)


In [311]:
df

Unnamed: 0,time,lat,lon,Lichen
1241392,2019-01-01,71.166351,25.810128,0.03
1241393,2019-01-01,71.166351,25.811054,0.03
1241394,2019-01-01,71.166351,25.811980,0.03
1241395,2019-01-01,71.166351,25.812906,0.02
1241396,2019-01-01,71.166351,25.813832,0.02
...,...,...,...,...
1591023,2019-01-01,68.352462,19.331425,0.11
1591024,2019-01-01,68.352462,19.332351,0.11
1591025,2019-01-01,68.352462,19.333276,0.11
1591026,2019-01-01,68.352462,19.338832,0.05


In [312]:
dg

Unnamed: 0,time,lat,lon,Lichen


In [313]:
# Convert to VAEX
dvx = vaex.from_pandas(df)
dvy = vaex.from_pandas(dg)

In [314]:
dvx

#,time,lat,lon,Lichen
0,2019-01-01 00:00:00.000000000,71.16635109274083,25.810128317778634,0.03
1,2019-01-01 00:00:00.000000000,71.16635109274083,25.81105424370456,0.03
2,2019-01-01 00:00:00.000000000,71.16635109274083,25.811980169630488,0.03
3,2019-01-01 00:00:00.000000000,71.16635109274083,25.812906095556414,0.02
4,2019-01-01 00:00:00.000000000,71.16635109274083,25.813832021482337,0.02
...,...,...,...,...
349631,2019-01-01 00:00:00.000000000,68.35246220385172,19.33142461407441,0.11
349632,2019-01-01 00:00:00.000000000,68.35246220385172,19.332350540000338,0.11
349633,2019-01-01 00:00:00.000000000,68.35246220385172,19.333276465926264,0.11
349634,2019-01-01 00:00:00.000000000,68.35246220385172,19.33883202148182,0.05


In [315]:
dvy

#,time,lat,lon,Lichen
,,,,


In [316]:
# Find the correspondind ERA5-land lat-lon
# Careful with the latitude, in reverse order
dvx['ERA5_lon_index'] = ((dvx.lon - 15.59) / 0.1).astype('int').values
dvx['ERA5_lat_index'] = 28 - ((dvx.lat - 68.35) / 0.1).astype('int').values
dvy['ERA5_lon_index'] = ((dvy.lon - 15.59) / 0.1).astype('int').values
dvy['ERA5_lat_index'] = 28 - ((dvy.lat - 68.35) / 0.1).astype('int').values

In [317]:
dvx

#,time,lat,lon,Lichen,ERA5_lon_index,ERA5_lat_index
0,2019-01-01 00:00:00.000000000,71.16635109274083,25.810128317778634,0.03,102,0
1,2019-01-01 00:00:00.000000000,71.16635109274083,25.81105424370456,0.03,102,0
2,2019-01-01 00:00:00.000000000,71.16635109274083,25.811980169630488,0.03,102,0
3,2019-01-01 00:00:00.000000000,71.16635109274083,25.812906095556414,0.02,102,0
4,2019-01-01 00:00:00.000000000,71.16635109274083,25.813832021482337,0.02,102,0
...,...,...,...,...,...,...
349631,2019-01-01 00:00:00.000000000,68.35246220385172,19.33142461407441,0.11,37,28
349632,2019-01-01 00:00:00.000000000,68.35246220385172,19.332350540000338,0.11,37,28
349633,2019-01-01 00:00:00.000000000,68.35246220385172,19.333276465926264,0.11,37,28
349634,2019-01-01 00:00:00.000000000,68.35246220385172,19.33883202148182,0.05,37,28


# Adding columns with the ERA5-land longitude and latitude to dv

In [318]:
dvx['ERA5_lon'] = ERA5land.sel(time="2015-01-01").longitude[dvx['ERA5_lon_index'].values].values
dvx['ERA5_lat'] = ERA5land.sel(time="2015-01-01").latitude[dvx['ERA5_lat_index'].values].values
dvy['ERA5_lon'] = ERA5land.sel(time="2015-01-01").longitude[dvy['ERA5_lon_index'].values].values
dvy['ERA5_lat'] = ERA5land.sel(time="2015-01-01").latitude[dvy['ERA5_lat_index'].values].values

In [319]:
# Add combined lon_lat column to dv x & y
dvx['lon_lat'] = (dvx['ERA5_lon'] * 100).astype('int') + (dvx['ERA5_lat'] * 100).astype('int') / 100000
dvy['lon_lat'] = (dvy['ERA5_lon'] * 100).astype('int') + (dvy['ERA5_lat'] * 100).astype('int') / 100000

In [320]:
dvx

#,time,lat,lon,Lichen,ERA5_lon_index,ERA5_lat_index,ERA5_lon,ERA5_lat,lon_lat
0,2019-01-01 00:00:00.000000000,71.16635109274083,25.810128317778634,0.03,102,0,25.79,71.15,2579.07115
1,2019-01-01 00:00:00.000000000,71.16635109274083,25.81105424370456,0.03,102,0,25.79,71.15,2579.07115
2,2019-01-01 00:00:00.000000000,71.16635109274083,25.811980169630488,0.03,102,0,25.79,71.15,2579.07115
3,2019-01-01 00:00:00.000000000,71.16635109274083,25.812906095556414,0.02,102,0,25.79,71.15,2579.07115
4,2019-01-01 00:00:00.000000000,71.16635109274083,25.813832021482337,0.02,102,0,25.79,71.15,2579.07115
...,...,...,...,...,...,...,...,...,...
349631,2019-01-01 00:00:00.000000000,68.35246220385172,19.33142461407441,0.11,37,28,19.29,68.35,1929.06835
349632,2019-01-01 00:00:00.000000000,68.35246220385172,19.332350540000338,0.11,37,28,19.29,68.35,1929.06835
349633,2019-01-01 00:00:00.000000000,68.35246220385172,19.333276465926264,0.11,37,28,19.29,68.35,1929.06835
349634,2019-01-01 00:00:00.000000000,68.35246220385172,19.33883202148182,0.05,37,28,19.29,68.35,1929.06835


In [321]:
dvx = dvx.drop(columns=['time', 'lat', 'lon', 'ERA5_lon', 'ERA5_lat'])
dvy = dvy.drop(columns=['time', 'lat', 'lon', 'ERA5_lon', 'ERA5_lat'])

In [322]:
dvx

#,Lichen,ERA5_lon_index,ERA5_lat_index,lon_lat
0,0.03,102,0,2579.07115
1,0.03,102,0,2579.07115
2,0.03,102,0,2579.07115
3,0.02,102,0,2579.07115
4,0.02,102,0,2579.07115
...,...,...,...,...
349631,0.11,37,28,1929.06835
349632,0.11,37,28,1929.06835
349633,0.11,37,28,1929.06835
349634,0.05,37,28,1929.06835


In [323]:
dvy

#,Lichen,ERA5_lon_index,ERA5_lat_index,lon_lat
,,,,


# Calculate the mean fractional cover for each ERA5-land grid cell

In [324]:
dmx = dvx.groupby(by='lon_lat', agg='mean')
dmy = dvy.groupby(by='lon_lat', agg='mean')

In [325]:
dmx

#,lon_lat,Lichen_mean,ERA5_lon_index_mean,ERA5_lat_index_mean
0,2459.07055,0.14571428245731763,90.0,6.0
1,2239.07065,0.06981481404768096,68.0,5.0
2,2839.06994,0.019999999552965164,128.0,12.0
3,2479.06935,0.019999999552965164,92.0,18.0
4,2049.06894,0.11961111114360393,49.0,22.0
...,...,...,...,...
2284,2509.07065,0.15457142870873214,95.0,5.0
2285,2229.07055,0.08979338928595308,67.0,6.0
2286,3009.07044,0.07999999938939495,145.0,7.0
2287,2569.06925,0.029999999329447746,101.0,19.0


In [326]:
dmy

#,lon_lat,Lichen_mean,ERA5_lon_index_mean,ERA5_lat_index_mean
,,,,


In [327]:
dmx['ERA5_lon_index'] = dmx['ERA5_lon_index_mean'].astype('int')
dmx['ERA5_lat_index'] = dmx['ERA5_lat_index_mean'].astype('int')

In [328]:
dmx = dmx.drop(columns=['ERA5_lon_index_mean', 'ERA5_lat_index_mean'])
dmy = dmy.drop(columns=['ERA5_lon_index_mean', 'ERA5_lat_index_mean'])

In [329]:
dmx

#,lon_lat,Lichen_mean,ERA5_lon_index,ERA5_lat_index
0,2459.07055,0.14571428245731763,90,6
1,2239.07065,0.06981481404768096,68,5
2,2839.06994,0.019999999552965164,128,12
3,2479.06935,0.019999999552965164,92,18
4,2049.06894,0.11961111114360393,49,22
...,...,...,...,...
2284,2509.07065,0.15457142870873214,95,5
2285,2229.07055,0.08979338928595308,67,6
2286,3009.07044,0.07999999938939495,145,7
2287,2569.06925,0.029999999329447746,101,19


In [330]:
dmy

#,lon_lat,Lichen_mean
,,


In [331]:
dmx['ERA5_lon'] = Longitudes[dmx['ERA5_lon_index'].values].values
dmx['ERA5_lat'] = Latitudes[dmx['ERA5_lat_index'].values].values

In [332]:
dmx = dmx.drop(columns=['ERA5_lon_index', 'ERA5_lat_index'])

In [333]:
dmx

#,lon_lat,Lichen_mean,ERA5_lon,ERA5_lat
0,2459.07055,0.14571428245731763,24.59000015258789,70.55000305175781
1,2239.07065,0.06981481404768096,22.389999389648438,70.6500015258789
2,2839.06994,0.019999999552965164,28.389999389648438,69.94999694824219
3,2479.06935,0.019999999552965164,24.790000915527344,69.3499984741211
4,2049.06894,0.11961111114360393,20.489999771118164,68.94999694824219
...,...,...,...,...
2284,2509.07065,0.15457142870873214,25.09000015258789,70.6500015258789
2285,2229.07055,0.08979338928595308,22.290000915527344,70.55000305175781
2286,3009.07044,0.07999999938939495,30.09000015258789,70.44999694824219
2287,2569.06925,0.029999999329447746,25.690000534057617,69.25


## Extract ERA5 data for  the selected period of the year (when RoS events mostly occur)

In [334]:
ERA5 = ERA5land.sel(time=slice(str(Year + 1) + '-01-01', str(Year + 1)  + '-12-31'))

In [335]:
ERA5 = ERA5.isel(time=range(Number_of_days * 24))

In [336]:
ERA5 = ERA5.isel(expver = 0)

In [337]:
ERA5

In [338]:
# Extract ERA5 t2m, tp and sd fields 
ERA5_t2m = ERA5.where(ERA5['latitude'].isin(dmx['ERA5_lat'].values) & ERA5['longitude'].isin(dmx['ERA5_lon'].values))['t2m']
ERA5_tp = ERA5.where(ERA5['latitude'].isin(dmx['ERA5_lat'].values) & ERA5['longitude'].isin(dmx['ERA5_lon'].values))['tp']
ERA5_sd = ERA5.where(ERA5['latitude'].isin(dmx['ERA5_lat'].values) & ERA5['longitude'].isin(dmx['ERA5_lon'].values))['sd']

In [339]:
ERA5_t2m

## Rain on Snow criteria (according to https://www.hydrol-earth-syst-sci.net/23/2983/2019/hess-23-2983-2019.pdf)
 * total rainfall volume of at least 20 mm within 12 h
### or 
 * air temperatures above 0C (273.15K)
 * and initial snowpack depth of at least 10 cm

In [340]:
# Normalizing temperature, total precipitation and snow depth values accordidng to these criteria
ERA5_t2m = ERA5_t2m / 273.15
ERA5_tp = ERA5_tp / 0.02 * 12.
ERA5_sd = ERA5_sd / 0.1

In [341]:
# Normalizing temperature, total precipitation and snow depth values accordidng to the mean and std
# Mean:
#     sd       float32 0.1085
#     t2m      float32 263.7
#     tp       float32 0.001208
# Std:
#     sd       float64 0.1709
#     t2m      float64 72.7
#     tp       float64 0.002128
# ERA5_t2m = ERA5_t2m / 263.7 - 72.7
# ERA5_tp = ERA5_tp / 0.001208 - 0.002128
# ERA5_sd = ERA5_sd / 0.1085 - 0.1709

In [342]:
dh_t2m = ERA5_t2m.stack(z=['latitude', 'longitude']).to_pandas().transpose().reset_index()
dh_tp = ERA5_tp.stack(z=['latitude', 'longitude']).to_pandas().transpose().reset_index()
dh_sd = ERA5_sd.stack(z=['latitude', 'longitude']).to_pandas().transpose().reset_index()

In [343]:
dh_tp = dh_tp.drop(columns=['latitude', 'longitude'])
dh_sd = dh_sd.drop(columns=['latitude', 'longitude'])

In [344]:
# Create labels for ERA5-land variables to replace the dates
label_t2m = ['latitude', 'longitude']
label_tp = list()
label_sd = list()
for i in range(Number_of_days * 24):
    label_t2m.append('t2m_'+ str(i))
    label_tp.append('tp_'+ str(i))
    label_sd.append('sd_'+ str(i))

In [345]:
dh_t2m.set_axis(label_t2m, axis="columns", inplace=True)
dh_tp.set_axis(label_tp, axis="columns", inplace=True)
dh_sd.set_axis(label_sd, axis="columns", inplace=True)

In [346]:
dh_t2m

Unnamed: 0,latitude,longitude,t2m_0,t2m_1,t2m_2,t2m_3,t2m_4,t2m_5,t2m_6,t2m_7,...,t2m_8750,t2m_8751,t2m_8752,t2m_8753,t2m_8754,t2m_8755,t2m_8756,t2m_8757,t2m_8758,t2m_8759
0,71.150002,15.590000,,,,,,,,,...,,,,,,,,,,
1,71.150002,15.690000,,,,,,,,,...,,,,,,,,,,
2,71.150002,15.790000,,,,,,,,,...,,,,,,,,,,
3,71.150002,15.890000,,,,,,,,,...,,,,,,,,,,
4,71.150002,15.990000,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4490,68.349998,30.590000,0.967179,0.965225,0.962902,0.960947,0.958646,0.956662,0.954962,0.952986,...,0.976112,0.976101,0.976190,0.976545,0.976536,0.977297,0.977266,0.976889,0.975831,0.974644
4491,68.349998,30.690001,0.966967,0.965045,0.962731,0.960909,0.958814,0.956895,0.955167,0.953294,...,0.975915,0.975862,0.975973,0.976376,0.976419,0.977204,0.977217,0.976896,0.975902,0.974750
4492,68.349998,30.790001,0.966432,0.964532,0.962194,0.960330,0.958202,0.956196,0.954390,0.952515,...,0.975715,0.975629,0.975766,0.976206,0.976286,0.977100,0.977147,0.976878,0.975957,0.974846
4493,68.349998,30.889999,0.965515,0.963667,0.961457,0.959640,0.957439,0.955216,0.953287,0.951430,...,0.975602,0.975478,0.975627,0.976095,0.976221,0.977044,0.977120,0.976918,0.976099,0.975050


In [347]:
dh_tp

Unnamed: 0,tp_0,tp_1,tp_2,tp_3,tp_4,tp_5,tp_6,tp_7,tp_8,tp_9,...,tp_8750,tp_8751,tp_8752,tp_8753,tp_8754,tp_8755,tp_8756,tp_8757,tp_8758,tp_8759
0,,,,,,,,,,,...,,,,,,,,,,
1,,,,,,,,,,,...,,,,,,,,,,
2,,,,,,,,,,,...,,,,,,,,,,
3,,,,,,,,,,,...,,,,,,,,,,
4,,,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4490,0.035027,0.000443,0.000886,0.000886,0.000886,0.000886,0.000886,0.000886,0.000886,0.000886,...,0.207282,0.209942,0.213711,0.218588,0.224130,0.238762,0.254946,0.265587,0.271573,0.276007
4491,0.035914,0.000443,0.000443,0.000443,0.000443,0.000443,0.000443,0.000443,0.000443,0.000443,...,0.194424,0.196641,0.199966,0.204400,0.209720,0.222800,0.237654,0.247186,0.252729,0.256941
4492,0.037022,0.000443,0.000443,0.000443,0.000443,0.000443,0.000443,0.000443,0.000443,0.000443,...,0.181344,0.183117,0.185778,0.189990,0.194867,0.206395,0.219475,0.228121,0.232998,0.236767
4493,0.037909,0.000443,0.000443,0.000443,0.000443,0.000443,0.000443,0.000443,0.000443,0.000443,...,0.169373,0.170702,0.173363,0.177131,0.181565,0.191763,0.203513,0.211051,0.215484,0.219031


In [348]:
dh_sd

Unnamed: 0,sd_0,sd_1,sd_2,sd_3,sd_4,sd_5,sd_6,sd_7,sd_8,sd_9,...,sd_8750,sd_8751,sd_8752,sd_8753,sd_8754,sd_8755,sd_8756,sd_8757,sd_8758,sd_8759
0,,,,,,,,,,,...,,,,,,,,,,
1,,,,,,,,,,,...,,,,,,,,,,
2,,,,,,,,,,,...,,,,,,,,,,
3,,,,,,,,,,,...,,,,,,,,,,
4,,,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4490,1.591815,1.591815,1.592340,1.592340,1.592340,1.592340,1.592340,1.592340,1.592340,1.592340,...,0.459167,0.459167,0.459167,0.459167,0.459167,0.459167,0.459167,0.459694,0.459694,0.459694
4491,1.593920,1.593920,1.593920,1.593920,1.593920,1.593920,1.593920,1.593920,1.593920,1.593920,...,0.460746,0.460746,0.460746,0.460746,0.460746,0.461273,0.461273,0.461273,0.461273,0.461273
4492,1.542317,1.542317,1.542317,1.542317,1.542317,1.542317,1.542317,1.542317,1.542317,1.542317,...,0.449688,0.449688,0.449688,0.449688,0.449688,0.449688,0.449688,0.450215,0.450215,0.450215
4493,1.266922,1.266922,1.266922,1.266922,1.266922,1.266922,1.266922,1.266922,1.266922,1.266922,...,0.377549,0.377549,0.377549,0.377549,0.377549,0.377549,0.377549,0.377549,0.377549,0.377549


In [349]:
#  Glue together dh_t2m and dh_tp  <- not dh_sd
dh = pd.concat([dh_t2m, dh_tp], axis = 1)

In [350]:
dh

Unnamed: 0,latitude,longitude,t2m_0,t2m_1,t2m_2,t2m_3,t2m_4,t2m_5,t2m_6,t2m_7,...,tp_8750,tp_8751,tp_8752,tp_8753,tp_8754,tp_8755,tp_8756,tp_8757,tp_8758,tp_8759
0,71.150002,15.590000,,,,,,,,,...,,,,,,,,,,
1,71.150002,15.690000,,,,,,,,,...,,,,,,,,,,
2,71.150002,15.790000,,,,,,,,,...,,,,,,,,,,
3,71.150002,15.890000,,,,,,,,,...,,,,,,,,,,
4,71.150002,15.990000,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4490,68.349998,30.590000,0.967179,0.965225,0.962902,0.960947,0.958646,0.956662,0.954962,0.952986,...,0.207282,0.209942,0.213711,0.218588,0.224130,0.238762,0.254946,0.265587,0.271573,0.276007
4491,68.349998,30.690001,0.966967,0.965045,0.962731,0.960909,0.958814,0.956895,0.955167,0.953294,...,0.194424,0.196641,0.199966,0.204400,0.209720,0.222800,0.237654,0.247186,0.252729,0.256941
4492,68.349998,30.790001,0.966432,0.964532,0.962194,0.960330,0.958202,0.956196,0.954390,0.952515,...,0.181344,0.183117,0.185778,0.189990,0.194867,0.206395,0.219475,0.228121,0.232998,0.236767
4493,68.349998,30.889999,0.965515,0.963667,0.961457,0.959640,0.957439,0.955216,0.953287,0.951430,...,0.169373,0.170702,0.173363,0.177131,0.181565,0.191763,0.203513,0.211051,0.215484,0.219031


In [351]:
# Add combined lon_lat column to dh
dh['ERA5_lon_lat'] = (dh['longitude'] * 100).astype('int') + (dh['latitude'] * 100).astype('int') / 100000

In [352]:
# Drop latitude and longitude columns which are not used anymore in dh
dh = dh.drop(columns=['latitude', 'longitude'])

In [353]:
dh

Unnamed: 0,t2m_0,t2m_1,t2m_2,t2m_3,t2m_4,t2m_5,t2m_6,t2m_7,t2m_8,t2m_9,...,tp_8751,tp_8752,tp_8753,tp_8754,tp_8755,tp_8756,tp_8757,tp_8758,tp_8759,ERA5_lon_lat
0,,,,,,,,,,,...,,,,,,,,,,1559.07115
1,,,,,,,,,,,...,,,,,,,,,,1568.07115
2,,,,,,,,,,,...,,,,,,,,,,1578.07115
3,,,,,,,,,,,...,,,,,,,,,,1589.07115
4,,,,,,,,,,,...,,,,,,,,,,1598.07115
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4490,0.967179,0.965225,0.962902,0.960947,0.958646,0.956662,0.954962,0.952986,0.951446,0.949975,...,0.209942,0.213711,0.218588,0.224130,0.238762,0.254946,0.265587,0.271573,0.276007,3059.06834
4491,0.966967,0.965045,0.962731,0.960909,0.958814,0.956895,0.955167,0.953294,0.951763,0.950285,...,0.196641,0.199966,0.204400,0.209720,0.222800,0.237654,0.247186,0.252729,0.256941,3069.06834
4492,0.966432,0.964532,0.962194,0.960330,0.958202,0.956196,0.954390,0.952515,0.951029,0.949593,...,0.183117,0.185778,0.189990,0.194867,0.206395,0.219475,0.228121,0.232998,0.236767,3079.06834
4493,0.965515,0.963667,0.961457,0.959640,0.957439,0.955216,0.953287,0.951430,0.949979,0.948599,...,0.170702,0.173363,0.177131,0.181565,0.191763,0.203513,0.211051,0.215484,0.219031,3088.06834


In [354]:
dmx

#,lon_lat,Lichen_mean,ERA5_lon,ERA5_lat
0,2459.07055,0.14571428245731763,24.59000015258789,70.55000305175781
1,2239.07065,0.06981481404768096,22.389999389648438,70.6500015258789
2,2839.06994,0.019999999552965164,28.389999389648438,69.94999694824219
3,2479.06935,0.019999999552965164,24.790000915527344,69.3499984741211
4,2049.06894,0.11961111114360393,20.489999771118164,68.94999694824219
...,...,...,...,...
2284,2509.07065,0.15457142870873214,25.09000015258789,70.6500015258789
2285,2229.07055,0.08979338928595308,22.290000915527344,70.55000305175781
2286,3009.07044,0.07999999938939495,30.09000015258789,70.44999694824219
2287,2569.06925,0.029999999329447746,25.690000534057617,69.25


In [355]:
dmx = dmx.drop(columns=['ERA5_lon', 'ERA5_lat'])

In [356]:
# Convert to panda dw x & y
dwx_pandas = dmx.to_pandas_df()
dwy_pandas = dmy.to_pandas_df()

## Join dwx (WLC) with dh (ERA5 t2m-tp-sd)

In [357]:
dwx_pandas

Unnamed: 0,lon_lat,Lichen_mean
0,2459.07055,0.145714
1,2239.07065,0.069815
2,2839.06994,0.020000
3,2479.06935,0.020000
4,2049.06894,0.119611
...,...,...
2284,2509.07065,0.154571
2285,2229.07055,0.089793
2286,3009.07044,0.080000
2287,2569.06925,0.030000


In [358]:
# Join dwx (WLC) with df (ERA5 t2m-tp-sd)
dx = dwx_pandas.set_index('lon_lat').join(dh.set_index('ERA5_lon_lat'), on='lon_lat')

In [359]:
dx

Unnamed: 0_level_0,Lichen_mean,t2m_0,t2m_1,t2m_2,t2m_3,t2m_4,t2m_5,t2m_6,t2m_7,t2m_8,...,tp_8750,tp_8751,tp_8752,tp_8753,tp_8754,tp_8755,tp_8756,tp_8757,tp_8758,tp_8759
lon_lat,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2459.07055,0.145714,0.973279,0.971218,0.969094,0.966749,0.964950,0.963021,0.960075,0.963055,0.960292,...,0.152302,0.152302,0.152302,0.152302,0.152302,0.152302,0.152302,0.152302,0.152302,0.152524
2239.07065,0.069815,,,,,,,,,,...,,,,,,,,,,
2839.06994,0.020000,,,,,,,,,,...,,,,,,,,,,
2479.06935,0.020000,,,,,,,,,,...,,,,,,,,,,
2049.06894,0.119611,,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2509.07065,0.154571,0.974078,0.972991,0.971912,0.970002,0.968682,0.965553,0.962930,0.964224,0.962236,...,0.120157,0.120157,0.120157,0.120157,0.120157,0.120157,0.120157,0.120379,0.120379,0.120379
2229.07055,0.089793,0.991920,0.991804,0.991975,0.992286,0.992334,0.992230,0.990555,0.990413,0.989749,...,0.160283,0.160283,0.160283,0.160283,0.160283,0.160283,0.160283,0.160283,0.160283,0.160283
3009.07044,0.080000,0.969875,0.969150,0.968426,0.968367,0.966536,0.965067,0.963436,0.962566,0.960645,...,0.149863,0.150750,0.152081,0.153411,0.154741,0.155627,0.156514,0.157401,0.158288,0.159840
2569.06925,0.030000,0.969156,0.967282,0.965331,0.962882,0.960359,0.957077,0.954159,0.951605,0.950660,...,0.172033,0.180457,0.187108,0.193980,0.202626,0.220361,0.235658,0.248738,0.258271,0.268690


In [360]:
# Drop the Rows with NaN Values
dx = dx.dropna()

In [361]:
dx = dx.reset_index()

In [362]:
dx

Unnamed: 0,lon_lat,Lichen_mean,t2m_0,t2m_1,t2m_2,t2m_3,t2m_4,t2m_5,t2m_6,t2m_7,...,tp_8750,tp_8751,tp_8752,tp_8753,tp_8754,tp_8755,tp_8756,tp_8757,tp_8758,tp_8759
0,2459.07055,0.145714,0.973279,0.971218,0.969094,0.966749,0.964950,0.963021,0.960075,0.963055,...,0.152302,0.152302,0.152302,0.152302,0.152302,0.152302,0.152302,0.152302,0.152302,0.152524
1,2209.06865,0.215616,0.968067,0.964666,0.960758,0.956232,0.956036,0.959318,0.963904,0.965919,...,0.915810,0.967242,1.015571,1.060796,1.096267,1.139275,1.172972,1.210660,1.248126,1.279828
2,2419.06875,0.038000,0.961466,0.960447,0.958280,0.954996,0.950827,0.948725,0.949191,0.953802,...,0.634926,0.666406,0.694339,0.714291,0.727372,0.775257,0.800530,0.816935,0.828020,0.842873
3,2609.06994,0.112560,0.963609,0.961643,0.961038,0.961175,0.961570,0.961175,0.959928,0.951765,...,0.157179,0.160505,0.164274,0.168486,0.182009,0.208168,0.231003,0.250733,0.262483,0.270686
4,2679.06975,0.100800,0.959917,0.955364,0.953143,0.951796,0.950842,0.951111,0.949107,0.943482,...,0.186221,0.191985,0.199079,0.210829,0.228121,0.247630,0.260488,0.272459,0.281549,0.289086
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
836,2609.06915,0.111143,0.964082,0.961741,0.958746,0.955675,0.953323,0.950425,0.947077,0.944762,...,0.178462,0.186221,0.191985,0.199523,0.208612,0.229673,0.247186,0.260710,0.271129,0.283322
837,2509.07065,0.154571,0.974078,0.972991,0.971912,0.970002,0.968682,0.965553,0.962930,0.964224,...,0.120157,0.120157,0.120157,0.120157,0.120157,0.120157,0.120157,0.120379,0.120379,0.120379
838,2229.07055,0.089793,0.991920,0.991804,0.991975,0.992286,0.992334,0.992230,0.990555,0.990413,...,0.160283,0.160283,0.160283,0.160283,0.160283,0.160283,0.160283,0.160283,0.160283,0.160283
839,3009.07044,0.080000,0.969875,0.969150,0.968426,0.968367,0.966536,0.965067,0.963436,0.962566,...,0.149863,0.150750,0.152081,0.153411,0.154741,0.155627,0.156514,0.157401,0.158288,0.159840


In [363]:
## Save into **local** HDF5 file without index
x_filename = os.path.join(path, 'x_mean_tp3_' + str(Year) + '.hdf')
print(x_filename)
dx.to_hdf(x_filename, key='df', mode="w", index=False)

/home/jovyan/Arctic/Vegetation_in_Troms_and_Finnmark/data/x_mean_tp3_2019.hdf


## Find locations with lichen in the following year corresponding to those in current year

In [364]:
dwx_pandas = dx[['lon_lat']]

In [365]:
dwx_pandas

Unnamed: 0,lon_lat
0,2459.07055
1,2209.06865
2,2419.06875
3,2609.06994
4,2679.06975
...,...
836,2609.06915
837,2509.07065
838,2229.07055
839,3009.07044


In [366]:
dwy_pandas

Unnamed: 0,lon_lat,Lichen_mean


In [367]:
## Join dwx with dwy
dy = dwx_pandas.set_index('lon_lat').join(dwy_pandas.set_index('lon_lat'), on='lon_lat')

In [368]:
dy

Unnamed: 0_level_0,Lichen_mean
lon_lat,Unnamed: 1_level_1
2459.07055,
2209.06865,
2419.06875,
2609.06994,
2679.06975,
...,...
2609.06915,
2509.07065,
2229.07055,
3009.07044,


In [369]:
dy.fillna(0)

Unnamed: 0_level_0,Lichen_mean
lon_lat,Unnamed: 1_level_1
2459.07055,0.0
2209.06865,0.0
2419.06875,0.0
2609.06994,0.0
2679.06975,0.0
...,...
2609.06915,0.0
2509.07065,0.0
2229.07055,0.0
3009.07044,0.0


In [370]:
dy.rename(columns = {'Lichen_mean' : 'new_Lichen_mean'}, inplace = True)

In [371]:
dy

Unnamed: 0_level_0,new_Lichen_mean
lon_lat,Unnamed: 1_level_1
2459.07055,
2209.06865,
2419.06875,
2609.06994,
2679.06975,
...,...
2609.06915,
2509.07065,
2229.07055,
3009.07044,


In [372]:
## Save into **local** HDF5 file without index
y_filename = os.path.join(path, 'y_mean_tp3_' + str(Year) + '.hdf')
print(y_filename)
dy.to_hdf(y_filename, key='dg', mode="w", index=False)

/home/jovyan/Arctic/Vegetation_in_Troms_and_Finnmark/data/y_mean_tp3_2019.hdf


In [373]:
print('Finished!')

Finished!
