# Lichen fractional cover statistics 
# Prepares labelled input for the Machine Learning algorithm
# (i.e. locations where moss&lichen fractional cover changes can be related to meteorological parameters from ERA5-Land)

# Copernicus Global Land Cover
## Percentage of 100m pixel that is covered by a specific class of land cover
## Valid values 0-100, 200 = masked sea, 255 = missing

In [1]:
!date

Wed Mar 22 08:24:56 UTC 2023


In [2]:
pip install vaex tables --quiet

Note: you may need to restart the kernel to use updated packages.


In [3]:
import numpy as np
import os
import pandas as pd
import s3fs
import xarray as xr
import vaex

# Input datasets, either from s3 storage or local files

# If data is available locally then jump the following cells

### Define s3 storage parameters

In [4]:
client_kwargs={'endpoint_url': 'https://object-store.cloud.muni.cz'}
store = s3fs.S3FileSystem(anon=False, client_kwargs=client_kwargs)
store.ls('Data', detail=True, refresh=True)

[{'Key': 'Data/C_GlobalLandCover_20150101_20190101_Troms-Finnmark.nc',
  'LastModified': datetime.datetime(2023, 2, 1, 12, 44, 42, 95000, tzinfo=tzlocal()),
  'ETag': '"70e848cfeba6b4e2db997b6efb0ad947-8"',
  'Size': 397191332,
  'StorageClass': 'STANDARD',
  'type': 'file',
  'size': 397191332,
  'name': 'Data/C_GlobalLandCover_20150101_20190101_Troms-Finnmark.nc'},
 {'Key': 'Data/reanalysis-era5-land_hourly_2015-01-01_2019-12-31_Troms-Finnmark_T2m-SD-TP.nc',
  'LastModified': datetime.datetime(2023, 2, 1, 9, 53, 15, 635000, tzinfo=tzlocal()),
  'ETag': '"e48be2b71e08b38d296a0ea6db979b09-23"',
  'Size': 1182124070,
  'StorageClass': 'STANDARD',
  'type': 'file',
  'size': 1182124070,
  'name': 'Data/reanalysis-era5-land_hourly_2015-01-01_2019-12-31_Troms-Finnmark_T2m-SD-TP.nc'},
 {'Key': 'Data/reanalysis-era5-land_hourly_2015-01-01_2022-12-31_Troms-Finnmark_T2m-SD-TP.nc',
  'LastModified': datetime.datetime(2023, 3, 10, 11, 50, 32, 549000, tzinfo=tzlocal()),
  'ETag': '"d082ee07a1ab33

## Copernicus Global Land Cover data  from 2015-01-01 to 2019-12-31 already available as a netCDF file stored on EOSC (CESNET)
## Troms og Finnmark
### Mosses and lichens, bare, grass, shrubs and trees

In [5]:
s3path = 'Data/C_GlobalLandCover_20150101_20190101_Troms-Finnmark.nc'

In [6]:
GLC_AOI = xr.open_dataset(store.open(s3path))

## ERA5-land data from 2015-01-01 to 2019-12-31 - already available as a netCDF file stored on EOSC (CESNET)
## 2m Temperature, Snow depth, Total precipitation

In [7]:
s3path = 'Data/reanalysis-era5-land_hourly_2015-01-01_2022-12-31_Troms-Finnmark_T2m-SD-TP.nc'

In [8]:
ERA5land = xr.open_dataset(store.open(s3path))

In [9]:
ERA5land

# Datasets from **local** files

In [10]:
path = '/home/jovyan/Arctic/Vegetation_in_Troms_and_Finnmark/data/'

In [11]:
# World Land cover data from 2015-01-01 to 2019-12-31- already available as a netCDF file stored locally
GLC_filename = os.path.join(path, 'C_GlobalLandCover_20150101_20190101_Troms-Finnmark.nc')
GLC_AOI = xr.open_dataset(GLC_filename, engine = 'netcdf4')

In [12]:
# ERA5-land data already available as a netCDF file stored locally
ERA5_filename = os.path.join(path, 'reanalysis-era5-land_hourly_2015-01-01_2022-12-31_Troms-Finnmark_T2m-SD-TP.nc')
ERA5land = xr.open_dataset(ERA5_filename, engine = 'netcdf4')

In [13]:
ERA5land

In [14]:
Latitudes = ERA5land.latitude.to_index()
Longitudes = ERA5land.longitude.to_index()

In [15]:
Latitudes

Float64Index([ 71.1500015258789, 71.05000305175781, 70.94999694824219,
               70.8499984741211,             70.75,  70.6500015258789,
              70.55000305175781, 70.44999694824219,  70.3499984741211,
                          70.25,  70.1500015258789, 70.05000305175781,
              69.94999694824219,  69.8499984741211,             69.75,
               69.6500015258789, 69.55000305175781, 69.44999694824219,
               69.3499984741211,             69.25,  69.1500015258789,
              69.05000305175781, 68.94999694824219,  68.8499984741211,
                          68.75,  68.6500015258789, 68.55000305175781,
              68.44999694824219,  68.3499984741211],
             dtype='float64', name='latitude')

In [16]:
GLC_AOI = GLC_AOI.rename(x='lon', y='lat', t='time')

In [17]:
GLC_AOI

In [18]:
# Drop variables not directly of interest here
GLC_AOI = GLC_AOI.drop_vars(['crs',
                             'Bare_CoverFraction_layer',
                             'Crops_CoverFraction_layer',
                             'Grass_CoverFraction_layer',
                             'Discrete_Classification_map', 
                             'Discrete_Classification_proba',
                             'Forest_Type_layer',
                             'Shrub_CoverFraction_layer',
                             'Snow_CoverFraction_layer',
                             'Tree_CoverFraction_layer',
                             'BuiltUp_CoverFraction_layer',
                             'PermanentWater_CoverFraction_layer',
                             'SeasonalWater_CoverFraction_layer',
                             'DataDensityIndicator',
                             'Change_Confidence_layer',
                             'dataMask'])

In [19]:
GLC_AOI = GLC_AOI.rename(MossLichen_CoverFraction_layer = 'Lichen')

In [20]:
GLC_AOI

In [21]:
# Troms & Finnmark Global Land Cover area
GLC_AOI_min_lon = GLC_AOI.lon.min()
GLC_AOI_max_lon = GLC_AOI.lon.max()
GLC_AOI_min_lat = GLC_AOI.lat.min()
GLC_AOI_max_lat = GLC_AOI.lat.max()
print(GLC_AOI_min_lon.values, GLC_AOI_max_lon.values, GLC_AOI_min_lat.values, GLC_AOI_max_lat.values)

15.595313502963002 31.06568387333461 68.35153627792579 71.18764738903712


## Use the mask to only keep pixels with lichen **every year**

In [22]:
mask = GLC_AOI['Lichen'].where((GLC_AOI['Lichen'] > 0) & (GLC_AOI['Lichen'] <= 100))

In [23]:
mask = xr.where(mask > 0, 1, 0)

In [24]:
mask = mask.sum(dim = 'time', min_count = 5, skipna=True)

In [25]:
mask = xr.where(mask >= 1, 1, 0)

In [26]:
mask

In [27]:
mask.sum()

In [28]:
de = GLC_AOI.where(mask == 1)

In [29]:
de

In [30]:
de = de.to_dataframe()

In [31]:
de = de.dropna()

In [32]:
de = de.reset_index()

In [33]:
de

Unnamed: 0,time,lat,lon,Lichen
0,2015-01-01,71.167277,25.807351,3.0
1,2015-01-01,71.167277,25.808276,3.0
2,2015-01-01,71.167277,25.809202,3.0
3,2015-01-01,71.166351,25.745314,10.0
4,2015-01-01,71.166351,25.746239,10.0
...,...,...,...,...
2497146,2019-01-01,68.352462,19.333276,11.0
2497147,2019-01-01,68.352462,19.338832,5.0
2497148,2019-01-01,68.352462,19.339758,5.0
2497149,2019-01-01,68.352462,19.987906,4.0


## Each year in a separate dataset and keep only the first 183 days

In [302]:
Year = 2019
Number_of_days = 183
print('x = WLC(' + str(Year)+ ') joined with ERA5land(' + str(Year + 1) + ')')
print('y = WLC(' + str(Year + 1) + ')')

x = WLC(2019) joined with ERA5land(2020)
y = WLC(2020)


In [303]:
# Only keep locations with lichen for the current year
df = de.loc[de['time'] == str(Year) + '-01-01']
dg = de.loc[de['time'] == str(Year + 1) + '-01-01']

In [304]:
df

Unnamed: 0,time,lat,lon,Lichen
1830452,2019-01-01,71.170055,25.782351,8.0
1830453,2019-01-01,71.170055,25.783276,8.0
1830454,2019-01-01,71.169129,25.779573,10.0
1830455,2019-01-01,71.169129,25.780499,10.0
1830456,2019-01-01,71.169129,25.781425,10.0
...,...,...,...,...
2497146,2019-01-01,68.352462,19.333276,11.0
2497147,2019-01-01,68.352462,19.338832,5.0
2497148,2019-01-01,68.352462,19.339758,5.0
2497149,2019-01-01,68.352462,19.987906,4.0


In [305]:
dg

Unnamed: 0,time,lat,lon,Lichen


In [306]:
# Normalize the fractional cover
df['Lichen'] = df['Lichen'].div(100)
dg['Lichen'] = dg['Lichen'].div(100)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['Lichen'] = df['Lichen'].div(100)


In [307]:
df

Unnamed: 0,time,lat,lon,Lichen
1830452,2019-01-01,71.170055,25.782351,0.08
1830453,2019-01-01,71.170055,25.783276,0.08
1830454,2019-01-01,71.169129,25.779573,0.10
1830455,2019-01-01,71.169129,25.780499,0.10
1830456,2019-01-01,71.169129,25.781425,0.10
...,...,...,...,...
2497146,2019-01-01,68.352462,19.333276,0.11
2497147,2019-01-01,68.352462,19.338832,0.05
2497148,2019-01-01,68.352462,19.339758,0.05
2497149,2019-01-01,68.352462,19.987906,0.04


In [308]:
dg

Unnamed: 0,time,lat,lon,Lichen


In [309]:
# Convert to VAEX
dvx = vaex.from_pandas(df)
dvy = vaex.from_pandas(dg)

In [310]:
dvx

#,time,lat,lon,Lichen
0,2019-01-01 00:00:00.000000000,71.17005479644453,25.782350540000856,0.08
1,2019-01-01 00:00:00.000000000,71.17005479644453,25.783276465926782,0.08
2,2019-01-01 00:00:00.000000000,71.1691288705186,25.779572762223076,0.1
3,2019-01-01 00:00:00.000000000,71.1691288705186,25.780498688149002,0.1
4,2019-01-01 00:00:00.000000000,71.1691288705186,25.78142461407493,0.1
...,...,...,...,...
666694,2019-01-01 00:00:00.000000000,68.35246220385172,19.333276465926264,0.11
666695,2019-01-01 00:00:00.000000000,68.35246220385172,19.33883202148182,0.05
666696,2019-01-01 00:00:00.000000000,68.35246220385172,19.339757947407747,0.05
666697,2019-01-01 00:00:00.000000000,68.35246220385172,19.987906095555946,0.04


In [311]:
dvy

#,time,lat,lon,Lichen
,,,,


In [312]:
# Find the correspondind ERA5-land lat-lon
# Careful with the latitude, in reverse order
dvx['ERA5_lon_index'] = ((dvx.lon - 15.59) / 0.1).astype('int').values
dvx['ERA5_lat_index'] = 28 - ((dvx.lat - 68.35) / 0.1).astype('int').values
dvy['ERA5_lon_index'] = ((dvy.lon - 15.59) / 0.1).astype('int').values
dvy['ERA5_lat_index'] = 28 - ((dvy.lat - 68.35) / 0.1).astype('int').values

In [313]:
dvx

#,time,lat,lon,Lichen,ERA5_lon_index,ERA5_lat_index
0,2019-01-01 00:00:00.000000000,71.17005479644453,25.782350540000856,0.08,101,0
1,2019-01-01 00:00:00.000000000,71.17005479644453,25.783276465926782,0.08,101,0
2,2019-01-01 00:00:00.000000000,71.1691288705186,25.779572762223076,0.1,101,0
3,2019-01-01 00:00:00.000000000,71.1691288705186,25.780498688149002,0.1,101,0
4,2019-01-01 00:00:00.000000000,71.1691288705186,25.78142461407493,0.1,101,0
...,...,...,...,...,...,...
666694,2019-01-01 00:00:00.000000000,68.35246220385172,19.333276465926264,0.11,37,28
666695,2019-01-01 00:00:00.000000000,68.35246220385172,19.33883202148182,0.05,37,28
666696,2019-01-01 00:00:00.000000000,68.35246220385172,19.339757947407747,0.05,37,28
666697,2019-01-01 00:00:00.000000000,68.35246220385172,19.987906095555946,0.04,43,28


# Adding columns with the ERA5-land longitude and latitude to dv

In [314]:
dvx['ERA5_lon'] = ERA5land.sel(time="2015-01-01").longitude[dvx['ERA5_lon_index'].values].values
dvx['ERA5_lat'] = ERA5land.sel(time="2015-01-01").latitude[dvx['ERA5_lat_index'].values].values
dvy['ERA5_lon'] = ERA5land.sel(time="2015-01-01").longitude[dvy['ERA5_lon_index'].values].values
dvy['ERA5_lat'] = ERA5land.sel(time="2015-01-01").latitude[dvy['ERA5_lat_index'].values].values

In [315]:
# Add combined lon_lat column to dv x & y
dvx['lon_lat'] = (dvx['ERA5_lon'] * 100).astype('int') + (dvx['ERA5_lat'] * 100).astype('int') / 100000
dvy['lon_lat'] = (dvy['ERA5_lon'] * 100).astype('int') + (dvy['ERA5_lat'] * 100).astype('int') / 100000

In [316]:
dvx

#,time,lat,lon,Lichen,ERA5_lon_index,ERA5_lat_index,ERA5_lon,ERA5_lat,lon_lat
0,2019-01-01 00:00:00.000000000,71.17005479644453,25.782350540000856,0.08,101,0,25.69,71.15,2569.07115
1,2019-01-01 00:00:00.000000000,71.17005479644453,25.783276465926782,0.08,101,0,25.69,71.15,2569.07115
2,2019-01-01 00:00:00.000000000,71.1691288705186,25.779572762223076,0.1,101,0,25.69,71.15,2569.07115
3,2019-01-01 00:00:00.000000000,71.1691288705186,25.780498688149002,0.1,101,0,25.69,71.15,2569.07115
4,2019-01-01 00:00:00.000000000,71.1691288705186,25.78142461407493,0.1,101,0,25.69,71.15,2569.07115
...,...,...,...,...,...,...,...,...,...
666694,2019-01-01 00:00:00.000000000,68.35246220385172,19.333276465926264,0.11,37,28,19.29,68.35,1929.06835
666695,2019-01-01 00:00:00.000000000,68.35246220385172,19.33883202148182,0.05,37,28,19.29,68.35,1929.06835
666696,2019-01-01 00:00:00.000000000,68.35246220385172,19.339757947407747,0.05,37,28,19.29,68.35,1929.06835
666697,2019-01-01 00:00:00.000000000,68.35246220385172,19.987906095555946,0.04,43,28,19.89,68.35,1989.06835


In [317]:
dvx = dvx.drop(columns=['time', 'lat', 'lon', 'ERA5_lon', 'ERA5_lat'])
dvy = dvy.drop(columns=['time', 'lat', 'lon', 'ERA5_lon', 'ERA5_lat'])

In [318]:
dvx

#,Lichen,ERA5_lon_index,ERA5_lat_index,lon_lat
0,0.08,101,0,2569.07115
1,0.08,101,0,2569.07115
2,0.1,101,0,2569.07115
3,0.1,101,0,2569.07115
4,0.1,101,0,2569.07115
...,...,...,...,...
666694,0.11,37,28,1929.06835
666695,0.05,37,28,1929.06835
666696,0.05,37,28,1929.06835
666697,0.04,43,28,1989.06835


In [319]:
dvy

#,Lichen,ERA5_lon_index,ERA5_lat_index,lon_lat
,,,,


# Calculate the mean fractional cover for each ERA5-land grid cell

In [320]:
dmx = dvx.groupby(by='lon_lat', agg='mean')
dmy = dvy.groupby(by='lon_lat', agg='mean')

In [321]:
dmx

#,lon_lat,Lichen_mean,ERA5_lon_index_mean,ERA5_lat_index_mean
0,2239.07065,0.06981481404768096,68.0,5.0
1,2459.07055,0.13599999770522117,90.0,6.0
2,2839.06994,0.03999999910593033,128.0,12.0
3,2499.06965,0.029999999329447746,94.0,15.0
4,2479.06935,0.019999999552965164,92.0,18.0
...,...,...,...,...
2429,2509.07065,0.20279069774776928,95.0,5.0
2430,2229.07055,0.08603217197087751,67.0,6.0
2431,3009.07044,0.1410638288832567,145.0,7.0
2432,2569.06925,0.029999999329447746,101.0,19.0


In [322]:
dmy

#,lon_lat,Lichen_mean,ERA5_lon_index_mean,ERA5_lat_index_mean
,,,,


In [323]:
dmx['ERA5_lon_index'] = dmx['ERA5_lon_index_mean'].astype('int')
dmx['ERA5_lat_index'] = dmx['ERA5_lat_index_mean'].astype('int')

In [324]:
dmx = dmx.drop(columns=['ERA5_lon_index_mean', 'ERA5_lat_index_mean'])
dmy = dmy.drop(columns=['ERA5_lon_index_mean', 'ERA5_lat_index_mean'])

In [325]:
dmx

#,lon_lat,Lichen_mean,ERA5_lon_index,ERA5_lat_index
0,2239.07065,0.06981481404768096,68,5
1,2459.07055,0.13599999770522117,90,6
2,2839.06994,0.03999999910593033,128,12
3,2499.06965,0.029999999329447746,94,15
4,2479.06935,0.019999999552965164,92,18
...,...,...,...,...
2429,2509.07065,0.20279069774776928,95,5
2430,2229.07055,0.08603217197087751,67,6
2431,3009.07044,0.1410638288832567,145,7
2432,2569.06925,0.029999999329447746,101,19


In [326]:
dmy

#,lon_lat,Lichen_mean
,,


In [327]:
dmx['ERA5_lon'] = Longitudes[dmx['ERA5_lon_index'].values].values
dmx['ERA5_lat'] = Latitudes[dmx['ERA5_lat_index'].values].values

In [328]:
dmx = dmx.drop(columns=['ERA5_lon_index', 'ERA5_lat_index'])

In [329]:
dmx

#,lon_lat,Lichen_mean,ERA5_lon,ERA5_lat
0,2239.07065,0.06981481404768096,22.389999389648438,70.6500015258789
1,2459.07055,0.13599999770522117,24.59000015258789,70.55000305175781
2,2839.06994,0.03999999910593033,28.389999389648438,69.94999694824219
3,2499.06965,0.029999999329447746,24.989999771118164,69.6500015258789
4,2479.06935,0.019999999552965164,24.790000915527344,69.3499984741211
...,...,...,...,...
2429,2509.07065,0.20279069774776928,25.09000015258789,70.6500015258789
2430,2229.07055,0.08603217197087751,22.290000915527344,70.55000305175781
2431,3009.07044,0.1410638288832567,30.09000015258789,70.44999694824219
2432,2569.06925,0.029999999329447746,25.690000534057617,69.25


## Extract ERA5 data for  the selected period of the year (when RoS events mostly occur)

In [330]:
ERA5 = ERA5land.sel(time=slice(str(Year + 1) + '-01-01', str(Year + 1)  + '-12-31'))

In [331]:
ERA5 = ERA5.isel(time=range(Number_of_days * 24))

In [332]:
ERA5 = ERA5.isel(expver = 0)

In [333]:
ERA5

In [334]:
# Extract ERA5 t2m, tp and sd fields 
ERA5_t2m = ERA5.where(ERA5['latitude'].isin(dmx['ERA5_lat'].values) & ERA5['longitude'].isin(dmx['ERA5_lon'].values))['t2m']
ERA5_tp = ERA5.where(ERA5['latitude'].isin(dmx['ERA5_lat'].values) & ERA5['longitude'].isin(dmx['ERA5_lon'].values))['tp']
ERA5_sd = ERA5.where(ERA5['latitude'].isin(dmx['ERA5_lat'].values) & ERA5['longitude'].isin(dmx['ERA5_lon'].values))['sd']

In [335]:
ERA5_t2m

## Rain on Snow criteria (according to https://www.hydrol-earth-syst-sci.net/23/2983/2019/hess-23-2983-2019.pdf)
 * total rainfall volume of at least 20 mm within 12 h
### or 
 * air temperatures above 0C (273.15K)
 * and initial snowpack depth of at least 10 cm

In [336]:
# Normalizing temperature, total precipitation and snow depth values accordidng to these criteria
ERA5_t2m = ERA5_t2m / 273.15
ERA5_tp = ERA5_tp / 0.02 * 12.
ERA5_sd = ERA5_sd / 0.1

In [337]:
dh_t2m = ERA5_t2m.stack(z=['latitude', 'longitude']).to_pandas().transpose().reset_index()
dh_tp = ERA5_tp.stack(z=['latitude', 'longitude']).to_pandas().transpose().reset_index()
dh_sd = ERA5_sd.stack(z=['latitude', 'longitude']).to_pandas().transpose().reset_index()

In [338]:
dh_tp = dh_tp.drop(columns=['latitude', 'longitude'])
dh_sd = dh_sd.drop(columns=['latitude', 'longitude'])

In [339]:
# Create labels for ERA5-land variables to replace the dates
label_t2m = ['latitude', 'longitude']
label_tp = list()
label_sd = list()
for i in range(Number_of_days * 24):
    label_t2m.append('t2m_'+ str(i))
    label_tp.append('tp_'+ str(i))
    label_sd.append('sd_'+ str(i))

In [340]:
dh_t2m.set_axis(label_t2m, axis="columns", inplace=True)
dh_tp.set_axis(label_tp, axis="columns", inplace=True)
dh_sd.set_axis(label_sd, axis="columns", inplace=True)

In [341]:
dh_t2m

Unnamed: 0,latitude,longitude,t2m_0,t2m_1,t2m_2,t2m_3,t2m_4,t2m_5,t2m_6,t2m_7,...,t2m_4382,t2m_4383,t2m_4384,t2m_4385,t2m_4386,t2m_4387,t2m_4388,t2m_4389,t2m_4390,t2m_4391
0,71.150002,15.590000,,,,,,,,,...,,,,,,,,,,
1,71.150002,15.690000,,,,,,,,,...,,,,,,,,,,
2,71.150002,15.790000,,,,,,,,,...,,,,,,,,,,
3,71.150002,15.890000,,,,,,,,,...,,,,,,,,,,
4,71.150002,15.990000,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4490,68.349998,30.590000,0.967179,0.965225,0.962902,0.960947,0.958646,0.956662,0.954962,0.952986,...,0.897173,0.895671,0.892886,0.889416,0.887973,0.888073,0.887505,0.887184,0.886995,0.886979
4491,68.349998,30.690001,0.966967,0.965045,0.962731,0.960909,0.958814,0.956895,0.955167,0.953294,...,0.896818,0.895289,0.892598,0.889149,0.887823,0.887911,0.887341,0.887017,0.886824,0.886789
4492,68.349998,30.790001,0.966432,0.964532,0.962194,0.960330,0.958202,0.956196,0.954390,0.952515,...,0.896385,0.894827,0.892216,0.888868,0.887565,0.887650,0.887075,0.886749,0.886556,0.886489
4493,68.349998,30.889999,0.965515,0.963667,0.961457,0.959640,0.957439,0.955216,0.953287,0.951430,...,0.896168,0.894543,0.891956,0.888828,0.887543,0.887634,0.887057,0.886729,0.886536,0.886454


In [342]:
dh_tp

Unnamed: 0,tp_0,tp_1,tp_2,tp_3,tp_4,tp_5,tp_6,tp_7,tp_8,tp_9,...,tp_4382,tp_4383,tp_4384,tp_4385,tp_4386,tp_4387,tp_4388,tp_4389,tp_4390,tp_4391
0,,,,,,,,,,,...,,,,,,,,,,
1,,,,,,,,,,,...,,,,,,,,,,
2,,,,,,,,,,,...,,,,,,,,,,
3,,,,,,,,,,,...,,,,,,,,,,
4,,,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4490,0.035027,0.000443,0.000886,0.000886,0.000886,0.000886,0.000886,0.000886,0.000886,0.000886,...,6.599551,6.990172,7.256868,7.355521,7.380572,7.441093,7.488979,7.497404,7.502725,7.507380
4491,0.035914,0.000443,0.000443,0.000443,0.000443,0.000443,0.000443,0.000443,0.000443,0.000443,...,6.487597,6.848954,7.089934,7.186813,7.211199,7.271277,7.314286,7.322932,7.328474,7.333130
4492,0.037022,0.000443,0.000443,0.000443,0.000443,0.000443,0.000443,0.000443,0.000443,0.000443,...,6.376750,6.714830,6.934971,7.030742,7.054906,7.113655,7.152894,7.161762,7.167526,7.171739
4493,0.037909,0.000443,0.000443,0.000443,0.000443,0.000443,0.000443,0.000443,0.000443,0.000443,...,6.262357,6.576273,6.773801,6.866911,6.889524,6.947386,6.982413,6.991059,6.997045,7.001035


In [343]:
dh_sd

Unnamed: 0,sd_0,sd_1,sd_2,sd_3,sd_4,sd_5,sd_6,sd_7,sd_8,sd_9,...,sd_4382,sd_4383,sd_4384,sd_4385,sd_4386,sd_4387,sd_4388,sd_4389,sd_4390,sd_4391
0,,,,,,,,,,,...,,,,,,,,,,
1,,,,,,,,,,,...,,,,,,,,,,
2,,,,,,,,,,,...,,,,,,,,,,
3,,,,,,,,,,,...,,,,,,,,,,
4,,,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4490,1.591815,1.591815,1.592340,1.592340,1.592340,1.592340,1.592340,1.592340,1.592340,1.592340,...,-0.000001,-0.000001,-0.000001,-0.000001,-0.000001,-0.000001,-0.000001,-0.000001,-0.000001,-0.000001
4491,1.593920,1.593920,1.593920,1.593920,1.593920,1.593920,1.593920,1.593920,1.593920,1.593920,...,-0.000001,-0.000001,-0.000001,-0.000001,-0.000001,-0.000001,-0.000001,-0.000001,-0.000001,-0.000001
4492,1.542317,1.542317,1.542317,1.542317,1.542317,1.542317,1.542317,1.542317,1.542317,1.542317,...,-0.000001,-0.000001,-0.000001,-0.000001,-0.000001,-0.000001,-0.000001,-0.000001,-0.000001,-0.000001
4493,1.266922,1.266922,1.266922,1.266922,1.266922,1.266922,1.266922,1.266922,1.266922,1.266922,...,-0.000001,-0.000001,-0.000001,-0.000001,-0.000001,-0.000001,-0.000001,-0.000001,-0.000001,-0.000001


In [344]:
#  Glue together dh_t2m and dh_tp  <- not dh_sd
dh = pd.concat([dh_t2m, dh_tp], axis = 1)

In [345]:
dh

Unnamed: 0,latitude,longitude,t2m_0,t2m_1,t2m_2,t2m_3,t2m_4,t2m_5,t2m_6,t2m_7,...,tp_4382,tp_4383,tp_4384,tp_4385,tp_4386,tp_4387,tp_4388,tp_4389,tp_4390,tp_4391
0,71.150002,15.590000,,,,,,,,,...,,,,,,,,,,
1,71.150002,15.690000,,,,,,,,,...,,,,,,,,,,
2,71.150002,15.790000,,,,,,,,,...,,,,,,,,,,
3,71.150002,15.890000,,,,,,,,,...,,,,,,,,,,
4,71.150002,15.990000,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4490,68.349998,30.590000,0.967179,0.965225,0.962902,0.960947,0.958646,0.956662,0.954962,0.952986,...,6.599551,6.990172,7.256868,7.355521,7.380572,7.441093,7.488979,7.497404,7.502725,7.507380
4491,68.349998,30.690001,0.966967,0.965045,0.962731,0.960909,0.958814,0.956895,0.955167,0.953294,...,6.487597,6.848954,7.089934,7.186813,7.211199,7.271277,7.314286,7.322932,7.328474,7.333130
4492,68.349998,30.790001,0.966432,0.964532,0.962194,0.960330,0.958202,0.956196,0.954390,0.952515,...,6.376750,6.714830,6.934971,7.030742,7.054906,7.113655,7.152894,7.161762,7.167526,7.171739
4493,68.349998,30.889999,0.965515,0.963667,0.961457,0.959640,0.957439,0.955216,0.953287,0.951430,...,6.262357,6.576273,6.773801,6.866911,6.889524,6.947386,6.982413,6.991059,6.997045,7.001035


In [346]:
# Add combined lon_lat column to dh
dh['ERA5_lon_lat'] = (dh['longitude'] * 100).astype('int') + (dh['latitude'] * 100).astype('int') / 100000

In [347]:
# Drop latitude and longitude columns which are not used anymore in dh
dh = dh.drop(columns=['latitude', 'longitude'])

In [348]:
dh

Unnamed: 0,t2m_0,t2m_1,t2m_2,t2m_3,t2m_4,t2m_5,t2m_6,t2m_7,t2m_8,t2m_9,...,tp_4383,tp_4384,tp_4385,tp_4386,tp_4387,tp_4388,tp_4389,tp_4390,tp_4391,ERA5_lon_lat
0,,,,,,,,,,,...,,,,,,,,,,1559.07115
1,,,,,,,,,,,...,,,,,,,,,,1568.07115
2,,,,,,,,,,,...,,,,,,,,,,1578.07115
3,,,,,,,,,,,...,,,,,,,,,,1589.07115
4,,,,,,,,,,,...,,,,,,,,,,1598.07115
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4490,0.967179,0.965225,0.962902,0.960947,0.958646,0.956662,0.954962,0.952986,0.951446,0.949975,...,6.990172,7.256868,7.355521,7.380572,7.441093,7.488979,7.497404,7.502725,7.507380,3059.06834
4491,0.966967,0.965045,0.962731,0.960909,0.958814,0.956895,0.955167,0.953294,0.951763,0.950285,...,6.848954,7.089934,7.186813,7.211199,7.271277,7.314286,7.322932,7.328474,7.333130,3069.06834
4492,0.966432,0.964532,0.962194,0.960330,0.958202,0.956196,0.954390,0.952515,0.951029,0.949593,...,6.714830,6.934971,7.030742,7.054906,7.113655,7.152894,7.161762,7.167526,7.171739,3079.06834
4493,0.965515,0.963667,0.961457,0.959640,0.957439,0.955216,0.953287,0.951430,0.949979,0.948599,...,6.576273,6.773801,6.866911,6.889524,6.947386,6.982413,6.991059,6.997045,7.001035,3088.06834


In [349]:
dmx

#,lon_lat,Lichen_mean,ERA5_lon,ERA5_lat
0,2239.07065,0.06981481404768096,22.389999389648438,70.6500015258789
1,2459.07055,0.13599999770522117,24.59000015258789,70.55000305175781
2,2839.06994,0.03999999910593033,28.389999389648438,69.94999694824219
3,2499.06965,0.029999999329447746,24.989999771118164,69.6500015258789
4,2479.06935,0.019999999552965164,24.790000915527344,69.3499984741211
...,...,...,...,...
2429,2509.07065,0.20279069774776928,25.09000015258789,70.6500015258789
2430,2229.07055,0.08603217197087751,22.290000915527344,70.55000305175781
2431,3009.07044,0.1410638288832567,30.09000015258789,70.44999694824219
2432,2569.06925,0.029999999329447746,25.690000534057617,69.25


In [350]:
dmx = dmx.drop(columns=['ERA5_lon', 'ERA5_lat'])

In [351]:
# Convert to panda dw x & y
dwx_pandas = dmx.to_pandas_df()
dwy_pandas = dmy.to_pandas_df()

## Join dwx (WLC) with dh (ERA5 t2m-tp-sd)

In [352]:
dwx_pandas

Unnamed: 0,lon_lat,Lichen_mean
0,2239.07065,0.069815
1,2459.07055,0.136000
2,2839.06994,0.040000
3,2499.06965,0.030000
4,2479.06935,0.020000
...,...,...
2429,2509.07065,0.202791
2430,2229.07055,0.086032
2431,3009.07044,0.141064
2432,2569.06925,0.030000


In [353]:
# Join dwx (WLC) with df (ERA5 t2m-tp-sd)
dx = dwx_pandas.set_index('lon_lat').join(dh.set_index('ERA5_lon_lat'), on='lon_lat')

In [354]:
dx

Unnamed: 0_level_0,Lichen_mean,t2m_0,t2m_1,t2m_2,t2m_3,t2m_4,t2m_5,t2m_6,t2m_7,t2m_8,...,tp_4382,tp_4383,tp_4384,tp_4385,tp_4386,tp_4387,tp_4388,tp_4389,tp_4390,tp_4391
lon_lat,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2239.07065,0.069815,,,,,,,,,,...,,,,,,,,,,
2459.07055,0.136000,0.973279,0.971218,0.969094,0.966749,0.964950,0.963021,0.960075,0.963055,0.960292,...,5.249889,5.659576,6.080570,6.460772,6.841417,7.042713,7.219624,7.357516,7.536422,7.789371
2839.06994,0.040000,,,,,,,,,,...,,,,,,,,,,
2499.06965,0.030000,,,,,,,,,,...,,,,,,,,,,
2479.06935,0.020000,,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2509.07065,0.202791,0.974078,0.972991,0.971912,0.970002,0.968682,0.965553,0.962930,0.964224,0.962236,...,5.137713,5.548287,5.963073,6.355246,6.710176,6.923443,7.134494,7.371925,7.660347,8.053850
2229.07055,0.086032,0.991920,0.991804,0.991975,0.992286,0.992334,0.992230,0.990555,0.990413,0.989749,...,6.238636,7.015445,7.843908,8.604756,9.282690,9.828053,10.199830,10.468078,10.741646,11.006789
3009.07044,0.141064,0.969875,0.969150,0.968426,0.968367,0.966536,0.965067,0.963436,0.962566,0.960645,...,3.212983,3.541531,4.055634,4.604987,5.126407,5.405961,5.646940,5.846019,5.994775,6.071923
2569.06925,0.030000,0.969156,0.967282,0.965331,0.962882,0.960359,0.957077,0.954159,0.951605,0.950660,...,7.460159,7.959853,8.438043,8.863691,9.209753,9.306633,9.391985,9.493740,9.737380,9.890791


In [355]:
# Drop the Rows with NaN Values
dx = dx.dropna()

In [356]:
dx = dx.reset_index()

In [357]:
dx

Unnamed: 0,lon_lat,Lichen_mean,t2m_0,t2m_1,t2m_2,t2m_3,t2m_4,t2m_5,t2m_6,t2m_7,...,tp_4382,tp_4383,tp_4384,tp_4385,tp_4386,tp_4387,tp_4388,tp_4389,tp_4390,tp_4391
0,2459.07055,0.136000,0.973279,0.971218,0.969094,0.966749,0.964950,0.963021,0.960075,0.963055,...,5.249889,5.659576,6.080570,6.460772,6.841417,7.042713,7.219624,7.357516,7.536422,7.789371
1,1729.06844,0.026154,0.984921,0.986080,0.986483,0.985884,0.985587,0.987069,0.988156,0.989829,...,1.774423,2.096985,2.513545,2.968900,3.520913,4.065610,4.609200,5.129954,5.620559,6.120696
2,2209.06865,0.225686,0.968067,0.964666,0.960758,0.956232,0.956036,0.959318,0.963904,0.965919,...,6.630588,6.994163,7.335790,7.592731,7.803339,7.980914,8.125679,8.250935,8.410775,8.634462
3,2419.06875,0.038000,0.961466,0.960447,0.958280,0.954996,0.950827,0.948725,0.949191,0.953802,...,6.159492,6.489814,6.678251,6.774466,6.834988,7.050916,7.243123,7.368600,7.497404,7.670545
4,2609.06994,0.123336,0.963609,0.961643,0.961038,0.961175,0.961570,0.961175,0.959928,0.951765,...,5.190698,5.411281,5.627653,5.982139,6.369434,6.555212,6.764933,6.953815,7.178167,7.449740
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1007,2609.06915,0.139351,0.964082,0.961741,0.958746,0.955675,0.953323,0.950425,0.947077,0.944762,...,7.668772,8.184206,8.632467,9.025306,9.343212,9.460044,9.565569,9.681070,9.861971,9.960846
1008,2509.07065,0.202791,0.974078,0.972991,0.971912,0.970002,0.968682,0.965553,0.962930,0.964224,...,5.137713,5.548287,5.963073,6.355246,6.710176,6.923443,7.134494,7.371925,7.660347,8.053850
1009,2229.07055,0.086032,0.991920,0.991804,0.991975,0.992286,0.992334,0.992230,0.990555,0.990413,...,6.238636,7.015445,7.843908,8.604756,9.282690,9.828053,10.199830,10.468078,10.741646,11.006789
1010,3009.07044,0.141064,0.969875,0.969150,0.968426,0.968367,0.966536,0.965067,0.963436,0.962566,...,3.212983,3.541531,4.055634,4.604987,5.126407,5.405961,5.646940,5.846019,5.994775,6.071923


In [358]:
## Save into **local** HDF5 file without index
x_filename = os.path.join(path, 'x_mean_tp2_' + str(Year) + '.hdf')
print(x_filename)
dx.to_hdf(x_filename, key='df', mode="w", index=False)

/home/jovyan/Arctic/Vegetation_in_Troms_and_Finnmark/data/x_mean_tps1_2019.hdf


## Find locations with lichen in the following year corresponding to those in current year

In [359]:
dwx_pandas = dx[['lon_lat']]

In [360]:
dwx_pandas

Unnamed: 0,lon_lat
0,2459.07055
1,1729.06844
2,2209.06865
3,2419.06875
4,2609.06994
...,...
1007,2609.06915
1008,2509.07065
1009,2229.07055
1010,3009.07044


In [361]:
dwy_pandas

Unnamed: 0,lon_lat,Lichen_mean


In [362]:
## Join dwx with dwy
dy = dwx_pandas.set_index('lon_lat').join(dwy_pandas.set_index('lon_lat'), on='lon_lat')

In [363]:
dy

Unnamed: 0_level_0,Lichen_mean
lon_lat,Unnamed: 1_level_1
2459.07055,
1729.06844,
2209.06865,
2419.06875,
2609.06994,
...,...
2609.06915,
2509.07065,
2229.07055,
3009.07044,


In [364]:
dy.fillna(0)

Unnamed: 0_level_0,Lichen_mean
lon_lat,Unnamed: 1_level_1
2459.07055,0.0
1729.06844,0.0
2209.06865,0.0
2419.06875,0.0
2609.06994,0.0
...,...
2609.06915,0.0
2509.07065,0.0
2229.07055,0.0
3009.07044,0.0


In [365]:
dy.rename(columns = {'Lichen_mean' : 'new_Lichen_mean'}, inplace = True)

In [366]:
dy

Unnamed: 0_level_0,new_Lichen_mean
lon_lat,Unnamed: 1_level_1
2459.07055,
1729.06844,
2209.06865,
2419.06875,
2609.06994,
...,...
2609.06915,
2509.07065,
2229.07055,
3009.07044,


In [367]:
## Save into **local** HDF5 file without index
y_filename = os.path.join(path, 'y_mean_tp2_' + str(Year) + '.hdf')
print(y_filename)
dy.to_hdf(y_filename, key='dg', mode="w", index=False)

/home/jovyan/Arctic/Vegetation_in_Troms_and_Finnmark/data/y_mean_tps1_2019.hdf


In [368]:
print('Finished!')

Finished!
