# Lichen fractional cover statistics 
# Prepares labelled input for the Machine Learning algorithm
# (i.e. locations where moss&lichen fractional cover changes can be related to meteorological parameters from ERA5-Land)

# Copernicus Global Land Cover
## Percentage of 100m pixel that is covered by a specific class of land cover
## Valid values 0-100, 200 = masked sea, 255 = missing

In [1]:
!date

Sun Mar 19 08:15:50 UTC 2023


In [2]:
pip install vaex tables --quiet

Note: you may need to restart the kernel to use updated packages.


In [3]:
import numpy as np
import os
import pandas as pd
import s3fs
import xarray as xr
import vaex

# Input datasets, either from s3 storage or local files

# If data is available locally then jump the following cells

### Define s3 storage parameters

In [None]:
client_kwargs={'endpoint_url': 'https://object-store.cloud.muni.cz'}
store = s3fs.S3FileSystem(anon=False, client_kwargs=client_kwargs)
store.ls('Data', detail=True, refresh=True)

## Copernicus Global Land Cover data  from 2015-01-01 to 2019-12-31 already available as a netCDF file stored on EOSC (CESNET)
## Troms og Finnmark
### Mosses and lichens, bare, grass, shrubs and trees

In [None]:
s3path = 'Data/C_GlobalLandCover_20150101_20190101_Troms-Finnmark.nc'

In [None]:
GLC_AOI = xr.open_dataset(store.open(s3path))

## ERA5-land data from 2015-01-01 to 2019-12-31 - already available as a netCDF file stored on EOSC (CESNET)
## 2m Temperature, Snow depth, Total precipitation

In [None]:
s3path = 'Data/reanalysis-era5-land_hourly_2015-01-01_2022-12-31_Troms-Finnmark_T2m-SD-TP.nc'

In [None]:
ERA5land = xr.open_dataset(store.open(s3path))

In [None]:
ERA5land

# Datasets from **local** files

In [4]:
path = '/home/jovyan/Arctic/Vegetation_in_Troms_and_Finnmark/data/'

In [5]:
# World Land cover data from 2015-01-01 to 2019-12-31- already available as a netCDF file stored locally
GLC_filename = os.path.join(path, 'C_GlobalLandCover_20150101_20190101_Troms-Finnmark.nc')
GLC_AOI = xr.open_dataset(GLC_filename, engine = 'netcdf4')

In [6]:
# ERA5-land data already available as a netCDF file stored locally
ERA5_filename = os.path.join(path, 'reanalysis-era5-land_hourly_2015-01-01_2022-12-31_Troms-Finnmark_T2m-SD-TP.nc')
ERA5land = xr.open_dataset(ERA5_filename, engine = 'netcdf4')

In [7]:
ERA5land

In [8]:
Latitudes = ERA5land.latitude.to_index()
Longitudes = ERA5land.longitude.to_index()

In [9]:
Latitudes

Float64Index([ 71.1500015258789, 71.05000305175781, 70.94999694824219,
               70.8499984741211,             70.75,  70.6500015258789,
              70.55000305175781, 70.44999694824219,  70.3499984741211,
                          70.25,  70.1500015258789, 70.05000305175781,
              69.94999694824219,  69.8499984741211,             69.75,
               69.6500015258789, 69.55000305175781, 69.44999694824219,
               69.3499984741211,             69.25,  69.1500015258789,
              69.05000305175781, 68.94999694824219,  68.8499984741211,
                          68.75,  68.6500015258789, 68.55000305175781,
              68.44999694824219,  68.3499984741211],
             dtype='float64', name='latitude')

In [10]:
GLC_AOI = GLC_AOI.rename(x='lon', y='lat', t='time')

In [11]:
GLC_AOI

In [12]:
# Drop variables not directly of interest here
GLC_AOI = GLC_AOI.drop_vars(['crs',
                             'Bare_CoverFraction_layer',
                             'Crops_CoverFraction_layer',
                             'Grass_CoverFraction_layer',
                             'Discrete_Classification_map', 
                             'Discrete_Classification_proba',
                             'Forest_Type_layer',
                             'Shrub_CoverFraction_layer',
                             'Snow_CoverFraction_layer',
                             'Tree_CoverFraction_layer',
                             'BuiltUp_CoverFraction_layer',
                             'PermanentWater_CoverFraction_layer',
                             'SeasonalWater_CoverFraction_layer',
                             'DataDensityIndicator',
                             'Change_Confidence_layer',
                             'dataMask'])

In [13]:
GLC_AOI = GLC_AOI.rename(MossLichen_CoverFraction_layer = 'Lichen')

In [14]:
GLC_AOI

In [15]:
# Troms & Finnmark Global Land Cover area
GLC_AOI_min_lon = GLC_AOI.lon.min()
GLC_AOI_max_lon = GLC_AOI.lon.max()
GLC_AOI_min_lat = GLC_AOI.lat.min()
GLC_AOI_max_lat = GLC_AOI.lat.max()
print(GLC_AOI_min_lon.values, GLC_AOI_max_lon.values, GLC_AOI_min_lat.values, GLC_AOI_max_lat.values)

15.595313502963002 31.06568387333461 68.35153627792579 71.18764738903712


## Use the mask to only keep pixels with lichen **every year**

In [16]:
mask = GLC_AOI['Lichen'].where((GLC_AOI['Lichen'] > 0) & (GLC_AOI['Lichen'] <= 100))

In [17]:
mask = xr.where(mask > 0, 1, 0)

In [18]:
mask = mask.sum(dim = 'time', min_count = 5, skipna=True)

In [19]:
mask = xr.where(mask >= 5, 1, 0)

In [20]:
mask

In [21]:
mask.sum()

In [28]:
de = GLC_AOI.where(mask == 1)

In [29]:
de

In [30]:
de = de.to_dataframe()

In [31]:
de = de.dropna()

In [32]:
de = de.reset_index()

In [33]:
de

Unnamed: 0,time,lat,lon,Lichen
0,2015-01-01,71.166351,25.812906,1.0
1,2015-01-01,71.166351,25.813832,1.0
2,2015-01-01,71.152462,25.693462,2.0
3,2015-01-01,71.152462,25.694388,2.0
4,2015-01-01,71.152462,25.695314,2.0
...,...,...,...,...
479665,2019-01-01,68.352462,18.714758,5.0
479666,2019-01-01,68.352462,18.715684,5.0
479667,2019-01-01,68.352462,19.331425,11.0
479668,2019-01-01,68.352462,19.332351,11.0


## Each year in a separate dataset and keep only the first 183 days

In [318]:
Year = 2019
Number_of_days = 183
print('x = WLC(' + str(Year)+ ') joined with ERA5land(' + str(Year + 1) + ')')
print('y = WLC(' + str(Year + 1) + ')')

x = WLC(2019) joined with ERA5land(2020)
y = WLC(2020)


In [319]:
# Only keep locations with lichen for the current year
df = de.loc[de['time'] == str(Year) + '-01-01']
dg = de.loc[de['time'] == str(Year + 1) + '-01-01']

In [320]:
df

Unnamed: 0,time,lat,lon,Lichen
383736,2019-01-01,71.166351,25.812906,2.0
383737,2019-01-01,71.166351,25.813832,2.0
383738,2019-01-01,71.152462,25.693462,1.0
383739,2019-01-01,71.152462,25.694388,1.0
383740,2019-01-01,71.152462,25.695314,1.0
...,...,...,...,...
479665,2019-01-01,68.352462,18.714758,5.0
479666,2019-01-01,68.352462,18.715684,5.0
479667,2019-01-01,68.352462,19.331425,11.0
479668,2019-01-01,68.352462,19.332351,11.0


In [321]:
dg

Unnamed: 0,time,lat,lon,Lichen


In [322]:
# Normalize the fractional cover
df['Lichen'] = df['Lichen'].div(100)
dg['Lichen'] = dg['Lichen'].div(100)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['Lichen'] = df['Lichen'].div(100)


In [323]:
df

Unnamed: 0,time,lat,lon,Lichen
383736,2019-01-01,71.166351,25.812906,0.02
383737,2019-01-01,71.166351,25.813832,0.02
383738,2019-01-01,71.152462,25.693462,0.01
383739,2019-01-01,71.152462,25.694388,0.01
383740,2019-01-01,71.152462,25.695314,0.01
...,...,...,...,...
479665,2019-01-01,68.352462,18.714758,0.05
479666,2019-01-01,68.352462,18.715684,0.05
479667,2019-01-01,68.352462,19.331425,0.11
479668,2019-01-01,68.352462,19.332351,0.11


In [324]:
dg

Unnamed: 0,time,lat,lon,Lichen


In [325]:
# Convert to VAEX
dvx = vaex.from_pandas(df)
dvy = vaex.from_pandas(dg)

In [326]:
dvx

#,time,lat,lon,Lichen
0,2019-01-01 00:00:00.000000000,71.16635109274083,25.812906095556414,0.02
1,2019-01-01 00:00:00.000000000,71.16635109274083,25.813832021482337,0.02
2,2019-01-01 00:00:00.000000000,71.15246220385194,25.69346165111196,0.01
3,2019-01-01 00:00:00.000000000,71.15246220385194,25.694387577037887,0.01
4,2019-01-01 00:00:00.000000000,71.15246220385194,25.69531350296381,0.01
...,...,...,...,...
95929,2019-01-01 00:00:00.000000000,68.35246220385172,18.714757947407698,0.05
95930,2019-01-01 00:00:00.000000000,68.35246220385172,18.71568387333362,0.05
95931,2019-01-01 00:00:00.000000000,68.35246220385172,19.33142461407441,0.11
95932,2019-01-01 00:00:00.000000000,68.35246220385172,19.332350540000338,0.11


In [327]:
dvy

#,time,lat,lon,Lichen
,,,,


In [328]:
# Find the correspondind ERA5-land lat-lon
# Careful with the latitude, in reverse order
dvx['ERA5_lon_index'] = ((dvx.lon - 15.59) / 0.1).astype('int').values
dvx['ERA5_lat_index'] = 28 - ((dvx.lat - 68.35) / 0.1).astype('int').values
dvy['ERA5_lon_index'] = ((dvy.lon - 15.59) / 0.1).astype('int').values
dvy['ERA5_lat_index'] = 28 - ((dvy.lat - 68.35) / 0.1).astype('int').values

In [329]:
dvx

#,time,lat,lon,Lichen,ERA5_lon_index,ERA5_lat_index
0,2019-01-01 00:00:00.000000000,71.16635109274083,25.812906095556414,0.02,102,0
1,2019-01-01 00:00:00.000000000,71.16635109274083,25.813832021482337,0.02,102,0
2,2019-01-01 00:00:00.000000000,71.15246220385194,25.69346165111196,0.01,101,0
3,2019-01-01 00:00:00.000000000,71.15246220385194,25.694387577037887,0.01,101,0
4,2019-01-01 00:00:00.000000000,71.15246220385194,25.69531350296381,0.01,101,0
...,...,...,...,...,...,...
95929,2019-01-01 00:00:00.000000000,68.35246220385172,18.714757947407698,0.05,31,28
95930,2019-01-01 00:00:00.000000000,68.35246220385172,18.71568387333362,0.05,31,28
95931,2019-01-01 00:00:00.000000000,68.35246220385172,19.33142461407441,0.11,37,28
95932,2019-01-01 00:00:00.000000000,68.35246220385172,19.332350540000338,0.11,37,28


# Adding columns with the ERA5-land longitude and latitude to dv

In [330]:
dvx['ERA5_lon'] = ERA5land.sel(time="2015-01-01").longitude[dvx['ERA5_lon_index'].values].values
dvx['ERA5_lat'] = ERA5land.sel(time="2015-01-01").latitude[dvx['ERA5_lat_index'].values].values
dvy['ERA5_lon'] = ERA5land.sel(time="2015-01-01").longitude[dvy['ERA5_lon_index'].values].values
dvy['ERA5_lat'] = ERA5land.sel(time="2015-01-01").latitude[dvy['ERA5_lat_index'].values].values

In [331]:
# Add combined lon_lat column to dv x & y
dvx['lon_lat'] = (dvx['ERA5_lon'] * 100).astype('int') + (dvx['ERA5_lat'] * 100).astype('int') / 100000
dvy['lon_lat'] = (dvy['ERA5_lon'] * 100).astype('int') + (dvy['ERA5_lat'] * 100).astype('int') / 100000

In [332]:
dvx

#,time,lat,lon,Lichen,ERA5_lon_index,ERA5_lat_index,ERA5_lon,ERA5_lat,lon_lat
0,2019-01-01 00:00:00.000000000,71.16635109274083,25.812906095556414,0.02,102,0,25.79,71.15,2579.07115
1,2019-01-01 00:00:00.000000000,71.16635109274083,25.813832021482337,0.02,102,0,25.79,71.15,2579.07115
2,2019-01-01 00:00:00.000000000,71.15246220385194,25.69346165111196,0.01,101,0,25.69,71.15,2569.07115
3,2019-01-01 00:00:00.000000000,71.15246220385194,25.694387577037887,0.01,101,0,25.69,71.15,2569.07115
4,2019-01-01 00:00:00.000000000,71.15246220385194,25.69531350296381,0.01,101,0,25.69,71.15,2569.07115
...,...,...,...,...,...,...,...,...,...
95929,2019-01-01 00:00:00.000000000,68.35246220385172,18.714757947407698,0.05,31,28,18.69,68.35,1869.06835
95930,2019-01-01 00:00:00.000000000,68.35246220385172,18.71568387333362,0.05,31,28,18.69,68.35,1869.06835
95931,2019-01-01 00:00:00.000000000,68.35246220385172,19.33142461407441,0.11,37,28,19.29,68.35,1929.06835
95932,2019-01-01 00:00:00.000000000,68.35246220385172,19.332350540000338,0.11,37,28,19.29,68.35,1929.06835


In [333]:
dvx = dvx.drop(columns=['time', 'lat', 'lon', 'ERA5_lon', 'ERA5_lat'])
dvy = dvy.drop(columns=['time', 'lat', 'lon', 'ERA5_lon', 'ERA5_lat'])

In [334]:
dvx

#,Lichen,ERA5_lon_index,ERA5_lat_index,lon_lat
0,0.02,102,0,2579.07115
1,0.02,102,0,2579.07115
2,0.01,101,0,2569.07115
3,0.01,101,0,2569.07115
4,0.01,101,0,2569.07115
...,...,...,...,...
95929,0.05,31,28,1869.06835
95930,0.05,31,28,1869.06835
95931,0.11,37,28,1929.06835
95932,0.11,37,28,1929.06835


In [335]:
dvy

#,Lichen,ERA5_lon_index,ERA5_lat_index,lon_lat
,,,,


# Calculate the mean fractional cover for each ERA5-land grid cell

In [336]:
dmx = dvx.groupby(by='lon_lat', agg='mean')
dmy = dvy.groupby(by='lon_lat', agg='mean')

In [337]:
dmx

#,lon_lat,Lichen_mean,ERA5_lon_index_mean,ERA5_lat_index_mean
0,2239.07065,0.08479999899864196,68.0,5.0
1,2459.07055,0.1599999964237213,90.0,6.0
2,2049.06894,0.15907407524409117,49.0,22.0
3,2129.06835,0.16111111475361717,57.0,28.0
4,2519.06985,0.09820512825479874,96.0,13.0
...,...,...,...,...
1742,2509.07065,0.09500000067055225,95.0,5.0
1743,2229.07055,0.10326530696938233,67.0,6.0
1744,3009.07044,0.047878788163264595,145.0,7.0
1745,2569.06925,0.029999999329447746,101.0,19.0


In [338]:
dmy

#,lon_lat,Lichen_mean,ERA5_lon_index_mean,ERA5_lat_index_mean
,,,,


In [339]:
dmx['ERA5_lon_index'] = dmx['ERA5_lon_index_mean'].astype('int')
dmx['ERA5_lat_index'] = dmx['ERA5_lat_index_mean'].astype('int')

In [340]:
dmx = dmx.drop(columns=['ERA5_lon_index_mean', 'ERA5_lat_index_mean'])
dmy = dmy.drop(columns=['ERA5_lon_index_mean', 'ERA5_lat_index_mean'])

In [341]:
dmx

#,lon_lat,Lichen_mean,ERA5_lon_index,ERA5_lat_index
0,2239.07065,0.08479999899864196,68,5
1,2459.07055,0.1599999964237213,90,6
2,2049.06894,0.15907407524409117,49,22
3,2129.06835,0.16111111475361717,57,28
4,2519.06985,0.09820512825479874,96,13
...,...,...,...,...
1742,2509.07065,0.09500000067055225,95,5
1743,2229.07055,0.10326530696938233,67,6
1744,3009.07044,0.047878788163264595,145,7
1745,2569.06925,0.029999999329447746,101,19


In [342]:
dmy

#,lon_lat,Lichen_mean
,,


In [343]:
dmx['ERA5_lon'] = Longitudes[dmx['ERA5_lon_index'].values].values
dmx['ERA5_lat'] = Latitudes[dmx['ERA5_lat_index'].values].values

In [344]:
dmx = dmx.drop(columns=['ERA5_lon_index', 'ERA5_lat_index'])

In [345]:
dmx

#,lon_lat,Lichen_mean,ERA5_lon,ERA5_lat
0,2239.07065,0.08479999899864196,22.389999389648438,70.6500015258789
1,2459.07055,0.1599999964237213,24.59000015258789,70.55000305175781
2,2049.06894,0.15907407524409117,20.489999771118164,68.94999694824219
3,2129.06835,0.16111111475361717,21.290000915527344,68.3499984741211
4,2519.06985,0.09820512825479874,25.190000534057617,69.8499984741211
...,...,...,...,...
1742,2509.07065,0.09500000067055225,25.09000015258789,70.6500015258789
1743,2229.07055,0.10326530696938233,22.290000915527344,70.55000305175781
1744,3009.07044,0.047878788163264595,30.09000015258789,70.44999694824219
1745,2569.06925,0.029999999329447746,25.690000534057617,69.25


## Extract ERA5 data for  the selected period of the year (when RoS events mostly occur)

In [346]:
ERA5 = ERA5land.sel(time=slice(str(Year + 1) + '-01-01', str(Year + 1)  + '-12-31'))

In [347]:
ERA5 = ERA5.isel(time=range(Number_of_days * 24))

In [348]:
ERA5 = ERA5.isel(expver = 0)

In [349]:
ERA5

In [None]:
# Extract ERA5 t2m, tp and sd fields 
ERA5_t2m = ERA5.where(ERA5['latitude'].isin(dmx['ERA5_lat'].values) & ERA5['longitude'].isin(dmx['ERA5_lon'].values))['t2m']
ERA5_tp = ERA5.where(ERA5['latitude'].isin(dmx['ERA5_lat'].values) & ERA5['longitude'].isin(dmx['ERA5_lon'].values))['tp']
ERA5_sd = ERA5.where(ERA5['latitude'].isin(dmx['ERA5_lat'].values) & ERA5['longitude'].isin(dmx['ERA5_lon'].values))['sd']

In [None]:
ERA5_t2m

## Rain on Snow criteria (according to https://www.hydrol-earth-syst-sci.net/23/2983/2019/hess-23-2983-2019.pdf)
 * total rainfall volume of at least 20 mm within 12 h
### or 
 * air temperatures above 0C (273.15K)
 * and initial snowpack depth of at least 10 cm

In [None]:
# Normalizing temperature, total precipitation and snow depth values accordidng to these criteria
ERA5_t2m = ERA5_t2m / 273.15
ERA5_tp = ERA5_tp / 0.02 * 12.
ERA5_sd = ERA5_sd / 0.1

In [None]:
dh_t2m = ERA5_t2m.stack(z=['latitude', 'longitude']).to_pandas().transpose().reset_index()
dh_tp = ERA5_tp.stack(z=['latitude', 'longitude']).to_pandas().transpose().reset_index()
dh_sd = ERA5_sd.stack(z=['latitude', 'longitude']).to_pandas().transpose().reset_index()

In [None]:
dh_tp = dh_tp.drop(columns=['latitude', 'longitude'])
dh_sd = dh_sd.drop(columns=['latitude', 'longitude'])

In [None]:
# Create labels for ERA5-land variables to replace the dates
label_t2m = ['latitude', 'longitude']
label_tp = list()
label_sd = list()
for i in range(Number_of_days * 24):
    label_t2m.append('t2m_'+ str(i))
    label_tp.append('tp_'+ str(i))
    label_sd.append('sd_'+ str(i))

In [None]:
dh_t2m.set_axis(label_t2m, axis="columns", inplace=True)
dh_tp.set_axis(label_tp, axis="columns", inplace=True)
dh_sd.set_axis(label_sd, axis="columns", inplace=True)

In [None]:
dh_t2m

In [None]:
dh_tp

In [None]:
dh_sd

In [None]:
#  Glue together dh_t2m and dh_tp  <- not dh_sd
dh = pd.concat([dh_t2m, dh_tp], axis = 1)

In [None]:
dh

In [None]:
# Add combined lon_lat column to dh
dh['ERA5_lon_lat'] = (dh['longitude'] * 100).astype('int') + (dh['latitude'] * 100).astype('int') / 100000

In [None]:
# Drop latitude and longitude columns which are not used anymore in dh
dh = dh.drop(columns=['latitude', 'longitude'])

In [None]:
dh

In [None]:
dmx

In [None]:
dmx = dmx.drop(columns=['ERA5_lon', 'ERA5_lat'])

In [None]:
# Convert to panda dw x & y
dwx_pandas = dmx.to_pandas_df()
dwy_pandas = dmy.to_pandas_df()

## Join dwx (WLC) with dh (ERA5 t2m-tp-sd)

In [None]:
dwx_pandas

In [None]:
# Join dwx (WLC) with df (ERA5 t2m-tp-sd)
dx = dwx_pandas.set_index('lon_lat').join(dh.set_index('ERA5_lon_lat'), on='lon_lat')

In [None]:
dx

In [None]:
# Drop the Rows with NaN Values
dx = dx.dropna()

In [305]:
dx = dx.reset_index()

In [306]:
dx

Unnamed: 0,lon_lat,Lichen_mean,t2m_0,t2m_1,t2m_2,t2m_3,t2m_4,t2m_5,t2m_6,t2m_7,...,tp_4382,tp_4383,tp_4384,tp_4385,tp_4386,tp_4387,tp_4388,tp_4389,tp_4390,tp_4391
0,2459.07055,0.110000,0.968329,0.968344,0.968841,0.969625,0.971999,0.975627,0.979889,0.981955,...,1.639191,1.685303,1.746490,1.816766,1.850020,1.933598,2.045996,2.151521,2.256603,2.336412
1,2419.06875,0.040000,0.973603,0.973115,0.972207,0.970714,0.968611,0.967022,0.965296,0.968091,...,1.146148,1.157897,1.175854,1.193812,1.216202,1.255663,1.304214,1.416612,1.504402,1.562707
2,2609.06994,0.165000,0.955466,0.955622,0.955690,0.955431,0.957508,0.961315,0.965759,0.972094,...,2.118932,2.139993,2.154625,2.165045,2.175464,2.328875,2.512436,2.637692,2.713511,2.777579
3,2419.06994,0.160000,0.954781,0.953219,0.952014,0.951574,0.952160,0.952921,0.956507,0.968870,...,3.554167,3.635085,3.720658,3.774750,3.802240,3.902445,4.058738,4.280873,4.503009,4.774138
4,2479.06975,0.056250,0.956112,0.955335,0.954141,0.953363,0.953724,0.954732,0.958189,0.970330,...,3.282816,3.308754,3.363069,3.381248,3.392997,3.512045,3.739723,4.035682,4.330976,4.656420
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
653,2209.06975,0.030000,0.956498,0.957022,0.957874,0.958275,0.957075,0.956487,0.956942,0.961819,...,3.129627,3.177734,3.202120,3.212096,3.216087,3.233157,3.246236,3.255991,3.263085,3.272396
654,1869.06855,0.117398,0.967437,0.967310,0.966509,0.966239,0.965688,0.965218,0.964373,0.964481,...,0.844868,0.862825,0.870806,0.873910,0.875018,0.878122,0.879009,0.879230,0.879674,0.879674
655,2579.07105,0.077500,0.990227,0.990582,0.991465,0.992337,0.991467,0.991347,0.990251,0.988407,...,0.771045,0.901399,1.045943,1.135285,1.190929,1.260984,1.346779,1.412400,1.443437,1.458955
656,2609.06915,0.100000,0.970596,0.970117,0.969818,0.968757,0.966935,0.966605,0.967310,0.973858,...,2.830786,3.297669,3.475910,3.574341,3.684965,3.813768,4.024376,4.150740,4.290850,4.398813


In [307]:
## Save into **local** HDF5 file without index
x_filename = os.path.join(path, 'x_mean_tp_' + str(Year) + '.hdf')
print(x_filename)
dx.to_hdf(x_filename, key='df', mode="w", index=False)

/home/jovyan/Arctic/Vegetation_in_Troms_and_Finnmark/data/x_mean_tp_2018.hdf


## Find locations with lichen in the following year corresponding to those in current year

In [308]:
dwx_pandas = dx[['lon_lat']]

In [309]:
dwx_pandas

Unnamed: 0,lon_lat
0,2459.07055
1,2419.06875
2,2609.06994
3,2419.06994
4,2479.06975
...,...
653,2209.06975
654,1869.06855
655,2579.07105
656,2609.06915


In [310]:
dwy_pandas

Unnamed: 0,lon_lat,Lichen_mean
0,2239.07065,0.084800
1,2459.07055,0.160000
2,2049.06894,0.159074
3,2129.06835,0.161111
4,2519.06985,0.098205
...,...,...
1742,2509.07065,0.095000
1743,2229.07055,0.103265
1744,3009.07044,0.047879
1745,2569.06925,0.030000


In [311]:
## Join dwx with dwy
dy = dwx_pandas.set_index('lon_lat').join(dwy_pandas.set_index('lon_lat'), on='lon_lat')

In [312]:
dy

Unnamed: 0_level_0,Lichen_mean
lon_lat,Unnamed: 1_level_1
2459.07055,0.160000
2419.06875,0.050000
2609.06994,0.174487
2419.06994,0.140000
2479.06975,0.065625
...,...
2209.06975,0.030000
1869.06855,0.155488
2579.07105,0.122500
2609.06915,0.130000


In [313]:
dy.fillna(0)

Unnamed: 0_level_0,Lichen_mean
lon_lat,Unnamed: 1_level_1
2459.07055,0.160000
2419.06875,0.050000
2609.06994,0.174487
2419.06994,0.140000
2479.06975,0.065625
...,...
2209.06975,0.030000
1869.06855,0.155488
2579.07105,0.122500
2609.06915,0.130000


In [314]:
dy.rename(columns = {'Lichen_mean' : 'new_Lichen_mean'}, inplace = True)

In [315]:
dy

Unnamed: 0_level_0,new_Lichen_mean
lon_lat,Unnamed: 1_level_1
2459.07055,0.160000
2419.06875,0.050000
2609.06994,0.174487
2419.06994,0.140000
2479.06975,0.065625
...,...
2209.06975,0.030000
1869.06855,0.155488
2579.07105,0.122500
2609.06915,0.130000


In [316]:
## Save into **local** HDF5 file without index
y_filename = os.path.join(path, 'y_mean_tp_' + str(Year) + '.hdf')
print(y_filename)
dy.to_hdf(y_filename, key='dg', mode="w", index=False)

/home/jovyan/Arctic/Vegetation_in_Troms_and_Finnmark/data/y_mean_tp_2018.hdf


In [317]:
print('Finished!')

Finished!
