# Prepare labelled input for the NN
# (i.e. locations where moss&lichen fractional cover changed and related meteorological parameters from ERA5-Land)

# Copernicus Global Land Cover data  from 2015-01-01 to 2019-12-31 already available as a netCDF file stored on EOSC (CESNET)
## Troms og Finnmark
### Mosses and lichens, bare, grass, shrubs and trees

In [1]:
!date

Wed Mar  1 14:05:04 UTC 2023


In [2]:
pip install vaex

Note: you may need to restart the kernel to use updated packages.


### Define s3 storage parameters

In [3]:
import s3fs
import xarray as xr
import pandas as pd
import h3
import vaex

In [4]:
client_kwargs={'endpoint_url': 'https://object-store.cloud.muni.cz'}
store = s3fs.S3FileSystem(anon=False, client_kwargs=client_kwargs)
store.ls('Data', detail=True, refresh=True)

[{'Key': 'Data/C_GlobalLandCover_20150101_20190101_Troms-Finnmark.nc',
  'LastModified': datetime.datetime(2023, 2, 1, 12, 44, 42, 95000, tzinfo=tzlocal()),
  'ETag': '"70e848cfeba6b4e2db997b6efb0ad947-8"',
  'Size': 397191332,
  'StorageClass': 'STANDARD',
  'type': 'file',
  'size': 397191332,
  'name': 'Data/C_GlobalLandCover_20150101_20190101_Troms-Finnmark.nc'},
 {'Key': 'Data/reanalysis-era5-land_hourly_2015-01-01_2019-12-31_Troms-Finnmark_T2m-SD-TP.nc',
  'LastModified': datetime.datetime(2023, 2, 1, 9, 53, 15, 635000, tzinfo=tzlocal()),
  'ETag': '"e48be2b71e08b38d296a0ea6db979b09-23"',
  'Size': 1182124070,
  'StorageClass': 'STANDARD',
  'type': 'file',
  'size': 1182124070,
  'name': 'Data/reanalysis-era5-land_hourly_2015-01-01_2019-12-31_Troms-Finnmark_T2m-SD-TP.nc'},
 {'Key': 'Data/reanalysis-era5-single-levels_2015-2019.nc',
  'LastModified': datetime.datetime(2023, 1, 6, 7, 24, 40, 847000, tzinfo=tzlocal()),
  'ETag': '"2582b929cffaa1770daa45a508dc689a-8"',
  'Size': 391

### Define s3 store for the **netCDF file**

In [5]:
s3path = 'Data/C_GlobalLandCover_20150101_20190101_Troms-Finnmark.nc'

In [6]:
GLC_AOI = xr.open_dataset(store.open(s3path))

In [7]:
GLC_AOI

In [8]:
GLC_AOI = GLC_AOI.rename(x='lon', y='lat', t='time')

In [9]:
# Drop variables not directly of interest here
GLC_AOI = GLC_AOI.drop_vars(['crs',
                             'Crops_CoverFraction_layer',
                             'Discrete_Classification_map', 
                             'Discrete_Classification_proba',
                             'Forest_Type_layer',
                             'Snow_CoverFraction_layer',
                             'BuiltUp_CoverFraction_layer',
                             'PermanentWater_CoverFraction_layer',
                             'SeasonalWater_CoverFraction_layer',
                             'DataDensityIndicator',
                             'Change_Confidence_layer',
                             'dataMask'])

In [10]:
GLC_AOI = GLC_AOI.rename(Bare_CoverFraction_layer = 'Bare',
                         Grass_CoverFraction_layer = 'Grass',
                         MossLichen_CoverFraction_layer = 'Lichen',
                         Shrub_CoverFraction_layer = 'Shrub',
                         Tree_CoverFraction_layer = 'Tree')

In [11]:
GLC_AOI

In [12]:
# Troms & Finnmark Global Land Cover area
GLC_AOI_min_lon = GLC_AOI.lon.min()
GLC_AOI_max_lon = GLC_AOI.lon.max()
GLC_AOI_min_lat = GLC_AOI.lat.min()
GLC_AOI_max_lat = GLC_AOI.lat.max()
print(GLC_AOI_min_lon, GLC_AOI_max_lon, GLC_AOI_min_lat, GLC_AOI_max_lat)

<xarray.DataArray 'lon' ()>
array(15.5953135) <xarray.DataArray 'lon' ()>
array(31.06568387) <xarray.DataArray 'lat' ()>
array(68.35153628) <xarray.DataArray 'lat' ()>
array(71.18764739)


### The two cells below redefine a very small region for testing purposes only - skip them to keep the whole Troms-finnmark area

In [13]:
# Small region 
Small_AOI_min_lon = 19.65
Small_AOI_max_lon = 19.7
Small_AOI_min_lat = 69.05
Small_AOI_max_lat = 69.1

In [14]:
#df = GLC_AOI.isel(time = 0).sel(lat=slice(Small_AOI_max_lat, Small_AOI_min_lat), lon=slice(Small_AOI_min_lon, Small_AOI_max_lon)).to_dataframe()

In [15]:
from h3 import h3
import folium

def visualize_hexagons(hexagons, color="red", folium_map=None):
    """
    hexagons is a list of hexcluster. Each hexcluster is a list of hexagons. 
    eg. [[hex1, hex2], [hex3, hex4]]
    """
    polylines = []
    lat = []
    lng = []
    for hex in hexagons:
        polygons = h3.h3_set_to_multi_polygon([hex], geo_json=False)
        # flatten polygons into loops.
        outlines = [loop for polygon in polygons for loop in polygon]
        polyline = [outline + [outline[0]] for outline in outlines][0]
        lat.extend(map(lambda v:v[0],polyline))
        lng.extend(map(lambda v:v[1],polyline))
        polylines.append(polyline)
    
    if folium_map is None:
        m = folium.Map(location=[sum(lat)/len(lat), sum(lng)/len(lng)], zoom_start=13, tiles='cartodbpositron')
    else:
        m = folium_map
    for polyline in polylines:
        my_PolyLine=folium.PolyLine(locations=polyline,weight=8,color=color)
        m.add_child(my_PolyLine)
    return m
    

def visualize_polygon(polyline, color):
    polyline.append(polyline[0])
    lat = [p[0] for p in polyline]
    lng = [p[1] for p in polyline]
    m = folium.Map(location=[sum(lat)/len(lat), sum(lng)/len(lng)], zoom_start=13, tiles='cartodbpositron')
    my_PolyLine=folium.PolyLine(locations=polyline,weight=8,color=color)
    m.add_child(my_PolyLine)
    return m

In [16]:
Small_AOI_center = h3.geo_to_h3((Small_AOI_min_lat + Small_AOI_max_lat)/2, (Small_AOI_min_lon +Small_AOI_max_lon)/2, 9) # lat, lng, hex resolution    
m = visualize_hexagons([h3.geo_to_h3(GLC_AOI_min_lat, GLC_AOI_min_lon, 9)], color="red")                                                                                        
m = visualize_hexagons([h3.geo_to_h3(GLC_AOI_max_lat, GLC_AOI_min_lon, 9)], color="red", folium_map=m)                                                                                   
m = visualize_hexagons([h3.geo_to_h3(GLC_AOI_max_lat, GLC_AOI_max_lon, 9)], color="red", folium_map=m)                                                                                
m = visualize_hexagons([h3.geo_to_h3(GLC_AOI_min_lat, GLC_AOI_max_lon, 9)], color="red", folium_map=m)
#m = visualize_hexagons([Small_AOI_center], color="green", folium_map=m) 
display(m)

In [17]:
GLC_AOI

## Each year in a separate dataset
> WLC - 2019

In [18]:
df = GLC_AOI.isel(time = 4).to_dataframe()

In [19]:
df = df.reset_index()

In [20]:
df

Unnamed: 0,lon,lat,time,Bare,Grass,Lichen,Shrub,Tree
0,15.595314,71.187647,2019-01-01,255.0,255.0,255.0,255.0,255.0
1,15.595314,71.186721,2019-01-01,255.0,255.0,255.0,255.0,255.0
2,15.595314,71.185796,2019-01-01,255.0,255.0,255.0,255.0,255.0
3,15.595314,71.184870,2019-01-01,255.0,255.0,255.0,255.0,255.0
4,15.595314,71.183944,2019-01-01,255.0,255.0,255.0,255.0,255.0
...,...,...,...,...,...,...,...,...
51196371,31.065684,68.355240,2019-01-01,,,,,
51196372,31.065684,68.354314,2019-01-01,,,,,
51196373,31.065684,68.353388,2019-01-01,,,,,
51196374,31.065684,68.352462,2019-01-01,,,,,


In [21]:
# Only keep locations where there is some moss & lichen
df = df.loc[(df['Lichen'] > 0) & (df['Lichen'] <= 100)]

In [22]:
df

Unnamed: 0,lon,lat,time,Bare,Grass,Lichen,Shrub,Tree
2519,15.595314,68.855240,2019-01-01,,44.0,3.0,27.0,26.0
2520,15.595314,68.854314,2019-01-01,,48.0,7.0,23.0,22.0
2521,15.595314,68.853388,2019-01-01,,41.0,3.0,17.0,39.0
2678,15.595314,68.708018,2019-01-01,,36.0,9.0,20.0,35.0
2716,15.595314,68.672833,2019-01-01,,40.0,21.0,14.0,25.0
...,...,...,...,...,...,...,...,...
51191827,31.064758,69.725610,2019-01-01,,49.0,18.0,25.0,8.0
51191828,31.064758,69.724684,2019-01-01,,48.0,17.0,27.0,8.0
51191829,31.064758,69.723759,2019-01-01,,49.0,19.0,21.0,11.0
51191831,31.064758,69.721907,2019-01-01,,44.0,38.0,18.0,


In [23]:
# Replace NaNs by 0
for col in ['Bare', 'Grass', 'Lichen', 'Shrub', 'Tree']:
    print(col)
    df[col] = df[col].fillna(0)

Bare
Grass
Lichen
Shrub
Tree


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[col] = df[col].fillna(0)


In [24]:
# Calculate total fractional coverage of bare, grass, lichen, shrub and tree (should be 100)
df['Total']  = (df['Bare'] + df['Grass'] + df['Lichen'] + df['Shrub'] + df['Tree'])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['Total']  = (df['Bare'] + df['Grass'] + df['Lichen'] + df['Shrub'] + df['Tree'])


In [25]:
df

Unnamed: 0,lon,lat,time,Bare,Grass,Lichen,Shrub,Tree,Total
2519,15.595314,68.855240,2019-01-01,0.0,44.0,3.0,27.0,26.0,100.0
2520,15.595314,68.854314,2019-01-01,0.0,48.0,7.0,23.0,22.0,100.0
2521,15.595314,68.853388,2019-01-01,0.0,41.0,3.0,17.0,39.0,100.0
2678,15.595314,68.708018,2019-01-01,0.0,36.0,9.0,20.0,35.0,100.0
2716,15.595314,68.672833,2019-01-01,0.0,40.0,21.0,14.0,25.0,100.0
...,...,...,...,...,...,...,...,...,...
51191827,31.064758,69.725610,2019-01-01,0.0,49.0,18.0,25.0,8.0,100.0
51191828,31.064758,69.724684,2019-01-01,0.0,48.0,17.0,27.0,8.0,100.0
51191829,31.064758,69.723759,2019-01-01,0.0,49.0,19.0,21.0,11.0,100.0
51191831,31.064758,69.721907,2019-01-01,0.0,44.0,38.0,18.0,0.0,100.0


In [26]:
# Normalize the fractional cover
for col in ['Bare', 'Grass', 'Lichen', 'Shrub', 'Tree']:
    print(col)
    df[col] = df[col] / df['Total']

Bare
Grass
Lichen
Shrub
Tree


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[col] = df[col] / df['Total']


In [27]:
# Drop the *tot* column
df = df.drop(['Total'], axis=1)

In [28]:
df

Unnamed: 0,lon,lat,time,Bare,Grass,Lichen,Shrub,Tree
2519,15.595314,68.855240,2019-01-01,0.0,0.44,0.03000,0.27000,0.2600
2520,15.595314,68.854314,2019-01-01,0.0,0.48,0.07000,0.23000,0.2200
2521,15.595314,68.853388,2019-01-01,0.0,0.41,0.03000,0.17000,0.3900
2678,15.595314,68.708018,2019-01-01,0.0,0.36,0.09000,0.20000,0.3500
2716,15.595314,68.672833,2019-01-01,0.0,0.40,0.21000,0.14000,0.2500
...,...,...,...,...,...,...,...,...
51191827,31.064758,69.725610,2019-01-01,0.0,0.49,0.18000,0.25000,0.0800
51191828,31.064758,69.724684,2019-01-01,0.0,0.48,0.17000,0.27000,0.0800
51191829,31.064758,69.723759,2019-01-01,0.0,0.49,0.19000,0.21000,0.1100
51191831,31.064758,69.721907,2019-01-01,0.0,0.44,0.38000,0.18000,0.0000


In [29]:
# Convert to VAEX
dv = vaex.from_pandas(df)

In [30]:
dv

#,lon,lat,time,Bare,Grass,Lichen,Shrub,Tree
0,15.595313502963002,68.85523998162952,2019-01-01 00:00:00.000000000,0.0,0.44,0.03,0.27,0.26
1,15.595313502963002,68.8543140557036,2019-01-01 00:00:00.000000000,0.0,0.48,0.07,0.23,0.22
2,15.595313502963002,68.85338812977767,2019-01-01 00:00:00.000000000,0.0,0.41,0.03,0.17,0.39
3,15.595313502963002,68.7080177594073,2019-01-01 00:00:00.000000000,0.0,0.36,0.09,0.2,0.35
4,15.595313502963002,68.6728325742221,2019-01-01 00:00:00.000000000,0.0,0.4,0.21,0.14,0.25
...,...,...,...,...,...,...,...,...
666694,31.064757947408683,69.72561035199996,2019-01-01 00:00:00.000000000,0.0,0.49,0.18,0.25,0.08
666695,31.064757947408683,69.72468442607405,2019-01-01 00:00:00.000000000,0.0,0.48,0.17,0.27,0.08
666696,31.064757947408683,69.72375850014812,2019-01-01 00:00:00.000000000,0.0,0.49,0.19,0.21,0.11
666697,31.064757947408683,69.72190664829627,2019-01-01 00:00:00.000000000,0.0,0.44,0.38,0.18,0.0


In [31]:
# Find the correspondind ERA5-land lat-lon
# Careful with the latitude, in reverse order
ERA5_lon = ((dv.lon - 15.59) / 0.1).astype('int').values
ERA5_lat = 28 - ((dv.lat - 68.35) / 0.1).astype('int').values

In [32]:
dv['ERA5_lon_index'] = ERA5_lon
dv['ERA5_lat_index'] = ERA5_lat

In [33]:
dv

#,lon,lat,time,Bare,Grass,Lichen,Shrub,Tree,ERA5_lon_index,ERA5_lat_index
0,15.595313502963002,68.85523998162952,2019-01-01 00:00:00.000000000,0.0,0.44,0.03,0.27,0.26,0,23
1,15.595313502963002,68.8543140557036,2019-01-01 00:00:00.000000000,0.0,0.48,0.07,0.23,0.22,0,23
2,15.595313502963002,68.85338812977767,2019-01-01 00:00:00.000000000,0.0,0.41,0.03,0.17,0.39,0,23
3,15.595313502963002,68.7080177594073,2019-01-01 00:00:00.000000000,0.0,0.36,0.09,0.2,0.35,0,25
4,15.595313502963002,68.6728325742221,2019-01-01 00:00:00.000000000,0.0,0.4,0.21,0.14,0.25,0,25
...,...,...,...,...,...,...,...,...,...,...
666694,31.064757947408683,69.72561035199996,2019-01-01 00:00:00.000000000,0.0,0.49,0.18,0.25,0.08,154,15
666695,31.064757947408683,69.72468442607405,2019-01-01 00:00:00.000000000,0.0,0.48,0.17,0.27,0.08,154,15
666696,31.064757947408683,69.72375850014812,2019-01-01 00:00:00.000000000,0.0,0.49,0.19,0.21,0.11,154,15
666697,31.064757947408683,69.72190664829627,2019-01-01 00:00:00.000000000,0.0,0.44,0.38,0.18,0.0,154,15


# ERA5-land data from 2015-01-01 to 2019-12-31 - already available as a netCDF file stored on EOSC (CESNET)
## 2m Temperature, Snow depth, Total precipitation
## **For now will only use t2p in the ML algorithm** although it may be useful to know about rain and snow depth

In [34]:
s3path = 'Data/reanalysis-era5-land_hourly_2015-01-01_2019-12-31_Troms-Finnmark_T2m-SD-TP.nc'

In [35]:
ERA5land = xr.open_dataset(store.open(s3path))

In [36]:
ERA5land

# ERA5-land t2m mean = 266.87485 and standard deviation = 12.939538955688477

In [37]:
# Calculate the first time using the entire dataset
t2m_mean = ERA5land.t2m.mean(skipna=True).values
print('Mean of the ERA5-Land 2m temperature: ', t2m_mean)
t2m_std = ERA5land.t2m.std(skipna=True).values
print('Standard deviation of the ERA5-Land 2m temperature: ', t2m_std)
# Set once and for all
t2m_mean = 266.87485
t2m_std = 12.939538955688477

Mean of the ERA5-Land 2m temperature:  263.68018
Standard deviation of the ERA5-Land 2m temperature:  91.72649383544922


# Adding columns with the ERA5-land longitude and latitude to dv

In [38]:
dv['ERA5_lon'] = ERA5land.sel(time="2015-01-01").longitude[dv['ERA5_lon_index'].values].values

In [39]:
dv['ERA5_lat'] = ERA5land.sel(time="2015-01-01").latitude[dv['ERA5_lat_index'].values].values

In [40]:
dv

#,lon,lat,time,Bare,Grass,Lichen,Shrub,Tree,ERA5_lon_index,ERA5_lat_index,ERA5_lon,ERA5_lat
0,15.595313502963002,68.85523998162952,2019-01-01 00:00:00.000000000,0.0,0.44,0.03,0.27,0.26,0,23,15.59,68.85
1,15.595313502963002,68.8543140557036,2019-01-01 00:00:00.000000000,0.0,0.48,0.07,0.23,0.22,0,23,15.59,68.85
2,15.595313502963002,68.85338812977767,2019-01-01 00:00:00.000000000,0.0,0.41,0.03,0.17,0.39,0,23,15.59,68.85
3,15.595313502963002,68.7080177594073,2019-01-01 00:00:00.000000000,0.0,0.36,0.09,0.2,0.35,0,25,15.59,68.65
4,15.595313502963002,68.6728325742221,2019-01-01 00:00:00.000000000,0.0,0.4,0.21,0.14,0.25,0,25,15.59,68.65
...,...,...,...,...,...,...,...,...,...,...,...,...
666694,31.064757947408683,69.72561035199996,2019-01-01 00:00:00.000000000,0.0,0.49,0.18,0.25,0.08,154,15,30.99,69.65
666695,31.064757947408683,69.72468442607405,2019-01-01 00:00:00.000000000,0.0,0.48,0.17,0.27,0.08,154,15,30.99,69.65
666696,31.064757947408683,69.72375850014812,2019-01-01 00:00:00.000000000,0.0,0.49,0.19,0.21,0.11,154,15,30.99,69.65
666697,31.064757947408683,69.72190664829627,2019-01-01 00:00:00.000000000,0.0,0.44,0.38,0.18,0.0,154,15,30.99,69.65


In [41]:
import numpy as np

In [42]:
#ERA5land.sel(time="2015-01-01").where(ERA5land["latitude"].isin(dv['ERA5_lat'].values) & ERA5land["longitude"].isin(dv['ERA5_lon'].values))["t2m"].isel(time=0).plot()

In [43]:
#ERA5land.sel(time="2015-03-01").where(ERA5land["latitude"].isin(dv['ERA5_lat'].values) & ERA5land["longitude"].isin(dv['ERA5_lon'].values))["t2m"].isel(time=0).plot()

## Extract ERA5 data for  the **first 6 months** of the year (when RoS events mostly occur)
> ERA5 - 2019

In [44]:
ERA5land = ERA5land.sel(time=slice("2019-01-01", "2019-06-30"))

In [45]:
ERA5land

In [46]:
# Extract ERA5 't2m' field 
ERA5 = ERA5land.where(ERA5land['latitude'].isin(dv['ERA5_lat'].values) & ERA5land['longitude'].isin(dv['ERA5_lon'].values))['t2m']

In [47]:
ERA5 = (ERA5 -t2m_mean) / t2m_std

In [48]:
ERA5.shape[0]

4344

In [49]:
df = ERA5.stack(z=['latitude', 'longitude']).to_pandas().transpose().reset_index()

In [50]:
df

time,latitude,longitude,2019-01-01 00:00:00,2019-01-01 01:00:00,2019-01-01 02:00:00,2019-01-01 03:00:00,2019-01-01 04:00:00,2019-01-01 05:00:00,2019-01-01 06:00:00,2019-01-01 07:00:00,...,2019-06-30 14:00:00,2019-06-30 15:00:00,2019-06-30 16:00:00,2019-06-30 17:00:00,2019-06-30 18:00:00,2019-06-30 19:00:00,2019-06-30 20:00:00,2019-06-30 21:00:00,2019-06-30 22:00:00,2019-06-30 23:00:00
0,71.150002,15.590000,,,,,,,,,...,,,,,,,,,,
1,71.150002,15.690000,,,,,,,,,...,,,,,,,,,,
2,71.150002,15.790000,,,,,,,,,...,,,,,,,,,,
3,71.150002,15.890000,,,,,,,,,...,,,,,,,,,,
4,71.150002,15.990000,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4490,68.349998,30.590000,-0.029134,-0.041736,-0.053118,-0.073117,-0.084733,-0.108105,-0.125155,-0.142251,...,-1.708926,-1.720401,-1.711971,-1.705741,-1.774266,-1.799793,-1.850193,-1.911412,-1.982841,1.031863
4491,68.349998,30.690001,-0.038457,-0.052976,-0.063469,-0.082999,-0.094289,-0.117145,-0.133634,-0.150355,...,-1.726256,-1.739745,-1.732346,-1.725038,-1.789864,-1.816422,-1.864245,-1.924245,-1.996565,1.020248
4492,68.349998,30.790001,-0.054429,-0.070542,-0.078832,-0.097146,-0.108013,-0.129230,-0.144735,-0.160801,...,-1.748037,-1.763212,-1.756889,-1.748880,-1.808318,-1.836141,-1.881294,-1.941155,-2.015395,1.003012
4493,68.349998,30.889999,-0.086419,-0.099346,-0.101174,-0.115273,-0.125155,-0.141174,-0.152697,-0.166749,...,-1.767849,-1.783026,-1.778060,-1.770847,-1.818904,-1.847898,-1.889023,-1.947758,-2.020265,0.999967


In [51]:
# Add combined lat-lon column to df
df['lat_lon'] = (df['latitude'] * 100).astype('int') + (df['longitude'] * 100).astype('int') / 100000

In [52]:
# Drop unused columns in df_t2m_2015
df = df.drop(columns=['latitude', 'longitude'])

In [53]:
df

time,2019-01-01 00:00:00,2019-01-01 01:00:00,2019-01-01 02:00:00,2019-01-01 03:00:00,2019-01-01 04:00:00,2019-01-01 05:00:00,2019-01-01 06:00:00,2019-01-01 07:00:00,2019-01-01 08:00:00,2019-01-01 09:00:00,...,2019-06-30 15:00:00,2019-06-30 16:00:00,2019-06-30 17:00:00,2019-06-30 18:00:00,2019-06-30 19:00:00,2019-06-30 20:00:00,2019-06-30 21:00:00,2019-06-30 22:00:00,2019-06-30 23:00:00,lat_lon
0,,,,,,,,,,,...,,,,,,,,,,7115.01559
1,,,,,,,,,,,...,,,,,,,,,,7115.01568
2,,,,,,,,,,,...,,,,,,,,,,7115.01578
3,,,,,,,,,,,...,,,,,,,,,,7115.01589
4,,,,,,,,,,,...,,,,,,,,,,7115.01598
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4490,-0.029134,-0.041736,-0.053118,-0.073117,-0.084733,-0.108105,-0.125155,-0.142251,-0.178598,-0.199770,...,-1.720401,-1.711971,-1.705741,-1.774266,-1.799793,-1.850193,-1.911412,-1.982841,1.031863,6834.03059
4491,-0.038457,-0.052976,-0.063469,-0.082999,-0.094289,-0.117145,-0.133634,-0.150355,-0.186421,-0.208296,...,-1.739745,-1.732346,-1.725038,-1.789864,-1.816422,-1.864245,-1.924245,-1.996565,1.020248,6834.03069
4492,-0.054429,-0.070542,-0.078832,-0.097146,-0.108013,-0.129230,-0.144735,-0.160801,-0.196117,-0.218600,...,-1.763212,-1.756889,-1.748880,-1.808318,-1.836141,-1.881294,-1.941155,-2.015395,1.003012,6834.03079
4493,-0.086419,-0.099346,-0.101174,-0.115273,-0.125155,-0.141174,-0.152697,-0.166749,-0.199628,-0.222112,...,-1.783026,-1.778060,-1.770847,-1.818904,-1.847898,-1.889023,-1.947758,-2.020265,0.999967,6834.03088


In [54]:
# Add combined lat-lon column to dv
dv['lat_lon'] = (dv['ERA5_lat'] * 100).astype('int') + (dv['ERA5_lon'] * 100).astype('int') / 100000

In [55]:
dv

#,lon,lat,time,Bare,Grass,Lichen,Shrub,Tree,ERA5_lon_index,ERA5_lat_index,ERA5_lon,ERA5_lat,lat_lon
0,15.595313502963002,68.85523998162952,2019-01-01 00:00:00.000000000,0.0,0.44,0.03,0.27,0.26,0,23,15.59,68.85,6885.01559
1,15.595313502963002,68.8543140557036,2019-01-01 00:00:00.000000000,0.0,0.48,0.07,0.23,0.22,0,23,15.59,68.85,6885.01559
2,15.595313502963002,68.85338812977767,2019-01-01 00:00:00.000000000,0.0,0.41,0.03,0.17,0.39,0,23,15.59,68.85,6885.01559
3,15.595313502963002,68.7080177594073,2019-01-01 00:00:00.000000000,0.0,0.36,0.09,0.2,0.35,0,25,15.59,68.65,6865.01559
4,15.595313502963002,68.6728325742221,2019-01-01 00:00:00.000000000,0.0,0.4,0.21,0.14,0.25,0,25,15.59,68.65,6865.01559
...,...,...,...,...,...,...,...,...,...,...,...,...,...
666694,31.064757947408683,69.72561035199996,2019-01-01 00:00:00.000000000,0.0,0.49,0.18,0.25,0.08,154,15,30.99,69.65,6965.03099
666695,31.064757947408683,69.72468442607405,2019-01-01 00:00:00.000000000,0.0,0.48,0.17,0.27,0.08,154,15,30.99,69.65,6965.03099
666696,31.064757947408683,69.72375850014812,2019-01-01 00:00:00.000000000,0.0,0.49,0.19,0.21,0.11,154,15,30.99,69.65,6965.03099
666697,31.064757947408683,69.72190664829627,2019-01-01 00:00:00.000000000,0.0,0.44,0.38,0.18,0.0,154,15,30.99,69.65,6965.03099


In [56]:
# Drop unused columns in dv
dw = dv.drop(columns=['lon', 'lat', 'time', 'ERA5_lon_index', 'ERA5_lat_index', 'ERA5_lon', 'ERA5_lat'])

In [57]:
dw

#,Bare,Grass,Lichen,Shrub,Tree,lat_lon
0,0.0,0.44,0.03,0.27,0.26,6885.01559
1,0.0,0.48,0.07,0.23,0.22,6885.01559
2,0.0,0.41,0.03,0.17,0.39,6885.01559
3,0.0,0.36,0.09,0.2,0.35,6865.01559
4,0.0,0.4,0.21,0.14,0.25,6865.01559
...,...,...,...,...,...,...
666694,0.0,0.49,0.18,0.25,0.08,6965.03099
666695,0.0,0.48,0.17,0.27,0.08,6965.03099
666696,0.0,0.49,0.19,0.21,0.11,6965.03099
666697,0.0,0.44,0.38,0.18,0.0,6965.03099


In [58]:
dw_pandas = dw.to_pandas_df()

## Join dw (WLC) with dv (ERA5 t2m)

In [59]:
# With ERA5_t2m
dvw = dw_pandas.set_index('lat_lon').join(df.set_index('lat_lon'), on='lat_lon')

In [60]:
dvw

Unnamed: 0_level_0,Bare,Grass,Lichen,Shrub,Tree,2019-01-01 00:00:00,2019-01-01 01:00:00,2019-01-01 02:00:00,2019-01-01 03:00:00,2019-01-01 04:00:00,...,2019-06-30 14:00:00,2019-06-30 15:00:00,2019-06-30 16:00:00,2019-06-30 17:00:00,2019-06-30 18:00:00,2019-06-30 19:00:00,2019-06-30 20:00:00,2019-06-30 21:00:00,2019-06-30 22:00:00,2019-06-30 23:00:00
lat_lon,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
6885.01559,0.0,0.44,0.03000,0.27000,0.2600,,,,,,...,,,,,,,,,,
6885.01559,0.0,0.48,0.07000,0.23000,0.2200,,,,,,...,,,,,,,,,,
6885.01559,0.0,0.41,0.03000,0.17000,0.3900,,,,,,...,,,,,,,,,,
6865.01559,0.0,0.36,0.09000,0.20000,0.3500,0.184124,0.184874,0.208433,0.224357,0.230072,...,-1.870287,-1.859233,-1.852207,-1.832113,-1.838576,-1.894596,-1.948321,-2.007572,1.019264,1.007649
6865.01559,0.0,0.40,0.21000,0.14000,0.2500,0.184124,0.184874,0.208433,0.224357,0.230072,...,-1.870287,-1.859233,-1.852207,-1.832113,-1.838576,-1.894596,-1.948321,-2.007572,1.019264,1.007649
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6965.03099,0.0,0.49,0.18000,0.25000,0.0800,,,,,,...,,,,,,,,,,
6965.03099,0.0,0.48,0.17000,0.27000,0.0800,,,,,,...,,,,,,,,,,
6965.03099,0.0,0.49,0.19000,0.21000,0.1100,,,,,,...,,,,,,,,,,
6965.03099,0.0,0.44,0.38000,0.18000,0.0000,,,,,,...,,,,,,,,,,


In [61]:
# Drop the Rows with NaN Values
dvw = dvw.dropna()

In [62]:
dvw

Unnamed: 0_level_0,Bare,Grass,Lichen,Shrub,Tree,2019-01-01 00:00:00,2019-01-01 01:00:00,2019-01-01 02:00:00,2019-01-01 03:00:00,2019-01-01 04:00:00,...,2019-06-30 14:00:00,2019-06-30 15:00:00,2019-06-30 16:00:00,2019-06-30 17:00:00,2019-06-30 18:00:00,2019-06-30 19:00:00,2019-06-30 20:00:00,2019-06-30 21:00:00,2019-06-30 22:00:00,2019-06-30 23:00:00
lat_lon,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
6865.01559,0.0,0.360000,0.090000,0.20000,0.350000,0.184124,0.184874,0.208433,0.224357,0.230072,...,-1.870287,-1.859233,-1.852207,-1.832113,-1.838576,-1.894596,-1.948321,-2.007572,1.019264,1.007649
6865.01559,0.0,0.400000,0.210000,0.14000,0.250000,0.184124,0.184874,0.208433,0.224357,0.230072,...,-1.870287,-1.859233,-1.852207,-1.832113,-1.838576,-1.894596,-1.948321,-2.007572,1.019264,1.007649
6865.01559,0.0,0.410000,0.230000,0.14000,0.220000,0.184124,0.184874,0.208433,0.224357,0.230072,...,-1.870287,-1.859233,-1.852207,-1.832113,-1.838576,-1.894596,-1.948321,-2.007572,1.019264,1.007649
6844.01559,0.0,0.490000,0.060000,0.21000,0.240000,0.266512,0.258456,0.260706,0.261828,0.263468,...,-1.936189,-1.887242,-1.840685,-1.795157,-1.778950,-1.808788,-1.859466,-1.918016,-1.956377,-1.967618
6844.01559,0.0,0.420000,0.010000,0.27000,0.300000,0.266512,0.258456,0.260706,0.261828,0.263468,...,-1.936189,-1.887242,-1.840685,-1.795157,-1.778950,-1.808788,-1.859466,-1.918016,-1.956377,-1.967618
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6905.03079,0.0,0.348485,0.106061,0.19697,0.348485,-0.158692,-0.162204,-0.160846,-0.175836,-0.204782,...,-1.774594,-1.790145,-1.771644,-1.783447,-1.820543,-1.855392,-1.918110,-1.995862,1.009099,0.949567
7025.03079,0.0,0.560000,0.100000,0.14000,0.200000,0.150867,0.176348,0.191664,0.235364,0.289512,...,-1.857687,-1.868647,-1.877031,-1.898063,-1.930288,-1.972489,1.040481,0.985071,0.936968,0.906711
6905.03079,0.0,0.348485,0.106061,0.19697,0.348485,-0.158692,-0.162204,-0.160846,-0.175836,-0.204782,...,-1.774594,-1.790145,-1.771644,-1.783447,-1.820543,-1.855392,-1.918110,-1.995862,1.009099,0.949567
7025.03079,0.0,0.560000,0.100000,0.14000,0.200000,0.150867,0.176348,0.191664,0.235364,0.289512,...,-1.857687,-1.868647,-1.877031,-1.898063,-1.930288,-1.972489,1.040481,0.985071,0.936968,0.906711


## Save into **local** CSV file with header and indices

In [None]:
dvw.to_csv(r'/home/jovyan/Arctic/Vegetation_in_Troms_and_Finnmark/dvw_2019_dataframe.csv', header=True, index=True, sep=',', mode='a')

# Local .CSV files with header & index - t2m normalized
## dvw_2015_dataframe.csv **133954** rows and **** 
## dvw_2016_dataframe.csv **212459** rows and **** 
## dvw_2017_dataframe.csv **227807** rows and **11G** 
## dvw_2018_dataframe.csv **211791** rows and **9.5G** 
## dvw_2019_dataframe.csv **** rows and **G** 