In [1]:
import requests
import pandas as pd
from pyogrio import read_dataframe
from datetime import date, timedelta
from dateutil.relativedelta import relativedelta
import glob
import json
import numpy as np
import ee
import geemap as gee
import time
import standardize_pft_funcs as spf
import math

In [2]:
# Initialize the Earth Engine client. If that fails (e.g. the stored token
# has expired), run the interactive auth flow and then initialize again so
# the session is usable without re-running this cell.
try:
    ee.Initialize()
except Exception:
    print('GEE token expired. Use Jupyter Lab to authenticate')
    ee.Authenticate()
    # Authenticate() only saves the token; the client still has to be initialized.
    ee.Initialize()

GEE token expired. Use Jupyter Lab to authenticate


Enter verification code:  [REDACTED]



Successfully saved authorization token.


## Load data

### ELS team vegetation cover (20m-ish diameter, transect-quadrat sampling)

In [15]:
# output directory handed to spf.get_unique_species (OUTP) further down
out_path = '/mnt/poseidon/remotesensing/arctic/data/vectors/ABR/'

In [16]:
# ELS vegetation cover records; the first CSV column becomes the index
p = '/mnt/poseidon/remotesensing/arctic/data/vectors/ABR/'
f = 'veg_cover.csv'
els = pd.read_csv(f'{p}{f}', index_col=0)

In [17]:
# vegetation summary file: only the per-plot litter-alone cover is needed,
# it is used later as the litter top cover
p = '/mnt/poseidon/remotesensing/arctic/data/vectors/ABR/'
f = 'veg.csv'
litter_cols = ['plot_id', 'litter_alone_cover']
litter_info = pd.read_csv(f'{p}{f}', index_col=0)[litter_cols]

In [18]:
# plot-level ancillary table (lat, lon, etc.), first CSV column as index
p = '/mnt/poseidon/remotesensing/arctic/data/vectors/ABR/'
f = 'plot.csv'
ancillary = pd.read_csv(f'{p}{f}', index_col=0)

In [19]:
# ELS plot information table, first CSV column as index
p = '/mnt/poseidon/remotesensing/arctic/data/vectors/ABR/'
f = 'els.csv'
els_info = pd.read_csv(f'{p}{f}', index_col=0)

In [20]:
# discard rapid verification plots (v-plots); every other plot type is kept
is_verification = els_info['els_plot_type'] == 'Verification Plot'
fullplots = els_info[~is_verification]

In [21]:
# attach cover records to the retained plots; inner join, so plots without
# cover records (and cover records of dropped plots) fall out
els_fullplots_fcover = fullplots.merge(els, how='inner', on='plot_id')

In [22]:
# number of plots left after removing verification plots
len(fullplots)

587

In [23]:
# persist the joined plot/cover table
p = '/mnt/poseidon/remotesensing/arctic/data/vectors/ABR/'
f = 'ABR_ELS_only.csv'
els_fullplots_fcover.to_csv(f'{p}{f}')

## Get unique species list

In [24]:
# short alias for the joined plot/cover table (same object, not a copy)
abr = els_fullplots_fcover

In [25]:
# unique-species table built from the 'veg_taxonomy' column; with SAVE=True the
# helper also writes the list as a CSV under out_path
species = spf.get_unique_species(
    DFRAME=abr,
    SCOL='veg_taxonomy',
    DNAME='ABR_ELS',
    SAVE=True,
    OUTP=out_path,
)

Saved unique species list to /mnt/poseidon/remotesensing/arctic/data/vectors/ABR//ABR_ELS_unique_species.csv.


## Load species checklist

In [26]:
# read the AKVEG species checklist
p = '/mnt/poseidon/remotesensing/arctic/data/vectors/AKVEG_ACCS/'
f = 'AKVEG_species_checklist.csv'
checklist = read_dataframe(f'{p}{f}')

# build a common 'Mapping Name' key on both tables: the first 2 words
# (genus-species) of the checklist accepted name and of the observed taxon name
accepted_names = checklist['Accepted Name']
observed_names = species['veg_taxonomy']
checklist['Mapping Name'] = accepted_names.apply(spf.get_substrings)
species['Mapping Name'] = observed_names.apply(spf.get_substrings)

In [27]:
# column names the helper needs to look up a habit for every observed species
habit_lookup_args = dict(
    unique_species=species,
    checklist=checklist,
    u_name='veg_taxonomy',
    c_unofficial_name='Name',
    c_official_name='Accepted Name',
    mapping_name='Mapping Name',
    habit='Habit',
)
habits = spf.fill_habits(**habit_lookup_args)

253 species are missing habits.
127 species still missing habits.
31 species still missing habits.
27 species still missing habits.


## Add evergreen/deciduous

In [28]:
p = '/mnt/poseidon/remotesensing/arctic/data/vectors/AK-AVA_post2000/'
f = 'evergreendecid_macander2022.csv'
# the file is read without a header row, so the two column names are set by hand
evergrndecid = pd.read_csv(f'{p}{f}', header=None)
evergrndecid.columns = ['evergreendecid', 'species']
# combine the habit table with the evergreen/deciduous lookup
final = spf.add_leaf_habit(habits, evergrndecid)

In [29]:
# inspect the habit table returned by spf.add_leaf_habit
final

Unnamed: 0_level_0,Name,Mapping Name,Potential Habit,Habit,Leaf Retention,Potential Height,Height
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,???,???,,,,,
1,Abietinella abietina,Abietinella abietina,moss,moss,,,
2,Achillea millefolium,Achillea millefolium,forb,forb,,,
3,Achillea sibirica,Achillea sibirica,forb,forb,,,
4,Agropyron sp.,Agropyron sp.,graminoid,graminoid,,,
...,...,...,...,...,...,...,...
658,Warnstorfia fluitans,Warnstorfia fluitans,moss,moss,,,
659,Warnstorfia sarmentosa,Warnstorfia sarmentosa,moss,moss,,,
660,Water,Water,,,,,
661,Wilhelmsia physodes,Wilhelmsia physodes,forb,forb,,,


In [30]:
# export the full habit table; `p` is reused by the three split exports below
p = '/mnt/poseidon/remotesensing/arctic/data/vectors/ABR/'
f = 'ABR_ELS_species_habit_00.csv'
final.to_csv(f'{p}{f}')

In [31]:
# distinct 'Potential Habit' values, including the missing (NaN) entry
final['Potential Habit'].unique().tolist()

[nan,
 'moss',
 'forb',
 'graminoid',
 'lichen',
 'shrub',
 'liverwort',
 'dwarf shrub',
 ' shrub,shrub, dwarf shrub, forb',
 'spore-bearing',
 ' shrub,dwarf shrub',
 ' tree, shrub,shrub, dwarf shrub, deciduous tree']

## Export shrubs, nonshrubs, null habit separately

In [32]:
# rows with a known potential habit, then those whose potential habit mentions 'shrub'
has_habit = ~final['Potential Habit'].isnull()
finalnonnull = final[has_habit]
is_shrub = finalnonnull['Potential Habit'].str.contains('shrub')
shrubs = finalnonnull[is_shrub]
shrubs.to_csv(p + 'ABR_ELS_shrubs_00.csv')

In [33]:
# species with a known potential habit that does not mention 'shrub'
mentions_shrub = finalnonnull['Potential Habit'].str.contains('shrub')
nonshrubs = finalnonnull[~mentions_shrub]
nonshrubs.to_csv(p + 'ABR_ELS_nonshrubs_00.csv')

In [34]:
# species for which no potential habit was found
habit_missing = final['Potential Habit'].isnull()
finalnull = final[habit_missing]
finalnull.to_csv(p + 'ABR_ELS_nullhabit_00.csv')

In [35]:
# sanity check: the three subsets together should have as many rows as `final`
len(finalnull) + len(nonshrubs) + len(shrubs)

663

In [36]:
# total number of rows, for comparison with the sum of the three subsets
len(final)

663

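## HERE: Manually add data for shrubs00 and nullhabit00 (save the edited files as `ABR_ELS_shrubs_01.csv` and `ABR_ELS_nullhabit_01.csv`; the next section loads those names)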

## Standardize PFT Schema

In [37]:
p = '/mnt/poseidon/remotesensing/arctic/data/vectors/ABR/'
# the non-shrub table is used as exported (_00); the shrub and null-habit
# tables are the manually completed versions (_01)
nonshrub = 'ABR_ELS_nonshrubs_00.csv'
null = 'ABR_ELS_nullhabit_01.csv'
shrub = 'ABR_ELS_shrubs_01.csv'

alltypes = [nonshrub, shrub, null]

# Read each table into the list without rebinding `habits`, which still holds
# the spf.fill_habits result that the evergreen/deciduous cell depends on.
d = [pd.read_csv(p + t, index_col=0) for t in alltypes]

In [38]:
# stack the three habit tables row-wise (each keeps the index it was saved with)
completehabits = pd.concat(d)
# list the distinct habit values that need to be mapped to the standard schema
completehabits['Habit'].unique().tolist()

['moss',
 'forb',
 'graminoid',
 'lichen',
 'liverwort',
 'spore-bearing',
 'shrub',
 'dwarf shrub',
 'shrub, tree',
 'unknown',
 'bare mineral',
 'bare rock',
 'litter',
 'algae',
 'fungus',
 'grass',
 'water']

In [39]:
def standardize_habit(habitrow, leafrow, heightrow):
    """Map each species' habit to a standardized PFT cover label.

    Parameters
    ----------
    habitrow, leafrow, heightrow : iterable
        Habit, leaf-retention and height-class values, one entry per species,
        iterated in parallel (e.g. three columns of the same DataFrame).

    Returns
    -------
    list of str
        One lower-cased, whitespace-normalized ``'<label> cover (%)'`` string
        per input position.
    """
    new_row = []

    for habit, leaf, height in zip(habitrow, leafrow, heightrow):
        if not isinstance(habit, str):
            # A missing habit arrives as float NaN: it has no .lower(), and
            # `habit == np.nan` is never True, so it must be caught up front.
            new_row.append('unknown cover (%)')
            continue

        habit = habit.lower()
        if 'algae' in habit:
            # algae are reported under the lichen label
            new_habit = 'lichen cover (%)'
        elif 'unknown' in habit:
            new_habit = 'unknown cover (%)'
        elif 'moss' in habit or 'liverwort' in habit:
            new_habit = 'bryophyte cover (%)'
        elif 'spore-bearing' in habit:
            new_habit = 'forb cover (%)'
        elif habit == 'grass':
            new_habit = 'graminoid cover (%)'
        elif habit == 'dwarf shrub':
            # NOTE(review): a missing leaf value is formatted as the text 'nan'
            # here and in the branches below; confirm every woody row has one.
            new_habit = f'{leaf} {habit} cover (%)'
        elif height == 'dwarf to low shrub':
            new_habit = f'{leaf} {height} cover (%)'
        elif height == 'dwarf to tall shrub':
            new_habit = f'{leaf} {height} cover (%)'
        elif habit == 'shrub, tree':
            new_habit = f'{leaf} shrub to tree cover (%)'
        else:
            new_habit = f'{habit} cover (%)'
        # collapse repeated whitespace and lower-case the final label
        new_row.append(" ".join(new_habit.split()).lower())

    return new_row
        
# standardized label for every row of the combined habit table
new_habit_col = standardize_habit(
    habitrow=completehabits['Habit'],
    leafrow=completehabits['Leaf Retention'],
    heightrow=completehabits['Height'],
)

In [40]:
# distinct standardized labels that were produced
set(new_habit_col)

{'bare mineral cover (%)',
 'bare rock cover (%)',
 'bryophyte cover (%)',
 'deciduous dwarf shrub cover (%)',
 'deciduous dwarf to low shrub cover (%)',
 'deciduous dwarf to tall shrub cover (%)',
 'deciduous shrub to tree cover (%)',
 'evergreen dwarf shrub cover (%)',
 'evergreen dwarf to low shrub cover (%)',
 'forb cover (%)',
 'fungus cover (%)',
 'graminoid cover (%)',
 'lichen cover (%)',
 'litter cover (%)',
 'unknown cover (%)',
 'water cover (%)'}

In [41]:
# store the labels positionally as a new column (new_habit_col has one entry per row)
completehabits['Standard Habit'] = new_habit_col

In [42]:
# save the habit table including the 'Standard Habit' column
p = '/mnt/poseidon/remotesensing/arctic/data/vectors/ABR/'
f = 'ABR_ELS_species_habit_standardized.csv'
completehabits.to_csv(f'{p}{f}')

In [43]:
# inspect the habit table with the new 'Standard Habit' column
completehabits

Unnamed: 0_level_0,Name,Mapping Name,Potential Habit,Habit,Leaf Retention,Potential Height,Height,Standard Habit
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
1,Abietinella abietina,Abietinella abietina,moss,moss,,,,bryophyte cover (%)
2,Achillea millefolium,Achillea millefolium,forb,forb,,,,forb cover (%)
3,Achillea sibirica,Achillea sibirica,forb,forb,,,,forb cover (%)
4,Agropyron sp.,Agropyron sp.,graminoid,graminoid,,,,graminoid cover (%)
5,Agrostis scabra,Agrostis scabra,graminoid,graminoid,,,,graminoid cover (%)
...,...,...,...,...,...,...,...,...
644,Unknown moss 2,moss,moss,moss,,,,bryophyte cover (%)
645,Unknown moss 3,moss,moss,moss,,,,bryophyte cover (%)
646,Unknown moss 4,moss,moss,moss,,,,bryophyte cover (%)
647,Unknown moss 5,moss,moss,moss,,,,bryophyte cover (%)


## Join habit to fcover data

In [44]:
# Derive the re-indexed frame from the merged table instead of from `abr`
# itself, so re-running this cell always yields the same single 'index'
# column rather than stacking another index column on each run.
abr = els_fullplots_fcover.reset_index()

In [45]:
# attach habit information to each cover record by taxon name; inner join, so
# cover records whose taxon has no row in completehabits are not carried over
withfcover = abr.merge(
    completehabits,
    how='inner',
    left_on='veg_taxonomy',
    right_on='Name',
)

In [46]:
# inspect the cover records joined with their habits
withfcover

Unnamed: 0,index,project_id_x,plot_id,veg_observer,env_observer,env_field_start_ts,study_location,data_origin,els_plot_type,plot_radius,...,plot_uuid_y,veg_observation_uuid,Name,Mapping Name,Potential Habit,Habit,Leaf Retention,Potential Height,Height,Standard Habit
0,0,12-258.6.1,NPRA_T115_15_2012,Aaron F Wells,Michael S Davis,07/17/12 17:50:02,National Petroleum Reserve Alaska,ABR Inc.,Full Plot,10m radius,...,,3782964c-7fd5-11e4-b9fe-0025902deb10,Lopadium pezizoideum,Lopadium pezizoideum,lichen,lichen,,,,lichen cover (%)
1,477,12-258.6.1,NPRA_T160_08_2012,Ellen R Trainor,Michael S Davis,08/08/12 11:28:36,National Petroleum Reserve Alaska,ABR Inc.,Full Plot,10m radius,...,,37831e6e-7fd5-11e4-b035-0025902deb10,Lopadium pezizoideum,Lopadium pezizoideum,lichen,lichen,,,,lichen cover (%)
2,3477,12-258.6.1,NPRA_T014_02_2012,Erin K Johnson,Tracy Christopherson,07/21/12 11:31:38,National Petroleum Reserve Alaska,ABR Inc.,Full Plot,10m radius,...,,37820826-7fd5-11e4-99d0-0025902deb10,Lopadium pezizoideum,Lopadium pezizoideum,lichen,lichen,,,,lichen cover (%)
3,1,12-258.6.1,NPRA_T115_15_2012,Aaron F Wells,Michael S Davis,07/17/12 17:50:02,National Petroleum Reserve Alaska,ABR Inc.,Full Plot,10m radius,...,,3782964c-7fd5-11e4-b9fe-0025902deb10,Luzula confusa,Luzula confusa,graminoid,graminoid,,,,graminoid cover (%)
4,45,12-258.6.1,NPRA_T115_03_2012,Ellen R Trainor,Anja N Kade,07/17/12 14:58:25,National Petroleum Reserve Alaska,ABR Inc.,Full Plot,10m radius,...,,37829002-7fd5-11e4-9829-0025902deb10,Luzula confusa,Luzula confusa,graminoid,graminoid,,,,graminoid cover (%)
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12421,11823,12-258.6.1,NPRA_T144_11_2012,Erin K Johnson,Tracy Christopherson,07/19/12 16:46:24,National Petroleum Reserve Alaska,ABR Inc.,Full Plot,10m radius,...,,3782e052-7fd5-11e4-9e9b-0025902deb10,Lophozia wenzelii,Lophozia wenzelii,liverwort,liverwort,,,,bryophyte cover (%)
12422,11849,12-258.6.1,NPRA_T019_05_2012,Louise M Farquharson,Anja N Kade,08/14/12 14:02:24,National Petroleum Reserve Alaska,ABR Inc.,Full Plot,4x8m,...,,37821672-7fd5-11e4-be6b-0025902deb10,Paludella squarrosa,Paludella squarrosa,moss,moss,,,,bryophyte cover (%)
12423,11877,11-257,CHCO_T29_07_2011,Wendy A Davis,Aaron F Wells,07/19/11 00:00:00,Chukchi Sea Coast,ABR Inc.,Full Plot,10x10m,...,f14a5f84-6f62-11e4-9db1-0025902deb10,18a6895e-6f81-11e4-97a1-0025902deb10,Pseudephebe pubescens,Pseudephebe pubescens,lichen,lichen,,,,lichen cover (%)
12424,12029,12-258.5.1,120810b2,Martha K Raynolds,Kimberly S Allen,08/10/12 14:17:04,National Petroleum Reserve Alaska,ABR Inc.,Full Plot,10m radius,...,,3783daf2-7fd5-11e4-b345-0025902deb10,Baeomyces rufus,Baeomyces rufus,lichen,lichen,,,,lichen cover (%)


In [47]:
# total cover per plot and standardized habit; the cover column in this
# dataset is 'cover_percent'
by_plot_and_habit = withfcover.groupby(['plot_id', 'Standard Habit'], group_keys=True)
grouped = by_plot_and_habit.agg({'cover_percent': 'sum'})

In [48]:
# inspect the summed cover, indexed by (plot_id, Standard Habit)
grouped

Unnamed: 0_level_0,Unnamed: 1_level_0,cover_percent
plot_id,Standard Habit,Unnamed: 2_level_1
120729b1,bryophyte cover (%),23.1
120729b1,deciduous dwarf shrub cover (%),0.1
120729b1,deciduous dwarf to low shrub cover (%),16.1
120729b1,deciduous dwarf to tall shrub cover (%),1.0
120729b1,evergreen dwarf shrub cover (%),13.0
...,...,...
NPRA_V073_01_2012,evergreen dwarf shrub cover (%),1.0
NPRA_V073_01_2012,evergreen dwarf to low shrub cover (%),1.0
NPRA_V073_01_2012,forb cover (%),2.1
NPRA_V073_01_2012,fungus cover (%),0.1


In [49]:
# keep only the first level of the column labels, then turn the
# (plot_id, Standard Habit) index levels back into ordinary columns
grouped.columns = grouped.columns.get_level_values(0)
grouped = grouped.reset_index()
grouped

Unnamed: 0,plot_id,Standard Habit,cover_percent
0,120729b1,bryophyte cover (%),23.1
1,120729b1,deciduous dwarf shrub cover (%),0.1
2,120729b1,deciduous dwarf to low shrub cover (%),16.1
3,120729b1,deciduous dwarf to tall shrub cover (%),1.0
4,120729b1,evergreen dwarf shrub cover (%),13.0
...,...,...,...
3707,NPRA_V073_01_2012,evergreen dwarf shrub cover (%),1.0
3708,NPRA_V073_01_2012,evergreen dwarf to low shrub cover (%),1.0
3709,NPRA_V073_01_2012,forb cover (%),2.1
3710,NPRA_V073_01_2012,fungus cover (%),0.1


In [50]:
# reshape long -> wide: one row per plot_id, one column per Standard Habit.
# stack() moves the single 'cover_percent' column label into the index, and
# unstack([1,2]) pivots index levels 1 and 2 (Standard Habit and that label)
# into the columns
groups = grouped.set_index(['plot_id', 'Standard Habit']).stack().unstack([1,2])
# keep only the Standard Habit level as the column names
groups.columns = groups.columns.get_level_values(0)
# plot_id becomes a regular column again
groups = groups.reset_index()
groups

Standard Habit,plot_id,bryophyte cover (%),deciduous dwarf shrub cover (%),deciduous dwarf to low shrub cover (%),deciduous dwarf to tall shrub cover (%),evergreen dwarf shrub cover (%),evergreen dwarf to low shrub cover (%),forb cover (%),graminoid cover (%),lichen cover (%),fungus cover (%),water cover (%),bare mineral cover (%),litter cover (%),deciduous shrub to tree cover (%),unknown cover (%),bare rock cover (%)
0,120729b1,23.1,0.1,16.1,1.0,13.0,20.0,4.6,5.3,3.5,,,,,,,
1,120729b2,83.2,,23.1,1.0,0.1,0.1,0.1,20.1,,,,,,,,
2,120730b1,35.2,,32.0,5.0,2.0,,3.1,16.0,,,,,,,,
3,120730b2,17.1,,12.0,3.0,1.0,,1.1,17.1,,,,,,,,
4,120730b3,15.0,,13.0,3.0,1.0,,5.0,20.0,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
580,NPRA_T503_05_2012,0.1,,,,,,0.2,0.1,,,100.0,,,,,
581,NPRA_T610_02_2012,18.5,5.1,15.0,5.1,8.1,10.0,6.3,9.2,1.9,,,,,,,
582,NPRA_T625_01_2012,33.3,1.0,10.0,2.0,13.0,6.0,0.3,21.1,38.4,,,,,,,
583,NPRA_T629_03_2012,1.6,,,1.0,,,6.4,16.5,,,,,,,,


## Add frost boils cover (not sure if necessary)

In [51]:
# prep frost boil data for concatenating
frostboil_perc = fullplots.copy()
frostboil_perc['frost boil cover (%)'] = frostboil_perc['frost_boil_cover_percent']
frostboil_perc = frostboil_perc[['plot_id', 'frost boil cover (%)', 'plot_radius']]

In [52]:
# add frost boil cover and plot radius to each plot (inner join on plot_id)
groups = groups.merge(frostboil_perc, how='inner', on='plot_id')

In [53]:
# columns present so far, to compare against the required schema
groups.columns.tolist()

['plot_id',
 'bryophyte cover (%)',
 'deciduous dwarf shrub cover (%)',
 'deciduous dwarf to low shrub cover (%)',
 'deciduous dwarf to tall shrub cover (%)',
 'evergreen dwarf shrub cover (%)',
 'evergreen dwarf to low shrub cover (%)',
 'forb cover (%)',
 'graminoid cover (%)',
 'lichen cover (%)',
 'fungus cover (%)',
 'water cover (%)',
 'bare mineral cover (%)',
 'litter cover (%)',
 'deciduous shrub to tree cover (%)',
 'unknown cover (%)',
 'bare rock cover (%)',
 'frost boil cover (%)',
 'plot_radius']

In [54]:
# required columns
necessary_cols = ['deciduous dwarf shrub cover (%)',
                  'deciduous dwarf to low shrub cover (%)',
                  'deciduous dwarf to tall shrub cover (%)',
                  'deciduous shrub to tree cover (%)',
                  'deciduous tree cover (%)',
                  'evergreen dwarf shrub cover (%)',
                  'evergreen dwarf to low shrub cover (%)',
                  'evergreen dwarf to tall shrub cover (%)',
                  'evergreen shrub to tree cover (%)',
                  'evergreen tree cover (%)',
                  'bryophyte cover (%)',
                  'forb cover (%)',
                  'graminoid cover (%)',
                  'lichen cover (%)']

In [55]:
# required columns that the wide cover table does not have yet, in schema order
cols = groups.columns.tolist()
addcols = [required for required in necessary_cols if required not in cols]

In [56]:
# required columns that have to be added
addcols

['deciduous tree cover (%)',
 'evergreen dwarf to tall shrub cover (%)',
 'evergreen shrub to tree cover (%)',
 'evergreen tree cover (%)']

In [57]:
# create the missing required columns, filled with NaN
groups[addcols] = np.nan

In [58]:
# tag every row with the dataset it came from
groups['source'] = 'ABR_ELS'

In [79]:
# add the ancillary plot attributes (inner join on plot_id)
finaldata = groups.merge(ancillary, how='inner', on='plot_id')

In [80]:
# parse the field start timestamps and keep only the calendar year
field_start = pd.DatetimeIndex(finaldata['field_start_ts'])
finaldata['year'] = field_start.year

In [81]:
# cast to pandas' nullable integer dtype
finaldata['year'] = finaldata['year'].astype('Int64')

In [82]:
# constant label describing the purpose of the source survey
finaldata['source_context'] = 'mapping vegetation'

In [83]:
# add litter top cover: join the litter-alone cover per plot, expose it under
# the standardized column name and discard the raw column
with_litter = finaldata.merge(litter_info, on='plot_id')
with_litter['litter top cover (%)'] = with_litter['litter_alone_cover']
finaldata = with_litter.drop(columns=['litter_alone_cover'])

In [84]:
# full column list after all joins
finaldata.columns.tolist()

['plot_id',
 'bryophyte cover (%)',
 'deciduous dwarf shrub cover (%)',
 'deciduous dwarf to low shrub cover (%)',
 'deciduous dwarf to tall shrub cover (%)',
 'evergreen dwarf shrub cover (%)',
 'evergreen dwarf to low shrub cover (%)',
 'forb cover (%)',
 'graminoid cover (%)',
 'lichen cover (%)',
 'fungus cover (%)',
 'water cover (%)',
 'bare mineral cover (%)',
 'litter cover (%)',
 'deciduous shrub to tree cover (%)',
 'unknown cover (%)',
 'bare rock cover (%)',
 'frost boil cover (%)',
 'plot_radius',
 'deciduous tree cover (%)',
 'evergreen dwarf to tall shrub cover (%)',
 'evergreen shrub to tree cover (%)',
 'evergreen tree cover (%)',
 'source',
 'project_id',
 'area_id',
 'transect_id',
 'superplot_id',
 'subplot_id',
 'plot_type',
 'latitude',
 'longitude',
 'elevation_m',
 'h_datum',
 'v_datum',
 'loc_origin',
 'extra',
 'plot_uuid_array',
 'field_start_ts',
 'year',
 'source_context',
 'litter top cover (%)']

In [88]:
# combine non-vegetation top cover columns
finaldata2 = finaldata.copy()
finaldata2['bare ground top cover (%)'] = finaldata2['bare mineral cover (%)'] + finaldata2['bare rock cover (%)']
finaldata2.drop(columns=['bare mineral cover (%)', 
                         'bare rock cover (%)',
                         'litter cover (%)'], inplace=True)

In [89]:
# water is reported as a top-cover column in the output schema
water_rename = {'water cover (%)': 'water top cover (%)'}
finaldata2 = finaldata2.rename(columns=water_rename)

In [90]:
# write the standardized PFT fractional-cover table
p = '/mnt/poseidon/remotesensing/arctic/data/training/Test_04/'
f = 'ABR_ELS_pft_fcover_00.csv'
finaldata2.to_csv(f'{p}{f}')