In [1]:
import requests
import pandas as pd
from pyogrio import read_dataframe
from datetime import date, timedelta
from dateutil.relativedelta import relativedelta
import glob
import json
import numpy as np
import ee
import geemap as gee
import time
import standardize_pft_funcs as spf
import math

In [2]:
# Start an Earth Engine session; fall back to the interactive
# authentication flow when initialization fails.
try:
    ee.Initialize()
except Exception:
    print('GEE token expired. Use Jupyter Lab to authenticate')
    ee.Authenticate()
    # Authenticate() only obtains and stores credentials; the session still
    # has to be initialized before any Earth Engine call can be made.
    ee.Initialize()

## Load data

In [3]:
# Output directory passed to spf.get_unique_species (OUTP) further down.
out_path = '/mnt/poseidon/remotesensing/arctic/data/vectors/ABR'

In [4]:
# get rs data
# Species-level cover table; its columns (plot_id, veg_taxonomy, family,
# species_cover, vascular) are displayed a few cells below.
p = '/mnt/poseidon/remotesensing/arctic/data/vectors/ABR/'
f = 'vpi_plot_species_cover_with_trace.csv'
rs = pd.read_csv(p + f)

In [5]:
# get rs summary data
p = '/mnt/poseidon/remotesensing/arctic/data/vectors/ABR/'
f = 'vpi_plot_summary.csv'
rs_summary = pd.read_csv(p + f)
# Keep the plot id plus the three non-vegetation top-cover columns; this
# subset is what the "top cover" file at the end of the notebook is built from.
nonveg_top = rs_summary[['plot_id', 'litter_topcov', 'water_topcov', 'bareground_topcov']]

In [6]:
# get ancillary data (lat,lon,etc)
p = '/mnt/poseidon/remotesensing/arctic/data/vectors/ABR/'
f = 'plot.csv'
# index_col=0 turns the first CSV column into the row index; plot_id,
# latitude, longitude and field_start_ts are read from this frame later on.
ancillary = pd.read_csv(p + f, index_col=0)

In [7]:
# Sanity check: list the columns kept for the non-vegetation top-cover table.
nonveg_top.columns

Index(['plot_id', 'litter_topcov', 'water_topcov', 'bareground_topcov'], dtype='object')

In [8]:
# Sanity check: list the columns of the species-level cover table.
rs.columns

Index(['plot_id', 'veg_taxonomy', 'family', 'species_cover', 'vascular'], dtype='object')

In [9]:
# drop els plots (plots that start with a letter)
# Flag each row by whether the first character of its plot_id is a digit,
# keep the flag as a helper column, then retain only the flagged rows.
startswith_digit = [first_char.isdigit() for first_char in rs['plot_id'].str[0]]
rs['plot_sw_digit'] = startswith_digit
rs = rs.loc[rs['plot_sw_digit'].eq(True)]

## Extract species information

In [10]:
# Alias, not a copy: abr and rs refer to the same filtered frame.
abr = rs

In [11]:
# Build the list of distinct taxa found in the 'veg_taxonomy' column.
# With SAVE=True the helper also writes the list under OUTP (the printed
# message below shows the resulting path). NOTE(review): exact return
# schema is defined in standardize_pft_funcs — confirm there.
species = spf.get_unique_species(DFRAME=abr, 
                                 SCOL='veg_taxonomy', 
                                 DNAME='ABR_RS', 
                                 SAVE=True, 
                                 OUTP=out_path)

Saved unique species list to /mnt/poseidon/remotesensing/arctic/data/vectors/ABR/ABR_RS_unique_species.csv.


In [12]:
# load species checklist
p = '/mnt/poseidon/remotesensing/arctic/data/vectors/AKVEG_ACCS/'
f = 'AKVEG_species_checklist.csv'
# NOTE(review): the checklist is read with pyogrio's read_dataframe rather
# than pd.read_csv — confirm the resulting dtypes suit spf.fill_habits.
checklist = read_dataframe(p + f)

# get first 2 words (genus-species) from checklist accepted name and data species name
# Both frames get a 'Mapping Name' column so they can be matched on it below.
checklist['Mapping Name'] = checklist['Accepted Name'].apply(spf.get_substrings)
species['Mapping Name'] = species['veg_taxonomy'].apply(spf.get_substrings)

## Assign species habits

In [13]:
# get potential habits
# Look up a habit for every unique species using the checklist columns
# named below. The printed counts that follow suggest several matching
# passes, each leaving fewer species without a habit — see spf.fill_habits
# to confirm the exact matching order.
habits = spf.fill_habits(unique_species=species, 
                         checklist=checklist, 
                         u_name='veg_taxonomy', 
                         c_unofficial_name='Name', 
                         c_official_name='Accepted Name', 
                         mapping_name='Mapping Name',
                         habit='Habit')

161 species are missing habits.
93 species still missing habits.
27 species still missing habits.
25 species still missing habits.


## Add leaf retention

In [14]:
# Evergreen/deciduous lookup table. The CSV has no header row, so the two
# columns are named explicitly after loading.
p = '/mnt/poseidon/remotesensing/arctic/data/vectors/AK-AVA_post2000/'
f = 'evergreendecid_macander2022.csv'
evergrndecid = pd.read_csv(p + f, header=None)
evergrndecid.columns = ['evergreendecid', 'species']
# presumably attaches leaf-retention info to the habit table — verify
# against spf.add_leaf_habit
final = spf.add_leaf_habit(habits, evergrndecid)

In [15]:
# export
# NOTE: `p` set here is reused by the shrub / non-shrub / null-habit export
# cells that follow, so this cell must run before them.
p = '/mnt/poseidon/remotesensing/arctic/data/vectors/ABR/'
f = 'ABR_RS_species_habit_00.csv'
final.to_csv(p + f)

In [16]:
# Inspect the distinct 'Potential Habit' values (NaN marks species with no habit found).
list(final['Potential Habit'].unique())

['forb',
 'graminoid',
 'lichen',
 'shrub',
 'liverwort',
 'dwarf shrub',
 ' dwarf shrub, forb,forb, shrub',
 'moss',
 nan,
 'spore-bearing',
 'dwarf shrub, shrub']

## Export shrubs, non-shrubs, and null habits separately

In [17]:
# get all shrub species
# Rows with a known habit are separated first so that the substring test
# below never sees a missing value.
finalnonnull = final.loc[final['Potential Habit'].notna()]
shrubs = finalnonnull.loc[finalnonnull['Potential Habit'].str.contains('shrub')]
shrubs.to_csv(p + 'ABR_RS_shrubs_00.csv')

In [18]:
# get all non-shrub species
# Complement of the shrub selection: known habits that do not contain 'shrub'.
nonshrubs = finalnonnull[~finalnonnull['Potential Habit'].str.contains('shrub')]
nonshrubs.to_csv(p + 'ABR_RS_nonshrubs_00.csv')

In [19]:
# get null habits
# Species for which no potential habit was found; exported for manual completion.
finalnull = final[final['Potential Habit'].isnull()]
finalnull.to_csv(p + 'ABR_RS_nullhabit_00.csv')

In [20]:
# The three subsets should add up to the full table (compare with len(final) below).
len(finalnull) + len(nonshrubs) + len(shrubs)

385

In [21]:
# Total number of species rows; should equal the sum of the three subsets above.
len(final)

385

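## HERE: Manually add data to shrubs00 and nullhabit00 (save the edited files as `ABR_RS_shrubs_01.csv` and `ABR_RS_nullhabit_01.csv`, which the next section reads)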

## Standardize PFT schema

In [22]:
# Load the three habit tables: the non-shrub file as exported (_00) and the
# null-habit and shrub files in their _01 versions.
p = '/mnt/poseidon/remotesensing/arctic/data/vectors/ABR/'
nonshrub = 'ABR_RS_nonshrubs_00.csv'
null = 'ABR_RS_nullhabit_01.csv'
shrub = 'ABR_RS_shrubs_01.csv'

alltypes = [nonshrub, shrub, null]

# Read each file (first CSV column becomes the index) and collect the frames.
# Note: this rebinds the earlier `habits` variable to the last frame read.
d = []
for t in alltypes:
    habits = pd.read_csv(p + t, index_col=0)
    d.append(habits)

In [23]:
# Stack the three tables into one; the original row indices are kept as-is.
completehabits = pd.concat(d)
# Inspect the distinct habit labels that must be mapped to PFT columns.
completehabits['Habit'].unique().tolist()

['forb',
 'graminoid',
 'lichen',
 'liverwort',
 'moss',
 'spore-bearing',
 'shrub',
 'dwarf shrub',
 'shrub, tree',
 'bare ground',
 'bare mineral',
 'crust',
 'litter',
 'scat',
 'algae',
 'fungus',
 'water']

In [24]:
# veg: lichen = algae, lichen
# veg: bryophyte = liverwort, moss
# veg: graminoid = graminoid
# veg: forb = spore-bearing, forb
# veg: evergreen shrub = evergreen shrubs of all heights
# veg: deciduous shrub = deciduous shrubs of all heights
# ---
# nonveg: litter = litter + scat
# nonveg: bare ground = bare mineral + crust + bare ground
# nonveg: water = water

In [25]:
def standardize_habit_nonstandard(habitrow, leafrow, heightrow):
    """
    Build the non-merged cover label for every species.

    Only shrub habits are rewritten; every other habit keeps its own name
    and simply becomes '<habit> cover (%)'.

    Parameters
    ----------
    habitrow : pandas.Series of str
        Habit label per species.
    leafrow : pandas.Series
        Leaf-retention label per species (used for shrub habits only).
    heightrow : pandas.Series
        Height-class label per species (used for habits containing 'shrub'
        other than 'dwarf shrub' and 'shrub, tree').

    Returns
    -------
    list of str
        One lower-cased label per element, with runs of whitespace
        collapsed to a single space.
    """

    def _label(raw_habit, leaf, height):
        # Label for one species.
        key = raw_habit.lower()
        if key == 'shrub, tree':
            text = f'{leaf} dwarf to tree cover (%)'
        elif key == 'dwarf shrub':
            text = f'{leaf} {key} cover (%)'
        elif 'shrub' in key:
            text = f'{leaf} {height} cover (%)'
        else:
            text = f'{key} cover (%)'
        return " ".join(text.split()).lower()

    triples = zip(habitrow.to_numpy(), leafrow.to_numpy(), heightrow.to_numpy())
    return [_label(h, lf, ht) for h, lf, ht in triples]

In [26]:
def standardize_habit(habitrow, leafrow, heightrow):
    """
    Map raw habit labels to the standardized (merged) PFT cover labels.

    Merging rules: algae -> lichen; moss/liverwort -> bryophyte;
    spore-bearing -> forb; grass -> graminoid; litter/scat -> litter;
    bare ground/crust/mineral -> bare ground. Shrub habits are combined with
    their leaf retention (and, for generic shrubs, their height class).
    Missing or 'unknown' habits become 'unknown cover (%)'. Anything else
    keeps its own name.

    Parameters
    ----------
    habitrow : pandas.Series
        Habit label per species; may contain missing values.
    leafrow : pandas.Series
        Leaf-retention label per species (used for shrub habits only).
    heightrow : pandas.Series
        Height-class label per species (used for habits containing 'shrub'
        other than 'dwarf shrub' and 'shrub, tree').

    Returns
    -------
    list of str
        One lower-cased, whitespace-normalized label per element.
    """
    
    habitrow = habitrow.to_numpy()
    leafrow = leafrow.to_numpy()
    heightrow = heightrow.to_numpy()
    new_row = []
    
    for habit, leaf, height in zip(habitrow, leafrow, heightrow):
        # A missing habit arrives as a float NaN: it has no .lower() and
        # never compares equal to np.nan, so it is tested explicitly before
        # any string handling.
        if pd.isna(habit):
            new_row.append('unknown cover (%)')
            continue

        habit = habit.lower()
        if 'algae' in habit:
            new_habit = 'lichen cover (%)'
        elif 'unknown' in habit:
            new_habit = 'unknown cover (%)'
        elif 'moss' in habit or 'liverwort' in habit:
            new_habit = 'bryophyte cover (%)'
        elif 'spore-bearing' in habit:
            new_habit = 'forb cover (%)'
        elif habit == 'grass':
            new_habit = 'graminoid cover (%)'
            
        elif habit == 'shrub, tree':
            new_habit = f'{leaf} dwarf to tree cover (%)'
        elif habit == 'dwarf shrub':
            new_habit = f'{leaf} {habit} cover (%)'
        elif 'shrub' in habit:
            new_habit = f'{leaf} {height} cover (%)'
            
        elif 'litter' in habit or 'scat' in habit:
            new_habit = 'litter cover (%)'
        elif 'bare ground' in habit or 'crust' in habit or 'mineral' in habit:
            new_habit = 'bare ground cover (%)'
        else:
            new_habit = f'{habit} cover (%)'
        # collapse repeated whitespace and normalize case
        new_row.append(" ".join(new_habit.split()).lower())
    
    return new_row

In [27]:
# standardized PFT
# One merged-PFT label per row of completehabits, in row order.
standard_habit_col = standardize_habit(completehabits['Habit'], 
                                       completehabits['Leaf Retention'], 
                                       completehabits['Height'])

In [28]:
# non-standardized PFT
# One non-merged label per row of completehabits, in row order.
nonstandard_habit_col = standardize_habit_nonstandard(completehabits['Habit'], 
                                                      completehabits['Leaf Retention'], 
                                                      completehabits['Height'])

In [29]:
# Attach both label lists as columns; they are plain lists, so they are
# assigned positionally (row order must match the order used to build them).
completehabits['Standard Habit'] = standard_habit_col
completehabits['NonStandard Habit'] = nonstandard_habit_col

In [30]:
# Save the species-to-PFT lookup table with both label columns.
p = '/mnt/poseidon/remotesensing/arctic/data/vectors/ABR/'
f = 'ABR_RS_species_habit_standardized.csv'
completehabits.to_csv(p + f)

## Join PFT to fcover data

In [31]:
# add vegetation fcover and sum to PFT level
def add_cover(cover_df, pft_groups, habit_col):
    """
    Attach PFT labels to species cover records and total cover per plot and PFT.

    Parameters
    ----------
    cover_df : pandas.DataFrame
        Needs 'plot_id', 'veg_taxonomy' and 'species_cover' columns.
    pft_groups : pandas.DataFrame
        Lookup table with a 'Name' column and the `habit_col` column.
    habit_col : str
        Column of `pft_groups` to group by.

    Returns
    -------
    pandas.DataFrame
        Indexed by (plot_id, habit_col) with a single 'species_cover'
        column holding the summed cover.
    """
    # Left join: every cover record is kept, matched on taxon name.
    joined = pd.merge(
        cover_df.reset_index(),
        pft_groups,
        how='left',
        left_on='veg_taxonomy',
        right_on='Name',
    )
    per_plot_pft = joined.groupby(['plot_id', habit_col], group_keys=True)
    return per_plot_pft.agg({'species_cover': 'sum'})

In [32]:
# flatten multilevel
def flatten_multilevel(grouped):
    """
    Reduce the column labels to their first level and move the index into columns.

    The column labels of the frame passed in are replaced in place; the
    returned frame is a new object with the former index levels as ordinary
    columns.
    """
    top_level_labels = grouped.columns.get_level_values(0)
    grouped.columns = top_level_labels
    return grouped.reset_index()

In [33]:
# transpose
def transpose_df(grouped, habit_col):
    """
    Pivot the long (plot, habit, cover) table to one row per plot.

    Each distinct value of `habit_col` becomes a column. Plots that lack a
    given habit get NaN in that column.

    Assumes `grouped` has exactly one value column besides 'plot_id' and
    `habit_col`; with more, the flattened column labels would repeat.
    """
    keyed = grouped.set_index(['plot_id', habit_col])
    # Move the value-column name into the row index, then lift the habit
    # level and that name level into the columns.
    long_form = keyed.stack()
    wide = long_form.unstack([1, 2])
    # Keep only the habit label as the column name.
    wide.columns = wide.columns.get_level_values(0)
    return wide.reset_index()

In [34]:
def add_standard_cols(groups):
    """
    Make sure every required PFT cover column exists in `groups`.

    Required columns that are absent are appended (in the order listed
    below) and filled with NaN. The frame is modified in place and also
    returned.
    """
    # required columns
    necessary_cols = [
        'deciduous dwarf shrub cover (%)',
        'deciduous dwarf to low shrub cover (%)',
        'deciduous dwarf to tall shrub cover (%)',
        'deciduous dwarf to tree cover (%)',
        'deciduous tree cover (%)',
        'evergreen dwarf shrub cover (%)',
        'evergreen dwarf to low shrub cover (%)',
        'evergreen dwarf to tall shrub cover (%)',
        'evergreen dwarf to tree cover (%)',
        'evergreen tree cover (%)',
        'bryophyte cover (%)',
        'forb cover (%)',
        'graminoid cover (%)',
        'lichen cover (%)',
    ]

    # add missing columns and fill with nan
    present = groups.columns.tolist()
    absent = [required for required in necessary_cols if required not in present]
    groups[absent] = np.nan
    return groups

In [35]:
# Merged-PFT pipeline: sum cover per plot and PFT, flatten, pivot to one row
# per plot, then make sure every required PFT column is present.
grouped_standard = transpose_df(
    flatten_multilevel(add_cover(abr, completehabits, 'Standard Habit')),
    'Standard Habit',
)
standard_groups = add_standard_cols(grouped_standard)

In [36]:
# Non-merged pipeline: sum cover per plot and label, flatten, pivot to one
# row per plot. No required-column padding is applied here.
grouped_nonstand = transpose_df(
    flatten_multilevel(add_cover(abr, completehabits, 'NonStandard Habit')),
    'NonStandard Habit',
)

## Read ancillary table

In [37]:
# add ancillary information
# Work on a copy so the loaded `ancillary` frame stays untouched.
anc = ancillary.copy()
anc.rename(columns={'plot_id':'Site Code'}, inplace=True)
# year
# Extract the calendar year from the field start timestamp; the nullable
# 'Int64' dtype keeps missing years as <NA> instead of forcing floats.
anc['year'] = pd.DatetimeIndex(anc['field_start_ts']).year
anc['year'] = anc['year'].astype('Int64')
# plot size
# NOTE(review): stored as the string '55', not a number — confirm intended.
anc['plot_radius_m'] = '55'
# context
#anc['source_context'] = 'mapping vegetation'
# dataset source
anc['source'] = 'ABR_RS'

In [38]:
# Keep only the ancillary columns that are joined onto the cover tables.
anc = anc[['Site Code', 'latitude', 'longitude', 'year', 'plot_radius_m', 'source']]

## Create total cover file (non-merged)

In [39]:
# set index and sort columns
# Built as a chain of non-mutating calls so grouped_nonstand is left untouched.
totcover_nm = (
    grouped_nonstand
    .rename(columns={'plot_id': 'Site Code'})
    # errors='ignore': the fungus column only exists when fungus was
    # recorded, and its absence must not abort the cell with a KeyError.
    .drop(columns=['fungus cover (%)'], errors='ignore')
    .set_index('Site Code')
)
# Remove the leftover habit-column label from the columns axis.
totcover_nm.columns.name = None
totcover_nm = totcover_nm[sorted(totcover_nm.columns.tolist())]

In [40]:
# add "total cover" to column names
# NOTE: running this cell twice would insert "total" twice.
cols = [label.replace('cover (%)', 'total cover (%)') for label in totcover_nm.columns.tolist()]
totcover_nm.columns = cols

In [41]:
# add ancillary
# Left join: every plot in the cover table is kept; plots without a row in
# `anc` get missing ancillary values.
totcover_nm = totcover_nm.merge(anc, on='Site Code', how='left')

In [43]:
# Make 'Site Code' the row index again after the merge so it is written as
# the leading column on export.
totcover_nm.set_index('Site Code', inplace=True)

In [57]:
# export
# Non-merged total cover table plus ancillary columns.
p = '/mnt/poseidon/remotesensing/arctic/data/training/Test_04/original_files/'
f = 'ABR_RS_fcover_total_orig.csv'
totcover_nm.to_csv(p + f)

## Create total cover file (merged)

In [45]:
# set index and sort columns
# Built as a chain of non-mutating calls so standard_groups is left untouched.
totcover = (
    standard_groups
    .rename(columns={'plot_id': 'Site Code'})
    # errors='ignore': the fungus column only exists when fungus was
    # recorded, and its absence must not abort the cell with a KeyError.
    .drop(columns=['fungus cover (%)'], errors='ignore')
    .set_index('Site Code')
)
# Remove the leftover habit-column label from the columns axis.
totcover.columns.name = None
totcover = totcover[sorted(totcover.columns.tolist())]

In [46]:
# add "total cover" to column names
# NOTE: running this cell twice would insert "total" twice.
cols = [label.replace('cover (%)', 'total cover (%)') for label in totcover.columns.tolist()]
totcover.columns = cols

In [47]:
# add ancillary
# Left join: every plot in the cover table is kept; plots without a row in
# `anc` get missing ancillary values.
totcover = totcover.merge(anc, on='Site Code', how='left')

In [49]:
# Make 'Site Code' the row index again after the merge so it is written as
# the leading column on export.
totcover.set_index('Site Code', inplace=True)

In [56]:
# export
# Merged (standardized PFT) total cover table plus ancillary columns.
p = '/mnt/poseidon/remotesensing/arctic/data/training/Test_04/original_files/'
f = 'ABR_RS_fcover_total_merged.csv'
totcover.to_csv(p + f)

## Create top cover file

In [51]:
# rename columns and set index
# Start from a copy of the non-vegetation top-cover subset, give the columns
# their output names, and index the table by site.
topcover = (
    nonveg_top
    .copy()
    .rename(columns={'litter_topcov':'litter top cover (%)',
                     'water_topcov':'water top cover (%)',
                     'bareground_topcov':'bare ground top cover (%)',
                     'plot_id':'Site Code'})
    .set_index('Site Code')
)
# Alphabetical column order.
topcover = topcover[sorted(topcover.columns.tolist())]

In [52]:
# add ancillary
# Left join: every plot in the top-cover table is kept; plots without a row
# in `anc` get missing ancillary values.
topcover = topcover.merge(anc, on='Site Code', how='left')

In [54]:
# Make 'Site Code' the row index again after the merge so it is written as
# the leading column on export.
topcover.set_index('Site Code', inplace=True)

In [55]:
# export
# Non-PFT top cover (litter, water, bare ground) plus ancillary columns.
p = '/mnt/poseidon/remotesensing/arctic/data/training/Test_04/original_files/'
f = 'ABR_RS_fcover_top_nonPFT.csv'
topcover.to_csv(p + f)