In [1]:
import pandas as pd
import numpy as np
import standardize_pft_funcs as spf
from pyogrio import read_dataframe

In [2]:
# read the TVC plant abundance/cover table; species are identified by SPP code
p = '/mnt/poseidon/remotesensing/arctic/data/vectors/TVC/'
f = 'TVC_PLANT_ABUNDANCE_COVER_20120502.csv'
tvc = pd.read_csv(f'{p}{f}', index_col=0)
tvc

Unnamed: 0,SITE,SUBSITE,TRTMT,PLOT,YEAR,SPP,STATUS,TISSUE,Abundance
1685,AKUREYRI,GA66,CTL,658,1998,EQUARV,LIVE,UNK,0.5
1686,AKUREYRI,GA66,CTL,658,1998,FESRIC,LIVE,UNK,0.5
1687,AKUREYRI,GA66,CTL,658,1998,GALNOR,LIVE,UNK,0.5
1688,AKUREYRI,GA66,CTL,658,1998,JUNTRI,LIVE,UNK,0.5
1689,AKUREYRI,GA66,CTL,658,1998,XXXMOSSSPP,LIVE,UNK,38.0
...,...,...,...,...,...,...,...,...,...
935274,VALBERCLA,ALPINE,CTL,CTL2,1994,SALRETI,LIVE,,15.5
935275,VALBERCLA,ALPINE,CTL,CTL3,1994,SALRETI,LIVE,,3.0
935276,VALBERCLA,ALPINE,CTL,CTL4,1994,SALRETI,LIVE,,3.0
935277,VALBERCLA,ALPINE,CTL,CTL5,1994,SALRETI,LIVE,,3.0


In [3]:
# keep only the rows whose treatment code is 'CTL' (control plots)
ctl = tvc.loc[tvc['TRTMT'].eq('CTL')]

In [4]:
# preview the control-plot subset selected from tvc
ctl

Unnamed: 0,SITE,SUBSITE,TRTMT,PLOT,YEAR,SPP,STATUS,TISSUE,Abundance
1685,AKUREYRI,GA66,CTL,658,1998,EQUARV,LIVE,UNK,0.5
1686,AKUREYRI,GA66,CTL,658,1998,FESRIC,LIVE,UNK,0.5
1687,AKUREYRI,GA66,CTL,658,1998,GALNOR,LIVE,UNK,0.5
1688,AKUREYRI,GA66,CTL,658,1998,JUNTRI,LIVE,UNK,0.5
1689,AKUREYRI,GA66,CTL,658,1998,XXXMOSSSPP,LIVE,UNK,38.0
...,...,...,...,...,...,...,...,...,...
935274,VALBERCLA,ALPINE,CTL,CTL2,1994,SALRETI,LIVE,,15.5
935275,VALBERCLA,ALPINE,CTL,CTL3,1994,SALRETI,LIVE,,3.0
935276,VALBERCLA,ALPINE,CTL,CTL4,1994,SALRETI,LIVE,,3.0
935277,VALBERCLA,ALPINE,CTL,CTL5,1994,SALRETI,LIVE,,3.0


In [5]:
# read the species lookup table used to translate SPP codes into names
p = '/mnt/poseidon/remotesensing/arctic/data/vectors/TVC/'
f = 'TVC_SPP_20120502.csv'
spp = pd.read_csv(f'{p}{f}', index_col=0)
spp

Unnamed: 0,SITE,SPP,GRSTRAT,WOODY,GFNARROWarft,GFNARROWwalker,GENUS,SPECIES,RAUNKIAERS,SORENSENS,GEODISTR
1,SADVENT,ALOBOR,GUER,HERB,GRAMINOID,GRASS,Alopecurus,borealis,HE,,
2,SADVENT,ANTJUR,,,LIVER,LIVERLE,Anthelia,juratzkana,,,
3,SADVENT,AULPAL,,,MOSS,MPLEU,Aulacomnium,palustre,,,
4,SADVENT,AULTUR,,,MOSS,MPLEU,Aulacomnium,turgidum,,,
5,SADVENT,BISVIV,GUER,HERB,FORBSV,FORB,Bistorta,vivipara,HE,,
...,...,...,...,...,...,...,...,...,...,...,...
7334,NYALESUND,SALPOL,,WOODY,DSHRUB,SDECI,Salix,polaris,,,
7335,NYALESUND,SAXOPP,,,FORBSV,FORB,Saxifraga,oppositifolia,,,
7336,NYALESUND,XXXSOIL,,,SOIL,SOIL,,,,,
7337,NYALESUND,XXXROCK,,,ROCK,ROCK,,,,,


In [6]:
# read the site/subsite metadata table (file is Windows-1252 encoded)
p = '/mnt/poseidon/remotesensing/arctic/data/vectors/TVC/'
f = 'TVC_SITE_SUBSITE_20141216.csv'
siteinfo = pd.read_csv(f'{p}{f}', index_col=0, encoding='Windows-1252')
siteinfo

Unnamed: 0,SITE,SUBSITE,COMMTYPE,LAT,LONG,ELEV,AZONE,PI,DomGrazer,GrazerIntensity,CAVM,CAVMBROAD
1,ABISKO,ABISKODRY,MOIST,68.350000,18.820000,450.0,ALPINE,Michelsen,insects,low,S1,S
2,ABISKO,ABISKOWET,WET,68.350000,18.820000,400.0,ALPINE,Michelsen,insects,low,W3,W
3,ABISKO,PEATLAND,MOIST,68.350000,18.820000,340.0,LOW,Cornelissen,small,low,W3,W
4,AKUREYRI,GA66,MOIST,65.590000,-17.970000,190.0,LOW,Magnusson,large,medium,G3,G
5,AKUREYRI,MD72,MOIST,65.510000,-18.080000,190.0,LOW,Magnusson,large,high,G3,G
...,...,...,...,...,...,...,...,...,...,...,...,...
229,ZACKENBERG,SALIX ARCTICA SNOWBED,MOIST,74.280000,-20.320000,45.0,HIGH,Schmidt,large,medium,P2,P
230,ZACKENBERG,SALIX_SITE,MOIST,74.475081,-20.540539,40.0,HIGH,Schmidt,large,medium,,
231,ZACKENBERG,VACCINIUM ULIGONOSUM HEATH,MOIST,74.280000,-20.380000,10.0,HIGH,Schmidt,large,medium,P2,P
232,ZERMATT,AGOZERM,DRY,46.000000,7.700000,2480.0,ALPINE,Wipf,large,low,,


In [7]:
# left-join the site/subsite metadata onto every control-plot cover record
df1 = ctl.merge(siteinfo, how='left', on=['SITE', 'SUBSITE'])

In [8]:
# left-join the species lookup (keyed by site + SPP code) onto the cover/site records
df2 = df1.merge(spp, how='left', on=['SITE', 'SPP'])
df2

Unnamed: 0,SITE,SUBSITE,TRTMT,PLOT,YEAR,SPP,STATUS,TISSUE,Abundance,COMMTYPE,...,CAVMBROAD,GRSTRAT,WOODY,GFNARROWarft,GFNARROWwalker,GENUS,SPECIES,RAUNKIAERS,SORENSENS,GEODISTR
0,AKUREYRI,GA66,CTL,658,1998,EQUARV,LIVE,UNK,0.5,MOIST,...,G,,HERB,FORBSV,SLVASC,Equisetum,arvense,,,
1,AKUREYRI,GA66,CTL,658,1998,FESRIC,LIVE,UNK,0.5,MOIST,...,G,,HERB,GRAMINOID,GRASS,Festuca,richardsonii,,,
2,AKUREYRI,GA66,CTL,658,1998,GALNOR,LIVE,UNK,0.5,MOIST,...,G,,HERB,FORBSV,FORB,Galium,normanii,,,
3,AKUREYRI,GA66,CTL,658,1998,JUNTRI,LIVE,UNK,0.5,MOIST,...,G,,HERB,GRAMINOID,RUSH,Juncus,trifidus,,,
4,AKUREYRI,GA66,CTL,658,1998,XXXMOSSSPP,LIVE,UNK,38.0,MOIST,...,G,,,MOSS,MOSSU,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
15633,VALBERCLA,ALPINE,CTL,CTL2,1994,SALRETI,LIVE,,15.5,MOIST,...,G,PHAL,WOODY,DSHRUB,SDECI,Salix,reticulata,CH,,alpine
15634,VALBERCLA,ALPINE,CTL,CTL3,1994,SALRETI,LIVE,,3.0,MOIST,...,G,PHAL,WOODY,DSHRUB,SDECI,Salix,reticulata,CH,,alpine
15635,VALBERCLA,ALPINE,CTL,CTL4,1994,SALRETI,LIVE,,3.0,MOIST,...,G,PHAL,WOODY,DSHRUB,SDECI,Salix,reticulata,CH,,alpine
15636,VALBERCLA,ALPINE,CTL,CTL5,1994,SALRETI,LIVE,,3.0,MOIST,...,G,PHAL,WOODY,DSHRUB,SDECI,Salix,reticulata,CH,,alpine


In [9]:
# build "Genus species"; the result is NaN wherever either part is missing
df2['Name'] = df2['GENUS'].add(' ').add(df2['SPECIES'])

In [10]:
# fill NaN species names with the PFT code from GFNARROWarft
# Assign the result back to the column: calling fillna(inplace=True) on the
# `df2['Name']` selection is a chained in-place update, which pandas warns
# about and which does not modify df2 when Copy-on-Write is enabled.
df2['Name'] = df2['Name'].fillna(df2['GFNARROWarft'])

In [11]:
# preview the merged table after the 'Name' column has been added and gap-filled
df2

Unnamed: 0,SITE,SUBSITE,TRTMT,PLOT,YEAR,SPP,STATUS,TISSUE,Abundance,COMMTYPE,...,GRSTRAT,WOODY,GFNARROWarft,GFNARROWwalker,GENUS,SPECIES,RAUNKIAERS,SORENSENS,GEODISTR,Name
0,AKUREYRI,GA66,CTL,658,1998,EQUARV,LIVE,UNK,0.5,MOIST,...,,HERB,FORBSV,SLVASC,Equisetum,arvense,,,,Equisetum arvense
1,AKUREYRI,GA66,CTL,658,1998,FESRIC,LIVE,UNK,0.5,MOIST,...,,HERB,GRAMINOID,GRASS,Festuca,richardsonii,,,,Festuca richardsonii
2,AKUREYRI,GA66,CTL,658,1998,GALNOR,LIVE,UNK,0.5,MOIST,...,,HERB,FORBSV,FORB,Galium,normanii,,,,Galium normanii
3,AKUREYRI,GA66,CTL,658,1998,JUNTRI,LIVE,UNK,0.5,MOIST,...,,HERB,GRAMINOID,RUSH,Juncus,trifidus,,,,Juncus trifidus
4,AKUREYRI,GA66,CTL,658,1998,XXXMOSSSPP,LIVE,UNK,38.0,MOIST,...,,,MOSS,MOSSU,,,,,,MOSS
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
15633,VALBERCLA,ALPINE,CTL,CTL2,1994,SALRETI,LIVE,,15.5,MOIST,...,PHAL,WOODY,DSHRUB,SDECI,Salix,reticulata,CH,,alpine,Salix reticulata
15634,VALBERCLA,ALPINE,CTL,CTL3,1994,SALRETI,LIVE,,3.0,MOIST,...,PHAL,WOODY,DSHRUB,SDECI,Salix,reticulata,CH,,alpine,Salix reticulata
15635,VALBERCLA,ALPINE,CTL,CTL4,1994,SALRETI,LIVE,,3.0,MOIST,...,PHAL,WOODY,DSHRUB,SDECI,Salix,reticulata,CH,,alpine,Salix reticulata
15636,VALBERCLA,ALPINE,CTL,CTL5,1994,SALRETI,LIVE,,3.0,MOIST,...,PHAL,WOODY,DSHRUB,SDECI,Salix,reticulata,CH,,alpine,Salix reticulata


In [12]:
# unique values of the 'Name' column, gathered by the project helper (SAVE is off)
species = spf.get_unique_species(
    DFRAME=df2, SCOL='Name', DNAME='TVC', SAVE=False, OUTP=''
)

In [13]:
# preview the unique species-name table
species

Unnamed: 0,Name
0,ALGAE
1,Achillea atrata
2,Achillea lanulosa
3,Acomastylis rossii
4,Adoxa moschatellina
...,...
431,Veronica aphylla
432,Vicia cracca
433,Viola adunca
434,Viola palustris


In [14]:
# every column of the merged table is passed on as an ancillary column
ancillary_cols = list(df2.columns)

In [15]:
# ancillary table produced by the project helper from df2 (SAVE is off, no output path)
ancillary = spf.get_species_ancillary(
    DFRAME=df2, ANC_COLS=ancillary_cols, DNAME='TVC', SAVE=False, OUTP=None
)

In [16]:
# read the AKVEG species checklist
p = '/mnt/poseidon/remotesensing/arctic/data/vectors/AKVEG_ACCS/'
f = 'AKVEG_species_checklist.csv'
checklist = read_dataframe(f'{p}{f}')

In [17]:
# derive a shared 'Mapping Name' key on both tables by passing each name
# through spf.get_substrings (checklist: accepted name; species: data name)
for frame, source_col in ((checklist, 'Accepted Name'), (species, 'Name')):
    frame['Mapping Name'] = frame[source_col].apply(spf.get_substrings)

In [18]:
# look up candidate growth habits for every species via the checklist
habits = spf.fill_habits(
    unique_species=species,
    checklist=checklist,
    u_name='Name',
    c_unofficial_name='Name',
    c_official_name='Accepted Name',
    mapping_name='Mapping Name',
    habit='Habit',
)

209 species are missing habits.
110 species still missing habits.
34 species still missing habits.
31 species still missing habits.


In [19]:
# read the headerless evergreen/deciduous lookup, name its two columns,
# and let the project helper attach leaf habit to the habits table
p = '/mnt/poseidon/remotesensing/arctic/data/vectors/AK-AVA_post2000/'
f = 'evergreendecid_macander2022.csv'
evergrndecid = pd.read_csv(f'{p}{f}', header=None)
evergrndecid.columns = ['evergreendecid', 'species']
final = spf.add_leaf_habit(habits, evergrndecid)

In [20]:
# clean up
# keep the checklist-derived habit as 'Potential Habit', then blank out the
# columns that are filled in later
final['Potential Habit'] = final['Habit']
# np.nan rather than np.NaN: the capitalised alias was removed in NumPy 2.0,
# and the rest of this notebook already uses np.nan
final[['Potential Height', 'Height', 'Habit']] = np.nan
# fix the column order of the species table
final = final[['Name', 'Mapping Name', 'Potential Habit', 
               'Habit', 'Leaf Habit', 'Potential Height', 
               'Height']]

In [21]:
# preview the species table after the column clean-up
final

Unnamed: 0_level_0,Name,Mapping Name,Potential Habit,Habit,Leaf Habit,Potential Height,Height
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,ALGAE,ALGAE,,,,,
1,Achillea atrata,Achillea atrata,forb,,,,
2,Achillea lanulosa,Achillea lanulosa,forb,,,,
3,Acomastylis rossii,Acomastylis rossii,forb,,,,
4,Adoxa moschatellina,Adoxa moschatellina,forb,,,,
...,...,...,...,...,...,...,...
431,Veronica aphylla,Veronica aphylla,forb,,,,
432,Vicia cracca,Vicia cracca,forb,,,,
433,Viola adunca,Viola adunca,forb,,,,
434,Viola palustris,Viola palustris,forb,,,,


In [22]:
# species for which no potential habit was assigned
nanfinal = final.loc[final['Potential Habit'].isnull()]
nanfinal

Unnamed: 0_level_0,Name,Mapping Name,Potential Habit,Habit,Leaf Habit,Potential Height,Height
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,ALGAE,ALGAE,,,,,
60,Calluna vulgaris,Calluna vulgaris,,,,,
118,Chionophila jamesii,Chionophila jamesii,,,,,
135,Colobanthus quitensis,Colobanthus quitensis,,,,,
138,DSHRUB,DSHRUB,,,,,
139,DUNG,DUNG,,,,,
188,FORBSV,FORBSV,,,,,
189,FUNGI,FUNGI,,,,,
196,Filipendula ulmaria,Filipendula ulmaria,,,,,
197,GRAMINOID,GRAMINOID,,,,,


In [23]:
# pair every habit-less species with the GFNARROWarft values recorded for it in df2
hab = nanfinal.merge(df2[['Name', 'GFNARROWarft']], how='inner', on='Name')

In [24]:
# collapse to the distinct GFNARROWarft values per species name,
# then expand back to one row per (name, value)
fillhab = (
    hab.groupby('Name')
       .agg({'GFNARROWarft': set})
       .explode('GFNARROWarft')
)
fillhab

Unnamed: 0_level_0,GFNARROWarft
Name,Unnamed: 1_level_1
ALGAE,ALGAE
Calluna vulgaris,ESHRUB
Chionophila jamesii,FORBSV
Colobanthus quitensis,FORBSV
DSHRUB,DSHRUB
DUNG,DUNG
FORBSV,FORBSV
FUNGI,FUNGI
Filipendula ulmaria,FORBSV
GRAMINOID,GRAMINOID


In [25]:
# fill potential habit and habit columns
nf = nanfinal.copy()
# Look the PFT up by species name instead of assigning `.values` positionally:
# positional assignment silently pairs the wrong habit with a species whenever
# the row order of nanfinal differs from the name-sorted order of fillhab.
pft_by_name = fillhab['GFNARROWarft']
if not pft_by_name.index.is_unique:
    # a name with several PFT codes cannot be resolved automatically
    raise ValueError('Some species names have more than one GFNARROWarft value; '
                     'resolve them before filling habits.')
nf['Potential Habit'] = nf['Name'].map(pft_by_name)
nf['Habit'] = nf['Potential Habit']

In [26]:
final2 = final.copy()
# Results are assigned back to the columns: fillna(inplace=True) on a column
# selection is a chained in-place update that pandas warns about and that
# leaves final2 unchanged when Copy-on-Write is enabled.
# 1) gap-fill from the PFT-derived values (aligned on the row index)
final2['Potential Habit'] = final2['Potential Habit'].fillna(nf['Potential Habit'])
# 2) habit: PFT-derived value first, then fall back to the potential habit
final2['Habit'] = final2['Habit'].fillna(nf['Habit'])
final2['Habit'] = final2['Habit'].fillna(final2['Potential Habit'])
# 3) normalise case
final2['Potential Habit'] = final2['Potential Habit'].str.lower()
final2['Habit'] = final2['Habit'].str.lower()
final2

Unnamed: 0_level_0,Name,Mapping Name,Potential Habit,Habit,Leaf Habit,Potential Height,Height
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,ALGAE,ALGAE,algae,algae,,,
1,Achillea atrata,Achillea atrata,forb,forb,,,
2,Achillea lanulosa,Achillea lanulosa,forb,forb,,,
3,Acomastylis rossii,Acomastylis rossii,forb,forb,,,
4,Adoxa moschatellina,Adoxa moschatellina,forb,forb,,,
...,...,...,...,...,...,...,...
431,Veronica aphylla,Veronica aphylla,forb,forb,,,
432,Vicia cracca,Vicia cracca,forb,forb,,,
433,Viola adunca,Viola adunca,forb,forb,,,
434,Viola palustris,Viola palustris,forb,forb,,,


In [27]:
# distinct habit labels present after gap-filling
pd.unique(final2['Habit'])

array(['algae', 'forb', 'graminoid', 'lichen', 'liverwort', 'dwarf shrub',
       'shrub, shrub, forb, dwarf shrub', 'moss', 'shrub',
       'shrub, tree, shrub, deciduous tree, dwarf shrub', 'spore-bearing',
       'eshrub', 'forbsv', 'dshrub', 'dung', 'fungi', 'litter',
       'shrub, shrub, dwarf shrub', 'other', 'rock', 'shrubu', 'soil',
       'dwarf shrub, shrub', 'unk', 'water'], dtype=object)

In [28]:
# rows with a habit, and among them those whose habit text mentions 'shrub'
finalnonnull = final2.loc[final2['Habit'].notnull()]
shrubs = finalnonnull.loc[finalnonnull['Habit'].str.contains('shrub')]

In [29]:
# write the shrub species table to disk
p = '/mnt/poseidon/remotesensing/arctic/data/vectors/TVC/'
f = 'TVC_shrubs_00.csv'
shrubs.to_csv(f'{p}{f}')

In [30]:
# rows with a habit whose text does not mention 'shrub'
is_shrub = finalnonnull['Habit'].str.contains('shrub')
nonshrubs = finalnonnull.loc[~is_shrub]

In [31]:
# write the non-shrub species table to disk
# (this cell previously wrote `shrubs`, so TVC_nonshrubs_00.csv duplicated
# TVC_shrubs_00.csv instead of holding the non-shrub rows)
p = '/mnt/poseidon/remotesensing/arctic/data/vectors/TVC/'
f = 'TVC_nonshrubs_00.csv'
nonshrubs.to_csv(p + f)

In [32]:
# rows that still have no habit after gap-filling
finalnull = final2.loc[final2['Habit'].isna()]

In [33]:
# write the habit-less species table to disk
p = '/mnt/poseidon/remotesensing/arctic/data/vectors/TVC/'
f = 'TVC_nullhabit_00.csv'
finalnull.to_csv(f'{p}{f}')

# Clean

In [34]:
# load the cleaned shrub table (file TVC_shrubs_01.csv)
p = '/mnt/poseidon/remotesensing/arctic/data/vectors/TVC/'
f = 'TVC_shrubs_01.csv'
shrubs2 = pd.read_csv(f'{p}{f}', index_col=0)

In [35]:
# stack cleaned shrubs, non-shrubs and habit-less species into one table
completehabits = pd.concat(objs=[shrubs2, nonshrubs, finalnull], axis=0)

In [36]:
# distinct habit labels in the combined table, as a plain list
pd.unique(completehabits['Habit']).tolist()

['dwarf shrub',
 'forb',
 'shrub',
 'shrub, tree',
 'unknown shrub',
 'algae',
 'graminoid',
 'lichen',
 'liverwort',
 'moss',
 'spore-bearing',
 'forbsv',
 'dung',
 'fungi',
 'litter',
 'other',
 'rock',
 'soil',
 'unk',
 'water']

In [37]:
def standardize_habit(habitrow, leafrow, heightrow):
    """Translate per-species habit descriptors into standardized cover labels.

    The three inputs are walked in parallel, one species per position.

    Parameters
    ----------
    habitrow : pandas.Series
        Growth habit text (e.g. 'forb', 'dwarf shrub', 'shrub, tree').
    leafrow : pandas.Series
        Leaf habit text (e.g. 'Evergreen', 'Deciduous'); may be missing.
    heightrow : pandas.Series
        Height class text (e.g. 'dwarf to low shrub'); may be missing.

    Returns
    -------
    list
        One entry per input position, in input order: a lower-cased,
        whitespace-normalised label ending in 'cover (%)', or NaN when the
        habit itself is missing.
    """
    habits = habitrow.to_numpy()
    leaves = leafrow.to_numpy()
    heights = heightrow.to_numpy()
    new_row = []

    for habit, leaf, height in zip(habits, leaves, heights):
        # A missing habit cannot be classified; emit NaN instead of failing
        # on the substring tests below (`'algae' in nan` raises TypeError).
        if pd.isna(habit):
            new_row.append(np.nan)
            continue

        # A missing leaf habit contributes nothing to the label; otherwise the
        # f-strings below would embed the literal text 'nan'.
        leaf = '' if pd.isna(leaf) else leaf

        # Branch order matters: earlier tests take precedence over later ones.
        if 'algae' in habit:
            new_habit = 'lichen cover (%)'
        elif 'moss' in habit or 'liverwort' in habit:
            new_habit = 'bryophyte cover (%)'
        elif 'spore-bearing' in habit:
            new_habit = 'forb cover (%)'
        elif 'coniferous' in habit:
            new_habit = 'evergreen tree cover (%)'
        elif habit == 'dwarf shrub':
            new_habit = f'{leaf} {habit} cover (%)'
        elif height == 'dwarf to low shrub':
            new_habit = f'{leaf} {height} cover (%)'
        elif height == 'dwarf to tall shrub':
            new_habit = f'{leaf} {height} cover (%)'
        elif habit == 'shrub, tree':
            new_habit = f'{leaf} shrub to tree cover (%)'
        elif 'forb' in habit:
            new_habit = 'forb cover (%)'
        elif habit == 'unk':
            new_habit = 'unknown species cover (%)'
        elif habit == 'unknown shrub':
            new_habit = f'{habit} cover (%)'
        elif 'shrub' in habit:
            new_habit = f'{leaf} {habit} cover (%)'
        else:
            new_habit = f'{habit} cover (%)'

        # collapse repeated/leading whitespace (e.g. from an empty leaf habit)
        new_row.append(" ".join(new_habit.split()).lower())

    return new_row

In [38]:
# derive the standardized label from habit, leaf habit and height (in that order)
completehabits['Standard Habit'] = standardize_habit(
    *(completehabits[col] for col in ('Habit', 'Leaf Habit', 'Height'))
)

In [39]:
# preview the combined species table with its new 'Standard Habit' column
completehabits

Unnamed: 0_level_0,Name,Mapping Name,Potential Habit,Habit,Leaf Habit,Potential Height,Height,Standard Habit
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
34,Arctostaphylos alpina,Arctostaphylos alpina,dwarf shrub,dwarf shrub,Evergreen,,,evergreen dwarf shrub cover (%)
35,Arctostaphylos rubra,Arctostaphylos rubra,dwarf shrub,dwarf shrub,Evergreen,,,evergreen dwarf shrub cover (%)
36,Arctostaphylos uva-ursi,Arctostaphylos uva-ursi,dwarf shrub,dwarf shrub,Evergreen,,,evergreen dwarf shrub cover (%)
43,Artemisia pattersonii,Artemisia pattersonii,"shrub,forb, forb, dwarf shrub",forb,,"Perennials, 8–20 cm, mildly aromatic.",,forb cover (%)
44,Artemisia scopulorum,Artemisia scopulorum,"shrub,forb, forb, dwarf shrub",forb,,"Perennials, 10–25 cm (cespitose), mildly aroma...",,forb cover (%)
...,...,...,...,...,...,...,...,...
431,Veronica aphylla,Veronica aphylla,forb,forb,,,,forb cover (%)
432,Vicia cracca,Vicia cracca,forb,forb,,,,forb cover (%)
433,Viola adunca,Viola adunca,forb,forb,,,,forb cover (%)
434,Viola palustris,Viola palustris,forb,forb,,,,forb cover (%)


In [40]:
# distinct standardized labels that were produced
pd.unique(completehabits['Standard Habit'])

array(['evergreen dwarf shrub cover (%)', 'forb cover (%)',
       'deciduous dwarf to low shrub cover (%)',
       'deciduous dwarf to tall shrub cover (%)',
       'evergreen dwarf to low shrub cover (%)',
       'deciduous shrub cover (%)',
       'evergreen dwarf to tall shrub cover (%)',
       'unknown shrub cover (%)', 'deciduous dwarf shrub cover (%)',
       'deciduous shrub to tree cover (%)', 'lichen cover (%)',
       'graminoid cover (%)', 'bryophyte cover (%)', 'dung cover (%)',
       'fungi cover (%)', 'litter cover (%)', 'other cover (%)',
       'rock cover (%)', 'soil cover (%)', 'unknown species cover (%)',
       'water cover (%)'], dtype=object)

In [41]:
# turn empty / whitespace-only cells into NaN
completehabits = completehabits.replace(to_replace=r'^\s*$', value=np.nan, regex=True)

# Add to fcover

In [42]:
# preview the ancillary table returned by spf.get_species_ancillary
ancillary

Unnamed: 0,SITE,SUBSITE,TRTMT,PLOT,YEAR,SPP,STATUS,TISSUE,Abundance,COMMTYPE,...,GRSTRAT,WOODY,GFNARROWarft,GFNARROWwalker,GENUS,SPECIES,RAUNKIAERS,SORENSENS,GEODISTR,Name
0,AKUREYRI,GA66,CTL,658,1998,EQUARV,LIVE,UNK,0.5,MOIST,...,,HERB,FORBSV,SLVASC,Equisetum,arvense,,,,Equisetum arvense
1,AKUREYRI,GA66,CTL,658,1998,FESRIC,LIVE,UNK,0.5,MOIST,...,,HERB,GRAMINOID,GRASS,Festuca,richardsonii,,,,Festuca richardsonii
2,AKUREYRI,GA66,CTL,658,1998,GALNOR,LIVE,UNK,0.5,MOIST,...,,HERB,FORBSV,FORB,Galium,normanii,,,,Galium normanii
3,AKUREYRI,GA66,CTL,658,1998,JUNTRI,LIVE,UNK,0.5,MOIST,...,,HERB,GRAMINOID,RUSH,Juncus,trifidus,,,,Juncus trifidus
4,AKUREYRI,GA66,CTL,658,1998,XXXMOSSSPP,LIVE,UNK,38.0,MOIST,...,,,MOSS,MOSSU,,,,,,MOSS
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
15633,VALBERCLA,ALPINE,CTL,CTL2,1994,SALRETI,LIVE,,15.5,MOIST,...,PHAL,WOODY,DSHRUB,SDECI,Salix,reticulata,CH,,alpine,Salix reticulata
15634,VALBERCLA,ALPINE,CTL,CTL3,1994,SALRETI,LIVE,,3.0,MOIST,...,PHAL,WOODY,DSHRUB,SDECI,Salix,reticulata,CH,,alpine,Salix reticulata
15635,VALBERCLA,ALPINE,CTL,CTL4,1994,SALRETI,LIVE,,3.0,MOIST,...,PHAL,WOODY,DSHRUB,SDECI,Salix,reticulata,CH,,alpine,Salix reticulata
15636,VALBERCLA,ALPINE,CTL,CTL5,1994,SALRETI,LIVE,,3.0,MOIST,...,PHAL,WOODY,DSHRUB,SDECI,Salix,reticulata,CH,,alpine,Salix reticulata


In [43]:
# attach the species-level habit columns to every record, matched on 'Name'
plotlevel_habit = ancillary.merge(completehabits, how='left', on='Name')
plotlevel_habit

Unnamed: 0,SITE,SUBSITE,TRTMT,PLOT,YEAR,SPP,STATUS,TISSUE,Abundance,COMMTYPE,...,SORENSENS,GEODISTR,Name,Mapping Name,Potential Habit,Habit,Leaf Habit,Potential Height,Height,Standard Habit
0,AKUREYRI,GA66,CTL,658,1998,EQUARV,LIVE,UNK,0.5,MOIST,...,,,Equisetum arvense,Equisetum arvense,spore-bearing,spore-bearing,,,,forb cover (%)
1,AKUREYRI,GA66,CTL,658,1998,FESRIC,LIVE,UNK,0.5,MOIST,...,,,Festuca richardsonii,Festuca richardsonii,graminoid,graminoid,,,,graminoid cover (%)
2,AKUREYRI,GA66,CTL,658,1998,GALNOR,LIVE,UNK,0.5,MOIST,...,,,Galium normanii,Galium normanii,forb,forb,,,,forb cover (%)
3,AKUREYRI,GA66,CTL,658,1998,JUNTRI,LIVE,UNK,0.5,MOIST,...,,,Juncus trifidus,Juncus trifidus,graminoid,graminoid,,,,graminoid cover (%)
4,AKUREYRI,GA66,CTL,658,1998,XXXMOSSSPP,LIVE,UNK,38.0,MOIST,...,,,MOSS,MOSS,moss,moss,,,,bryophyte cover (%)
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
15633,VALBERCLA,ALPINE,CTL,CTL2,1994,SALRETI,LIVE,,15.5,MOIST,...,,alpine,Salix reticulata,Salix reticulata,dwarf shrub,dwarf shrub,Deciduous,,,deciduous dwarf shrub cover (%)
15634,VALBERCLA,ALPINE,CTL,CTL3,1994,SALRETI,LIVE,,3.0,MOIST,...,,alpine,Salix reticulata,Salix reticulata,dwarf shrub,dwarf shrub,Deciduous,,,deciduous dwarf shrub cover (%)
15635,VALBERCLA,ALPINE,CTL,CTL4,1994,SALRETI,LIVE,,3.0,MOIST,...,,alpine,Salix reticulata,Salix reticulata,dwarf shrub,dwarf shrub,Deciduous,,,deciduous dwarf shrub cover (%)
15636,VALBERCLA,ALPINE,CTL,CTL5,1994,SALRETI,LIVE,,3.0,MOIST,...,,alpine,Salix reticulata,Salix reticulata,dwarf shrub,dwarf shrub,Deciduous,,,deciduous dwarf shrub cover (%)


In [72]:
# average Abundance over all records sharing site, subsite, year and standard habit,
# then add a concatenated site+subsite key
groups = (
    plotlevel_habit
    .groupby(['SITE', 'SUBSITE', 'YEAR', 'Standard Habit'], group_keys=True)['Abundance']
    .agg(lambda x: sum(x)/len(x))
    .reset_index()
)
groups['SITESUBSITE'] = groups['SITE'].add(groups['SUBSITE'])
groups

Unnamed: 0,SITE,SUBSITE,YEAR,Standard Habit,Abundance,SITESUBSITE
0,AKUREYRI,GA66,1998,bryophyte cover (%),32.400000,AKUREYRIGA66
1,AKUREYRI,GA66,1998,deciduous dwarf to low shrub cover (%),14.500000,AKUREYRIGA66
2,AKUREYRI,GA66,1998,evergreen dwarf shrub cover (%),5.375000,AKUREYRIGA66
3,AKUREYRI,GA66,1998,evergreen dwarf to low shrub cover (%),3.000000,AKUREYRIGA66
4,AKUREYRI,GA66,1998,forb cover (%),0.983607,AKUREYRIGA66
...,...,...,...,...,...,...
841,VALBERCLA,ALPINE,1996,forb cover (%),1.219626,VALBERCLAALPINE
842,VALBERCLA,ALPINE,1996,graminoid cover (%),0.452941,VALBERCLAALPINE
843,VALBERCLA,ALPINE,2009,deciduous dwarf shrub cover (%),6.771429,VALBERCLAALPINE
844,VALBERCLA,ALPINE,2009,forb cover (%),1.157391,VALBERCLAALPINE


In [74]:
# spot check: bryophyte rows for the AKUREYRI / GA66 subsite
t = groups[(groups['SITESUBSITE'] == 'AKUREYRIGA66')
           & (groups['Standard Habit'] == 'bryophyte cover (%)')]
t

Unnamed: 0,SITE,SUBSITE,YEAR,Standard Habit,Abundance,SITESUBSITE
0,AKUREYRI,GA66,1998,bryophyte cover (%),32.4,AKUREYRIGA66
8,AKUREYRI,GA66,2005,bryophyte cover (%),35.0,AKUREYRIGA66


In [45]:
# distinct site+subsite keys
pd.unique(groups['SITESUBSITE'])

array(['AKUREYRIGA66', 'AKUREYRIMD72', 'AKUREYRISB63', 'AKUREYRISY59',
       'ALEXFIORDCASSIOPE_COVER', 'ALEXFIORDLEVDOLOMITE',
       'ALEXFIORDLEVGRANITE', 'ATQASUKRATE_BETULA_SHRUB',
       'ATQASUKRATE_CAREX_ WET_MEADOW', 'ATQASUKRATE_DRYAS_HEATH',
       'ATQASUKRATE_ERIOPHORUM_WET_MEADOW',
       'ATQASUKRATE_HIEROCHLOE_DRY_MEADOW',
       'BARROWARCTOPHILA_POND_ORDINATION',
       'BARROWCAREX WET MEADOW_ORDINATION',
       'BARROWCAREX_MESIC MEADOW_ORDINATION',
       'BARROWCAREX_MOIST MEADOW_ORDINATION',
       'BARROWCAREX_MOIST_MEADOW_MICROTOPO',
       'BARROWCAREX_WET_MEADOW_MICROTOPO', 'BARROWDRY_HEATH_ORDINATION',
       'BARROWDUPONTIA_WET_MEADOW_MICROTOPO', 'BLONDUOSSD33',
       'BLONDUOSSD34', 'DALSMYNNIAG4', 'DALSMYNNIKD24', 'DALSMYNNIKD25',
       'DOVREKUNTSHOE', 'FAROESORNFELLI', 'FINSERIDGE_COVER',
       'HJARDARLANDLH92', 'HJARDARLANDSH90', 'HOLTAVORDUHEIDIAH37',
       'HOLTAVORDUHEIDIAH38', 'HOLTAVORDUHEIDIVH49', 'KLUANEPIKA',
       'MODRUVELLIRLH69', 'MO

In [47]:
# all aggregated rows for subsite GA66
groups.loc[groups['SUBSITE'].eq('GA66')]

Unnamed: 0,SITE,SUBSITE,YEAR,Standard Habit,Abundance,SITESUBSITE
0,AKUREYRI,GA66,1998,bryophyte cover (%),32.4,AKUREYRIGA66
1,AKUREYRI,GA66,1998,deciduous dwarf to low shrub cover (%),14.5,AKUREYRIGA66
2,AKUREYRI,GA66,1998,evergreen dwarf shrub cover (%),5.375,AKUREYRIGA66
3,AKUREYRI,GA66,1998,evergreen dwarf to low shrub cover (%),3.0,AKUREYRIGA66
4,AKUREYRI,GA66,1998,forb cover (%),0.983607,AKUREYRIGA66
5,AKUREYRI,GA66,1998,graminoid cover (%),2.986301,AKUREYRIGA66
6,AKUREYRI,GA66,1998,lichen cover (%),0.5,AKUREYRIGA66
7,AKUREYRI,GA66,1998,soil cover (%),3.0,AKUREYRIGA66
8,AKUREYRI,GA66,2005,bryophyte cover (%),35.0,AKUREYRIGA66
9,AKUREYRI,GA66,2005,deciduous dwarf shrub cover (%),0.5,AKUREYRIGA66


In [121]:
# NOTE(review): no visible cell creates a 'Unique Plot' column on `groups`; the
# frame built in the earlier groupby cell has SITE, SUBSITE, YEAR,
# Standard Habit, Abundance and SITESUBSITE. This cell presumably relied on a
# cell that is no longer in the notebook -- confirm and restore it, otherwise
# a fresh Restart & Run All should fail here.
# reshape from long (one row per plot and habit) to wide (one row per plot,
# one column per habit)
groups = groups.set_index(['Unique Plot', 'Standard Habit']).stack().unstack([1,2])
groups

Standard Habit,bryophyte cover (%),forb cover (%),graminoid cover (%),soil cover (%),deciduous dwarf to low shrub cover (%),evergreen dwarf shrub cover (%),lichen cover (%),deciduous dwarf shrub cover (%),deciduous shrub to tree cover (%),evergreen dwarf to low shrub cover (%),...,unknown species cover (%),litter cover (%),other cover (%),water cover (%),dung cover (%),fungi cover (%),rock cover (%),evergreen dwarf to tall shrub cover (%),deciduous shrub cover (%),unknown shrub cover (%)
Unnamed: 0_level_1,Abundance,Abundance,Abundance,Abundance,Abundance,Abundance,Abundance,Abundance,Abundance,Abundance,...,Abundance,Abundance,Abundance,Abundance,Abundance,Abundance,Abundance,Abundance,Abundance,Abundance
Unique Plot,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
AKUREYRIGA66CTL6511998,15.0,2.5,18.5,3.0,,,,,,,...,,,,,,,,,,
AKUREYRIGA66CTL651b2005,38.0,8.0,21.5,,,,,,,,...,,,,,,,,,,
AKUREYRIGA66CTL6521998,38.0,13.0,22.0,,3.0,,,,,,...,,,,,,,,,,
AKUREYRIGA66CTL652b2005,63.0,6.0,21.0,,,3.0,0.5,,,,...,,,,,,,,,,
AKUREYRIGA66CTL6531998,15.0,3.5,53.5,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
VALBERCLAALPINEOTCOTC71996,,15.7,3.5,,,,,38.6,,,...,,,,,,,,,,
VALBERCLAALPINEOTCOTC81994,,45.0,1.0,,,,,3.5,,,...,,,,,,,,,,
VALBERCLAALPINEOTCOTC81996,,20.2,1.1,,,,,1.5,,,...,,,,,,,,,,
VALBERCLAALPINEOTCOTC91994,,116.2,3.6,,,,,3.5,,,...,,,,,,,,,,


In [122]:
# keep only the top column level (the habit names), then turn the plot id
# index back into an ordinary column
groups.columns = groups.columns.get_level_values(level=0)
groups = groups.reset_index(drop=False)
groups

Standard Habit,Unique Plot,bryophyte cover (%),forb cover (%),graminoid cover (%),soil cover (%),deciduous dwarf to low shrub cover (%),evergreen dwarf shrub cover (%),lichen cover (%),deciduous dwarf shrub cover (%),deciduous shrub to tree cover (%),...,unknown species cover (%),litter cover (%),other cover (%),water cover (%),dung cover (%),fungi cover (%),rock cover (%),evergreen dwarf to tall shrub cover (%),deciduous shrub cover (%),unknown shrub cover (%)
0,AKUREYRIGA66CTL6511998,15.0,2.5,18.5,3.0,,,,,,...,,,,,,,,,,
1,AKUREYRIGA66CTL651b2005,38.0,8.0,21.5,,,,,,,...,,,,,,,,,,
2,AKUREYRIGA66CTL6521998,38.0,13.0,22.0,,3.0,,,,,...,,,,,,,,,,
3,AKUREYRIGA66CTL652b2005,63.0,6.0,21.0,,,3.0,0.5,,,...,,,,,,,,,,
4,AKUREYRIGA66CTL6531998,15.0,3.5,53.5,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1669,VALBERCLAALPINEOTCOTC71996,,15.7,3.5,,,,,38.6,,...,,,,,,,,,,
1670,VALBERCLAALPINEOTCOTC81994,,45.0,1.0,,,,,3.5,,...,,,,,,,,,,
1671,VALBERCLAALPINEOTCOTC81996,,20.2,1.1,,,,,1.5,,...,,,,,,,,,,
1672,VALBERCLAALPINEOTCOTC91994,,116.2,3.6,,,,,3.5,,...,,,,,,,,,,


In [126]:
# one row of ancillary information per unique plot
# NOTE(review): no visible cell adds 'Unique Plot' to plotlevel_habit -- confirm
# where it is created.
# NOTE(review): GroupBy.first() presumably picks the first non-null value of each
# column independently, so species-level columns may combine values from
# different records of a plot (the preview shows graminoid rows carrying shrub
# height text) -- verify this is acceptable for the columns kept downstream.
groups1 = (
    plotlevel_habit
    .groupby(['Unique Plot'], group_keys=True)
    .first()
    .reset_index()
)
groups1

Unnamed: 0,Unique Plot,SITE,SUBSITE,TRTMT,PLOT,YEAR,SPP,STATUS,TISSUE,Abundance,...,SORENSENS,GEODISTR,Name,Mapping Name,Potential Habit,Habit,Leaf Habit,Potential Height,Height,Standard Habit
0,AKUREYRIGA66CTL6511998,AKUREYRI,GA66,CTL,651,1998,AGRVIN,LIVE,UNK,0.5,...,,,Agrostis vinealis,Agrostis vinealis,graminoid,graminoid,,,,graminoid cover (%)
1,AKUREYRIGA66CTL651b2005,AKUREYRI,GA66,CTL,651b,2005,CARBIG,LIVE,UNK,3.0,...,,,Carex bigelowii,Carex bigelowii,graminoid,graminoid,,,,graminoid cover (%)
2,AKUREYRIGA66CTL6521998,AKUREYRI,GA66,CTL,652,1998,AGRCAP,LIVE,UNK,3.0,...,,,Agrostis capillaris,Agrostis capillaris,graminoid,graminoid,Deciduous,"Shrubs, sprawling, creeping, or upright, to 1 m",dwarf to low shrub,graminoid cover (%)
3,AKUREYRIGA66CTL652b2005,AKUREYRI,GA66,CTL,652b,2005,SELSEL,LIVE,UNK,0.5,...,,,Selaginella selaginoides,Selaginella selaginoides,spore-bearing,spore-bearing,Evergreen,,,forb cover (%)
4,AKUREYRIGA66CTL6531998,AKUREYRI,GA66,CTL,653,1998,AGRCAP,LIVE,UNK,3.0,...,,,Agrostis capillaris,Agrostis capillaris,graminoid,graminoid,,,,graminoid cover (%)
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1669,VALBERCLAALPINEOTCOTC71996,VALBERCLA,ALPINE,OTC,OTC7,1996,FESQUA,LIVE,,0.5,...,V,alpine,Festuca quadriflora,Festuca quadriflora,graminoid,graminoid,Deciduous,"Plants 0.005-0.05 m, (dwarf), forming clonal m...",,graminoid cover (%)
1670,VALBERCLAALPINEOTCOTC81994,VALBERCLA,ALPINE,OTC,OTC8,1994,ACHATR,LIVE,,0.5,...,V,alpine,Achillea atrata,Achillea atrata,forb,forb,Deciduous,"Plants 0.005-0.05 m, (dwarf), forming clonal m...",,forb cover (%)
1671,VALBERCLAALPINEOTCOTC81996,VALBERCLA,ALPINE,OTC,OTC8,1996,POAALP,LIVE,,0.1,...,V,alpine,Poa alpina,Poa alpina,graminoid,graminoid,Deciduous,"Plants 0.005-0.05 m, (dwarf), forming clonal m...",,graminoid cover (%)
1672,VALBERCLAALPINEOTCOTC91994,VALBERCLA,ALPINE,OTC,OTC9,1994,ANDOBT,LIVE,,0.1,...,V,alpine,Androsace obtusifolia,Androsace obtusifolia,forb,forb,Deciduous,"Plants 0.005-0.05 m, (dwarf), forming clonal m...",,forb cover (%)


In [133]:
# join the wide per-plot cover table with the per-plot ancillary row, discard the
# species-level habit columns, and normalise missing values to NaN
final = (
    groups
    .merge(groups1, on='Unique Plot')
    .drop(columns=['Name', 'Mapping Name',
                   'Potential Habit', 'Habit',
                   'Leaf Habit', 'Potential Height',
                   'Height', 'Standard Habit'])
    .fillna(value=np.nan)
)

In [134]:
# preview the merged per-plot cover + ancillary table
final

Unnamed: 0,Unique Plot,bryophyte cover (%),forb cover (%),graminoid cover (%),soil cover (%),deciduous dwarf to low shrub cover (%),evergreen dwarf shrub cover (%),lichen cover (%),deciduous dwarf shrub cover (%),deciduous shrub to tree cover (%),...,CAVMBROAD,GRSTRAT,WOODY,GFNARROWarft,GFNARROWwalker,GENUS,SPECIES,RAUNKIAERS,SORENSENS,GEODISTR
0,AKUREYRIGA66CTL6511998,15.0,2.5,18.5,3.0,,,,,,...,G,,HERB,GRAMINOID,GRASS,Agrostis,vinealis,,,
1,AKUREYRIGA66CTL651b2005,38.0,8.0,21.5,,,,,,,...,G,,HERB,GRAMINOID,SEDGE,Carex,bigelowii,,,
2,AKUREYRIGA66CTL6521998,38.0,13.0,22.0,,3.0,,,,,...,G,,HERB,GRAMINOID,GRASS,Agrostis,capillaris,,,
3,AKUREYRIGA66CTL652b2005,63.0,6.0,21.0,,,3.0,0.5,,,...,G,,HERB,FORBSV,SLVASC,Selaginella,selaginoides,,,
4,AKUREYRIGA66CTL6531998,15.0,3.5,53.5,,,,,,,...,G,,HERB,GRAMINOID,GRASS,Agrostis,capillaris,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1669,VALBERCLAALPINEOTCOTC71996,,15.7,3.5,,,,,38.6,,...,G,PHAL,HERB,FORBSV,GRASS,Festuca,quadriflora,HE,V,alpine
1670,VALBERCLAALPINEOTCOTC81994,,45.0,1.0,,,,,3.5,,...,G,PHAL,HERB,FORBSV,FORB,Achillea,atrata,HE,V,alpine
1671,VALBERCLAALPINEOTCOTC81996,,20.2,1.1,,,,,1.5,,...,G,PHAL,HERB,FORBSV,GRASS,Poa,alpina,HE,V,alpine
1672,VALBERCLAALPINEOTCOTC91994,,116.2,3.6,,,,,3.5,,...,G,RAMBO,HERB,FORBSV,FORB,Androsace,obtusifolia,HE,V,alpine


In [135]:
# write the per-plot PFT cover table to disk
p = '/mnt/poseidon/remotesensing/arctic/data/vectors/TVC/'
f = 'ALL_TVC_pft_fcover_00.csv'
final.to_csv(f'{p}{f}')

In [116]:
# NOTE(review): `gpd` is not imported in this notebook's import cell and `data`
# is not defined in any visible cell; the recorded output of this cell is a
# NameError. Confirm the intended import and input frame before relying on it.
# build point geometries from the longitude/latitude columns, tagged with CRS EPSG:4326
gdf = gpd.GeoDataFrame(data, 
                       geometry=gpd.points_from_xy(data['Longitude (decimal degrees)'], data['Latitude (decimal degrees)']), 
                       crs="EPSG:4326")

NameError: name 'gpd' is not defined

In [64]:
# narrow the HUC6 layer to units whose 'states' text contains 'AK',
# then to the three target huc6 codes
huc_p = '/mnt/poseidon/remotesensing/arctic/data/vectors/supplementary'
huc = read_dataframe(huc_p + '/wbdhu6_a_us_september2022.gdb')
ak_huc6 = huc.loc[huc['states'].str.contains('AK')]
aois = ['190604', '190603', '190602']
huc_aoi = ak_huc6.loc[ak_huc6['huc6'].isin(aois)]

In [None]:
# select data
# keeps the rows of gdf whose geometry intersects the union of the selected
# HUC6 geometries
# NOTE(review): this cell has no execution count, and `gdf` comes from the cell
# whose recorded output is a NameError -- confirm it runs end to end.
poi = gdf[gdf.intersects(huc_aoi.unary_union)]