# AKVEG Standardization Notebook
---
---

# Load required packages

In [1]:
import geopandas as gpd
import pandas as pd
import numpy as np
from datetime import date, timedelta
from pyogrio import read_dataframe
import glob
import geopandas as gpd
import os
import standardize_pft_funcs as spf
import chardet
import tarfile
from urllib.request import urlretrieve
import regex as re

---
---
# 1. Fcover data extraction and pre-standardization
After visually inspecting each species-level fcover table for obvious errors, we saved the "cleaned" tables to a folder that we then standardized here.

---
## 1.1. Load species-level fcover table

In [2]:
# read akveg npr-a data, checklist, and ancillary data
fcover_path = '/mnt/poseidon/remotesensing/arctic/data/vectors/akveg/AKVEG_foliar_cover.csv'
fcover = pd.read_csv(fcover_path, index_col=0)
fcover.head(3)

Unnamed: 0_level_0,Project,Site Code,Date,Observer,Recorder,Cover Type,Accepted Name,Cover
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
1,AIM NPR-A,AB-1B,2012-08-01,Tina Boucher,,total cover,Arnica lessingii,0.0
2,AIM NPR-A,AB-1B,2012-08-01,Tina Boucher,,total cover,Artemisia arctica,0.0
3,AIM NPR-A,AB-1B,2012-08-01,Tina Boucher,,total cover,Cardamine bellidifolia,0.0


In [3]:
# remove species names with 0 cover
# not all recorded species actually had any data...
temp = fcover.copy()
temp['Cover'].replace(0, np.nan, inplace=True)
fcover = temp.dropna(subset=['Cover'])
aux = fcover.iloc[:,:-1]
fcover = fcover[['Site Code', 'Accepted Name', 'Cover']]

In [4]:
nonzero_cover = fcover.copy()
nonzero_cover = nonzero_cover.dropna(how='any')
nonzero_cover = nonzero_cover[nonzero_cover['Cover'] != 0]

In [5]:
# get unique species names from narrowed down list
u_species_names = nonzero_cover['Accepted Name'].unique()
u_species_names = [x for x in u_species_names if pd.notnull(x)]
species_names_df = pd.DataFrame(u_species_names, columns=['datasetSpeciesName'])

---
## 2.2. Left join species checklist to fcover datasets

In [6]:
# load species checklist and prep for joining
checklist_path = '/mnt/poseidon/remotesensing/arctic/data/vectors/akveg/AKVEG_species_checklist.csv'
checklist_df = read_dataframe(f'{checklist_path}')
checklist_df.rename(columns={'Code': 'nameCode',
                             'Name':'checklistSpeciesName',
                             'Status': 'nameStatus',
                             'Accepted Name': 'nameAccepted',
                             'Family': 'nameFamily',
                             'Name Source': 'acceptedNameSource',
                             'Level': 'nameLevel',
                             'Category': 'speciesForm',
                             'Habit': 'speciesHabit'
                            }, inplace=True)

In [7]:
# checklist table
checklist_df.head(3)

Unnamed: 0,nameCode,checklistSpeciesName,nameStatus,nameAccepted,nameFamily,acceptedNameSource,nameLevel,speciesForm,speciesHabit
0,abroth,Abrothallus De Not.,accepted,Abrothallus De Not.,Abrothallaceae,CNALH,genus,lichen,lichen
1,abrpar,Abrothallus parmeliarum (Sommerf.) Arnold,accepted,Abrothallus parmeliarum (Sommerf.) Arnold,Abrothallaceae,CNALH,species,lichen,lichen
2,abrpey,Abrothallus peyritschii (Stein) Kotte,accepted,Abrothallus peyritschii (Stein) Kotte,Abrothallaceae,Dillman et al. 2012,species,lichen,lichen


In [8]:
species_names_df.head(3)

Unnamed: 0,datasetSpeciesName
0,Carex microchaeta
1,Potentilla elegans
2,Smelowskia porsildii


In [9]:
# get first 2 words (genus-species) from checklist accepted name and data species name
checklist_df['joinKey'] = checklist_df['checklistSpeciesName'].apply(spf.get_substrings)
species_names_df['joinKey'] = species_names_df['datasetSpeciesName'].apply(spf.get_substrings)


habits = spf.join_to_checklist(unique_species=species_names_df, 
                               checklist=checklist_df, 
                               u_name='datasetSpeciesName', 
                               c_unofficial_name='checklistSpeciesName', 
                               c_official_name='nameAccepted', 
                               mapping_name='joinKey',
                               habit='speciesHabit')

53 species are missing habits.
50 species still missing habits.
6 species still missing habits.
6 species still missing habits.


In [10]:
habits.head(3)

Unnamed: 0_level_0,datasetSpeciesName,joinKey,speciesHabit
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,Carex microchaeta,Carex microchaeta,graminoid
1,Potentilla elegans,Potentilla elegans,forb
2,Smelowskia porsildii,Smelowskia porsildii,forb


In [11]:
species_names_df.head(3)

Unnamed: 0,datasetSpeciesName,joinKey
0,Carex microchaeta,Carex microchaeta
1,Potentilla elegans,Potentilla elegans
2,Smelowskia porsildii,Smelowskia porsildii


In [12]:
habits = species_names_df.merge(habits, how='left', left_on='datasetSpeciesName', right_on='datasetSpeciesName', suffixes=(None, '_1'))
habits = habits[['joinKey', 'datasetSpeciesName', 'speciesHabit']]

---
## 1.3. Add leaf retention to shrub species

In [13]:
# load leaf retention table and clean
leaf_path = '/mnt/poseidon/remotesensing/arctic/data/vectors/ava_turboveg'
leaf_file = 'macander22_leaf_retention.csv'
leaf_ret = pd.read_csv(f'{leaf_path}/{leaf_file}', header=None)
leaf_ret.columns = ['leafRetention', 'retentionSpeciesName']
leaf_ret.replace(to_replace='Deciduous Shrubs', value='deciduous', inplace=True)
leaf_ret.replace(to_replace='Evergreen Shrubs', value='evergreen', inplace=True)

In [14]:
leaf_ret.head(5)

Unnamed: 0,leafRetention,retentionSpeciesName
0,deciduous,Salix
1,deciduous,Betula nana
2,deciduous,Vaccinium uliginosum
3,deciduous,Alnus alnobetula ssp. sinuata
4,deciduous,Betula glandulosa


### 1.3.1. Join leaf habit column to habit table

In [15]:
# add leaf retention column
habits_wleaf = spf.add_leaf_retention(habits, leaf_ret, 'leafRetention')
habits_wleaf = habits_wleaf[['datasetSpeciesName', 'joinKey', 'speciesHabit', 'leafRetention']]
habits_wleaf.head(5)

Unnamed: 0,datasetSpeciesName,joinKey,speciesHabit,leafRetention
0,Carex microchaeta,Carex microchaeta,graminoid,
1,Potentilla elegans,Potentilla elegans,forb,
2,Smelowskia porsildii,Smelowskia porsildii,forb,
3,Umbilicaria,Umbilicaria,lichen,
4,Alectoria,Alectoria,lichen,


In [16]:
# function to remove any extra words with shrub; 
# i.e., if it says shrub, assume it's shrub until proven otherwise
def clean_shrub_habits(row):
    if isinstance(row, float):
        return np.nan
    if 'shrub' in row:
        return 'shrub'
    else:
        return row

# apply function
habits_wleaf['speciesHabit'] = habits_wleaf['speciesHabit'].apply(clean_shrub_habits)

In [17]:
# a list of current habits
list(habits_wleaf['speciesHabit'].unique())

['graminoid',
 'forb',
 'lichen',
 'shrub',
 'moss',
 'spore-bearing',
 'liverwort',
 nan]

---
## 1.4. Export shrubs, nonshrubs, and null habits

In [18]:
# get all shrub species
habits_complete = habits_wleaf.copy()
nonnull = habits_complete[~habits_complete['speciesHabit'].isnull()]
shrubs = nonnull[nonnull['speciesHabit'].str.contains('shrub')]
shrubs.to_csv(f'/mnt/poseidon/remotesensing/arctic/data/vectors/akveg/akveg_shrubs.csv')

In [19]:
# get all non-shrub species
nonshrubs = nonnull[~nonnull['speciesHabit'].str.contains('shrub')]
nonshrubs.to_csv(f'/mnt/poseidon/remotesensing/arctic/data/vectors/akveg/akveg_nonshrubs.csv')

In [20]:
# get null habits
null = habits_complete[habits_complete['speciesHabit'].isnull()]
null.to_csv(f'/mnt/poseidon/remotesensing/arctic/data/vectors/akveg/akveg_nullhabit.csv')

In [21]:
len(null) + len(nonshrubs) + len(shrubs)

384

In [22]:
len(habits_complete)

384

---
## 1.5. Clean shrubs, nonshrubs, and unknown (null) habits
Here, we manually assessed each file. The nonshrubs were usually correct and did not have to be adjusted. Some shrubs were missing leaf retention information, so we consulted with arctic ecologists to determine it. For unknown (null) habits, we consulted arctic ecology expert, Amy Breen, to determine the issue and correct habit. After the tables were complete, all species names had a single associated habit.
- null habits
    - assign missing habits and consult ecologist when needed
- non-shrubs
    - assign leaf habit to all trees (ctrl-f to find all trees in excel)
        - coniferous tree = evergreen
        - deciduous tree  = deciduous
- shrubs
    - check current leaf habits and correct if necessary
    - fill empty leaf habits
    - choose leaf habit if both evergreen & deciduous listed
    - add Stature column (dwarf shrub or not)

---
---
# 2. Standardization

---
## 2.1. Standardize habit names to match our PFT schema
- deciduous shrub
- evergreen shrub
- graminoid
- forb
- litter
- non-vascular
    - lichen
    - brophyte

### 2.1.1. Load cleaned habit files

In [23]:
# paths to visually assessed and cleaned habit files
base_path = '/mnt/poseidon/remotesensing/arctic/data/vectors/akveg'
nonshrub = 'akveg_nonshrubs_01.csv'
null = 'akveg_nullhabit_01.csv'
shrub = 'akveg_shrubs_01.csv'
all_habit_types = [nonshrub, shrub, null]

# create list of habit dataframes
habit_dfs = []
for habit_type in all_habit_types:
    habit_df = pd.read_csv(f'{base_path}/{habit_type}', index_col=0)
    habit_dfs.append(habit_df)
all_habits = pd.concat(habit_dfs)

### 2.1.2. Check habit names to standardize

In [24]:
# show unique habit names we're working with
all_habits['speciesHabit'].unique().tolist()

['graminoid',
 'forb',
 'lichen',
 'moss',
 'spore-bearing',
 'liverwort',
 'shrub',
 'algae',
 'fungus']

In [25]:
# akveg-specific habit name standardization function
def standardize_habit(habitrow, leafrow):
    
    habitrow = habitrow.to_numpy()
    leafrow = leafrow.to_numpy()
    new_row = []
    
    for habit, leaf in zip(habitrow, leafrow):
        
        habit = habit.lower()
        # non-shrubs
        if 'algae' in habit:
            new_habit = 'lichen'
        elif 'moss' in habit or 'liverwort' in habit:
            new_habit = 'bryophyte'
        elif 'spore-bearing' in habit:
            new_habit = 'forb'
            
        # shrubs and trees
        elif 'shrub' in habit:
            new_habit = f'{leaf} shrub'
            
        else:
            new_habit = f'{habit}'
        new_row.append(new_habit)
    
    return new_row

In [26]:
# standardized PFT
standard_habit_col = standardize_habit(all_habits['speciesHabit'], 
                                       all_habits['leafRetention'])

In [27]:
all_habits['standardHabit'] = standard_habit_col

In [28]:
all_habits.head(5)

Unnamed: 0,datasetSpeciesName,joinKey,speciesHabit,leafRetention,speciesStature,standardHabit
0,Carex microchaeta,Carex microchaeta,graminoid,,,graminoid
1,Potentilla elegans,Potentilla elegans,forb,,,forb
2,Smelowskia porsildii,Smelowskia porsildii,forb,,,forb
3,Umbilicaria,Umbilicaria,lichen,,,lichen
4,Alectoria,Alectoria,lichen,,,lichen


In [29]:
p = '/mnt/poseidon/remotesensing/arctic/data/vectors/akveg/'
f = 'akveg_all_habits.csv'
all_habits.to_csv(p + f)

In [73]:
# loop through each plot and sum fcover per pft
all_habits = all_habits[['datasetSpeciesName', 'standardHabit']]
standard_fcover = fcover.merge(all_habits, 
                               left_on='Accepted Name', 
                               right_on='datasetSpeciesName')
# group by 'Site Code' and 'standardHabit', then sum the 'Cover'
temp = standard_fcover.groupby(['Site Code', 'standardHabit'])['Cover'].sum().reset_index()
temp = temp.pivot(index='Site Code', columns='standardHabit', values='Cover')
standard_fcover = temp.fillna(0)

In [74]:
# clean up
standard_fcover.columns.name = None
standard_fcover.index.name = 'siteCode'
standard_fcover.index = standard_fcover.index.astype(str)
standard_fcover.head(5)

Unnamed: 0_level_0,bryophyte,deciduous shrub,evergreen shrub,forb,fungus,graminoid,lichen
siteCode,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
AB-1B,0.0,0.0,0.0,1.4,0.0,0.7,12.7
AB-5B,2.7,14.7,22.0,2.7,0.0,6.7,14.1
ADST-2,2.6,10.0,18.7,4.2,0.0,3.3,7.5
ADST-21,26.5,19.3,52.6,5.5,0.0,6.0,41.3
ADST-33,8.1,14.7,30.0,27.5,0.0,7.4,7.5


---
## 2.2. Standardize pfts

In [75]:
# list of necessary PFTs
pfts = ['deciduous shrub', 'deciduous tree',
        'evergreen shrub', 'evergreen tree',
        'forb', 'graminoid', 'non-vascular',
        'bryophyte', 'lichen', 'litter']

In [76]:
# add columns that might be missing
def add_standard_cols(groups, pft_cols):
    
    # add missing columns and fill with nan
    cols = groups.columns.tolist()
    addcols = []
    for nc in pft_cols:
        if nc not in cols:
            addcols.append(nc)
    groups[addcols] = np.nan
    return groups

### 2.2.1 Add non-vegetation data

In [93]:
# load non-vegetation data and select
nonveg = pd.read_csv('/mnt/poseidon/remotesensing/arctic/data/vectors/akveg/BLM_Natl_AIM_TerrADat_Hub.csv')
nonveg = nonveg[['AH_TotalLitterCover', 'FH_RockCover', 
                 'FH_WaterCover', 'BareSoilCover', 'PlotID']]
nonveg = nonveg.rename(columns=dict(PlotID='plotName',
                                    AH_TotalLitterCover='litter',
                                    FH_RockCover='bare ground',
                                    FH_WaterCover='water',
                                    BareSoilCover='bare ground'))
nonveg = nonveg.groupby(nonveg.columns, axis=1).sum()
nonveg = nonveg.set_index('plotName')
nonveg.head(5)

Unnamed: 0_level_0,bare ground,litter,water
plotName,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
FMTT-5B,0.0,40.816327,0.0
FWMM-5B,0.0,48.0,4.0
FWMM-1,0.0,65.333333,2.0
ADST-4,0.0,22.0,0.0
ADST-1,2.666667,28.0,0.0


In [78]:
# add non-vegetation information
standard_fcover = standard_fcover.merge(nonveg, 
                                        how='left', 
                                        left_index=True, 
                                        right_index=True)

In [79]:
# create non-vascular
standard_fcover['non-vascular'] = standard_fcover['bryophyte'] + standard_fcover['lichen']

In [80]:
# create standard columns
standard_fcover = add_standard_cols(standard_fcover, pfts)
standard_fcover = standard_fcover[pfts]
standard_fcover = standard_fcover.merge(nonveg, how='left', left_index=True, right_index=True)
standard_fcover.head(5)

Unnamed: 0_level_0,deciduous shrub,deciduous tree,evergreen shrub,evergreen tree,forb,graminoid,non-vascular,bryophyte,lichen,litter,baregroundCover,litterCover,waterCover
siteCode,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
AB-1B,0.0,,0.0,,1.4,0.7,12.7,0.0,12.7,,20.666667,0.0,0.0
AB-5B,14.7,,22.0,,2.7,6.7,16.8,2.7,14.1,,14.0,16.0,0.0
ADST-2,10.0,,18.7,,4.2,3.3,10.1,2.6,7.5,,14.0,10.0,0.0
ADST-21,19.3,,52.6,,5.5,6.0,67.8,26.5,41.3,,14.666667,44.666667,0.0
ADST-33,14.7,,30.0,,27.5,7.4,15.6,8.1,7.5,,27.333333,21.333333,0.0


---
## 2.3. Create ancillary

Plot information:
- [ ] UID
- [x] plotName
- [x] dataSource
- [x] dataSubsource
- [x] fcoverScale
- [x] surveyMethod
- [x] surveyPurpose
- [x] duplicatedCoords
- [x] duplicatedDate
- [x] surveyYear
- [x] surveyMonth
- [x] surveyDay

Geographical information:
- [x] bioclimateSubzone
- [x] adminCountry
- [x] adminUnit
- [x] fireYears
- [x] coordEpsg
- [x] georefSource
- [x] georefAccuracy
- [x] plotRadius
- [x] longitueX
- [x] latitudeY

In [81]:
# load auxiliary data
fcover_aux = pd.read_csv('/mnt/poseidon/remotesensing/arctic/data/vectors/akveg/AKVEG_foliar_cover.csv', index_col=0)
fcover_aux = fcover_aux.groupby('Site Code').first()
aux = pd.read_csv('/mnt/poseidon/remotesensing/arctic/data/vectors/akveg/AKVEG_ancillary.csv', index_col=0)
# sampling methods, coverscale, data source
methods = pd.read_csv('/mnt/poseidon/remotesensing/arctic/data/vectors/supplementary/sampling_methods.csv')

In [82]:
# merge aux tables into one
aux = fcover_aux.merge(aux, how='left', 
                       left_on='Site Code', 
                       right_on='Site Code')

### 2.3.1. Plot Information

In [83]:
new_aux = pd.DataFrame()

# date columns
aux['surveyDate'] = pd.to_datetime(aux['Date'], format='mixed')
new_aux['surveyYear'] = aux['surveyDate'].dt.year
new_aux['surveyMonth'] = aux['surveyDate'].dt.month
new_aux['surveyDay'] = aux['surveyDate'].dt.day

# plot size
new_aux['plotRadius'] = aux['Plot Dimensions'].apply(lambda x: re.findall(r'\b\d+\b', x)[0])

# geographical information
new_aux['latitudeY'] = aux['Latitude']
new_aux['longitudeX'] = aux['Longitude']
new_aux['georefSource'] = 'GPS'
new_aux['georefAccuracy'] = aux['Uncertainty']
new_aux['coordEPSG'] = 'EPSG:4326' # will need to convert from EPSG:4269

# main
new_aux['plotName'] = aux['Site Code']
new_aux['dataSource'] = 'AKVEG'
new_aux['dataSubsource'] = 'AIM NPR-A Timm Nawrocki'
new_aux = new_aux.merge(methods, how='left', left_on='dataSubsource', right_on='dataSubsource')
new_aux = new_aux.set_index(new_aux['plotName'])

### 2.3.2. Geographical Intersections

In [84]:
# gaul download: https://data.jrc.ec.europa.eu/dataset/jrc-10112-10004
# countries and sub-admin
gaul1_path = '/mnt/poseidon/remotesensing/arctic/data/vectors/supplementary/gaul/gaul1/gaul1_asap.shp'
gaul1 = gpd.read_file(gaul1_path)
gaul1 = gaul1.to_crs('EPSG:5936')

# fire history
fire_path = '/mnt/poseidon/remotesensing/arctic/data/vectors/supplementary/historical_fire_perimeters/InterAgencyFirePerimeterHistory_All_Years_View.shp'
fire = gpd.read_file(fire_path)
fire = fire.to_crs('EPSG:5936')

# bioclimate subzones
bioclim_path = '/mnt/poseidon/remotesensing/arctic/data/vectors/supplementary/bioclimate_la_latlon/bioclimate_la_latlon.shp'
bioclim = gpd.read_file(bioclim_path)
bioclim = bioclim.to_crs('EPSG:5936')

In [85]:
# add geospatial information
fcover_and_aux = pd.concat([standard_fcover, new_aux], axis=1)
fcover_and_aux = gpd.GeoDataFrame(fcover_and_aux, 
                                  geometry=gpd.points_from_xy(fcover_and_aux['longitudeX'],
                                                              fcover_and_aux['latitudeY']),
                                  crs='EPSG:4269')
fcover_and_aux = fcover_and_aux.to_crs('EPSG:5936')
fcover_and_aux = gpd.sjoin(fcover_and_aux, gaul1[['name1', 'name0', 'geometry']], predicate='intersects', how='left', rsuffix='gaul')
fcover_and_aux = gpd.sjoin(fcover_and_aux, fire[['FIRE_YEAR', 'geometry']], predicate='intersects', how='left', rsuffix='fire')
fcover_and_aux = gpd.sjoin(fcover_and_aux, bioclim[['zone', 'geometry']], predicate='intersects', how='left', rsuffix='bioclim')
fcover_and_aux = fcover_and_aux.drop(columns=['index_gaul', 'index_fire', 'index_bioclim', 'plotName'])

In [86]:
# rename new columns
fcover_and_aux.rename(columns={'name1':'adminUnit',
                               'name0':'adminCountry',
                               'FIRE_YEAR':'fireYears',
                               'zone':'bioclimSubzone'}, inplace=True)

### 2.3.3. Duplicate lat/lon (revisted or very nearby)

In [87]:
# populates a column with the indicies of duplicated
# information; e.g., duplicate coords or dates
def find_duplicates(df, subset, col_name):
    temp = df.copy()
    if temp.duplicated(subset=subset).any():
        print('duplicates found')
        duplicates = temp.duplicated(subset=subset, keep=False)
        duplicate_groups = temp[duplicates].groupby(subset).apply(lambda x: list(x.index)).reset_index(name='indices')
        index_to_duplicates = {idx: indices for indices in duplicate_groups['indices'] for idx in indices}
        temp[col_name] = temp.index.map(index_to_duplicates)
    else:
        print('no duplicates found')
    return temp

In [88]:
# duplicate coord column
coords = ['longitudeX', 'latitudeY']
fcover_and_aux['duplicatedCoords'] = np.nan
fcover_and_aux = find_duplicates(fcover_and_aux, coords, 'duplicatedCoords')

# duplicate date column
date = ['surveyYear', 'surveyMonth', 'surveyDay']
fcover_and_aux['duplicatedDate'] = np.nan
fcover_and_aux = find_duplicates(fcover_and_aux, date, 'duplicatedDate')

no duplicates found
duplicates found


---
# 3. Export

In [89]:
# clean up
pfts = {'deciduous shrub':'deciduousShrubCover',
        'evergreen shrub':'evergreenShrubCover',
        'deciduous tree':'deciduousTreeCover',
        'evergreen tree':'evergreenTreeCover',
        'forb':'forbCover',
        'graminoid':'graminoidCover',
        'non-vascular':'nonvascularSumCover',
        'bryophyte':'bryophyteCover',
        'lichen':'lichenCover',
        'litter':'litterCover',
        'bare ground':'baregroundCover',
        'water':'waterCover'}

# rename columns and replace NaN cover with 0
fcover_and_aux.rename(columns=pfts, inplace=True)
fcover_cols = list(pfts.values())
fcover_and_aux[fcover_cols] = fcover_and_aux[fcover_cols].fillna(0.0)

# reproject and set cover data type
fcover_and_aux = fcover_and_aux.to_crs('EPSG:4326')
fcover_and_aux[fcover_cols] = fcover_and_aux[fcover_cols].astype(np.float32)

ValueError: Columns must be same length as key

In [None]:
fcover_and_aux.to_csv('akveg_fcover_data.csv')