In [2]:
import ee
import geemap
import pandas as pd
import geopandas as gpd
import glob
from pathlib import Path
import numpy as np
import time
import regex as re

In [3]:
# Initialise the Earth Engine client library for this kernel session.
# NOTE(review): none of the cells visible here use `ee` afterwards — confirm it is still required.
ee.Initialize()

# Prudhoe Spine Rd

In [4]:
# Load the raw environmental table that the exported GeoJSON is derived from.
d = '/mnt/poseidon/remotesensing/arctic/data/vectors/AK-ArcticVegetationArchive'
subd = 'aava_pruarc_dwalker_2015_alldata/aava_prudhoebayarcsees_modified_source_data/prudhoebayarcsees_modified_environmental_data'
f = 'aava_pruarc_dwalker_2015_allenv_modsrc.csv'
# -9999.0 is read as missing; the file is decoded as ISO-8859-1.
# NOTE(review): the name `csv` shadows the stdlib module of the same name; kept because later cells rely on it.
csv = pd.read_csv(
    Path(d, subd, f),
    na_values=[-9999.0],
    quotechar='"',
    encoding="ISO-8859-1",
)

In [5]:
# The real header sits on data row 5: use that row as the column labels and
# keep only the rows below it, renumbered from 0.
# NOTE: this cell is not idempotent — re-running it without reloading `csv` discards more rows.
new_header = csv.iloc[5]
csv = csv.iloc[6:].reset_index(drop=True)
csv.columns = new_header

In [6]:
# Normalise every column label: lowercase, trim surrounding whitespace,
# then turn the remaining spaces into underscores.
csv.columns = (
    csv.columns
    .str.lower()
    .str.strip()
    .str.replace(' ', '_')
)

In [7]:
# Pick the percent-cover columns (labels containing "percent"), then leave out
# the ones whose label contains "_dead_" so only live vegetation remains.
cover = csv.filter(regex='(percent)')
dead_columns = list(cover.filter(regex='_dead_'))
live_columns = cover.columns.drop(dead_columns)
cover = cover[live_columns]

In [8]:
# Derive a tidy name for every cover column:
#   1. delete the '_live_' marker,
#   2. delete any parenthesised text,
#   3. add the 'cover_' prefix,
#   4. trim leading/trailing underscores.
parenthetical = re.compile(r'\(.*?\)')
new_cover_cols = []
for col in cover.columns.tolist():
    base_name = parenthetical.sub('', col.replace('_live_', ''))
    new_cover_cols.append(f'cover_{base_name}'.strip('_'))

In [10]:
# Build the final table: apply the tidy cover names, then clean every label.
replacements = dict(zip(cover.columns.tolist(), new_cover_cols))
csv.rename(columns=replacements, inplace=True)  # swap in the cover_* names

# Drop parenthesised text (e.g. units) from all labels.
csv.columns = [re.sub(r'\(.*?\)', '', name) for name in csv.columns.tolist()]
# Trim underscores left dangling at either end.
csv.columns = csv.columns.str.strip('_')
# Collapse runs of underscores to a single '_'. Replacing them with '' (as
# before) fused neighbouring words when a parenthetical sat mid-name, e.g.
# 'a_(x)_b' -> 'a__b' -> 'ab' instead of 'a_b'.
csv.columns = [re.sub(r'_{2,}', '_', name) for name in csv.columns.tolist()]

In [246]:
# Write the cleaned table out as GeoJSON, with point geometry built from the
# longitude/latitude columns (WGS84).
d = '/mnt/poseidon/remotesensing/arctic/data/vectors/alaska_plot_data/v2'
f = 'spine_rd_prudhoe_bay_veg_plots_environmental.geojson'

plot_points = gpd.points_from_xy(csv['longitude'], csv['latitude'])
gdf = gpd.GeoDataFrame(csv, geometry=plot_points, crs="EPSG:4326")
Path(d, f).write_text(gdf.to_json())

In [None]:
# be sure to drop columns with dead in the name

# Atqasuk

In [13]:
# Atqasuk: load the v1 environmental-data GeoJSON.
d = '/mnt/poseidon/remotesensing/arctic/data/vectors/alaska_plot_data/v1'
f = 'Atqasuk_Environmental_Data.geojson'
gj = gpd.read_file(Path(d) / f)

In [23]:
# clean up dataframe
# gj.columns

In [22]:
# drop cover_total

# Barrow NGEE

In [7]:
# Barrow NGEE Arctic: load the v1 environmental-data GeoJSON.
d = '/mnt/poseidon/remotesensing/arctic/data/vectors/alaska_plot_data/v1'
f = 'Barrow_NGEE_Arctic_Environmental_Data.geojson'
gj = gpd.read_file(Path(d) / f)

In [8]:
# Display the column labels of the Barrow NGEE table loaded above.
gj.columns

Index(['id', 'field_plot_number', 'date', 'releve_area', 'shape',
       'species_coverscale', 'field_community_name', 'author', 'location',
       'georeference_source', 'accuracy', 'latitude', 'longitude', 'elevation',
       'slope', 'aspect', 'habitat_type', 'site_moisture_regime',
       'organic_depth', 'soil_ph', 'cover_of_trees', 'cover_of_shrubs',
       'cover_of_tall_shrub', 'cover_of_low_shrub', 'cover_of_dwarf_shrub',
       'cover_of_prostrate_dwarf_shrub', 'cover_of_graminoid',
       'cover_of_tussock_graminoid', 'cover_of_forb',
       'cover_of_seedless_vascular_plants', 'cover_of_moss', 'cover_of_lichen',
       'cover_of_soil_crust', 'cover_of_algae', 'cover_of_soil',
       'cover_of_rock', 'cover_of_water', 'cover_of_litter',
       'cover_vegetation', 'mean_vegetation_height', 'shrub_height',
       'herb_height', 'polygon_type', 'geometry'],
      dtype='object')

In [31]:
# drop cover_of_shrubs and cover_vegetation

# Barrow Tundra

In [33]:
# Barrow Tundra IBP (2010 repeat plots): load the v1 environmental-data GeoJSON.
d = '/mnt/poseidon/remotesensing/arctic/data/vectors/alaska_plot_data/v1'
f = 'Barrow_Tundra_IBP_Environmental_Data_Repeat_Plots_2010.geojson'
gj = gpd.read_file(Path(d) / f)

In [63]:
# -9 is used as a missing-value code in this file and was not converted to NaN
# on read, so convert it explicitly.
# NOTE(review): this touches every column — confirm no column holds a genuine -9.
gj.replace(to_replace=-9, value=np.nan, inplace=True)
# gj.columns

# Arctic Network

In [56]:
# Arctic Network: load the v1 environmental-data GeoJSON.
d = '/mnt/poseidon/remotesensing/arctic/data/vectors/alaska_plot_data/v1'
f = 'arctic_network_environmental_data.geojson'
gj = gpd.read_file(Path(d) / f)

In [57]:
# Untangle the two dwarf-needleleaf columns: the misspelled lowercase label is
# renamed to the crown-class name, and the capitalised label takes over the
# canonical lowercase cover name.
needleleaf_renames = {
    'cover_dwarf_needleaf': 'dwarf_needleleaf_crown_class',
    'Cover_dwarf_needleleaf': 'cover_dwarf_needleleaf',
}
gj.rename(columns=needleleaf_renames, inplace=True)

In [61]:
# Write the corrected Arctic Network table to the v2 folder as GeoJSON, with
# point geometry rebuilt from the longitude/latitude columns (WGS84).
d = '/mnt/poseidon/remotesensing/arctic/data/vectors/alaska_plot_data/v2'
f = 'arctic_network_environmental_data.geojson'

plot_points = gpd.points_from_xy(gj['longitude'], gj['latitude'])
gdf = gpd.GeoDataFrame(gj, geometry=plot_points, crs="EPSG:4326")
Path(d, f).write_text(gdf.to_json())

# Atlas-1

In [4]:
# Atlas-1: load the v1 environmental-data GeoJSON.
d = '/mnt/poseidon/remotesensing/arctic/data/vectors/alaska_plot_data/v1'
f = 'atlas-1_environmental_data.geojson'
gj = gpd.read_file(Path(d) / f)

In [5]:
# Display the column labels of the Atlas-1 table loaded above.
gj.columns

Index(['id', 'site', 'field_plot_number', 'date', 'latitude', 'longitude',
       'elevation', 'surficial_geology', 'surficial_geomorphology',
       'topographic_position', 'slope', 'aspect', 'releve_shape',
       'releve_area', 'organic_layer_depth', 'soil_ph', 'soil_texture',
       'plant_community_name', 'cover_low_shrub', 'cover_dwarf_shrub',
       'cover_prostrate_dwarf_shrub', 'cover_evergreen_shrubs',
       'cover_deciduous_shrubs', 'cover_forbs', 'cover_graminoids',
       'cover_lichens', 'cover_bryophytes', 'cover_horsetails', 'cover_rocks',
       'cover_bare_soil', 'cover_water', 'cover_frost_scars', 'total_dead',
       'canopy_ht', 'remarks', 'geometry'],
      dtype='object')

In [None]:
# I don't know what to do with this one.
# the shrubs add up to deciduous + evergreen only sometimes.
# maybe I just drop the parent columns and pray for now.

# Flux Tower

In [8]:
# Flux Tower: load the v1 environmental-data GeoJSON.
# NOTE(review): the file name spells "enivronmental"; presumably that matches the file on disk — verify before renaming.
d = '/mnt/poseidon/remotesensing/arctic/data/vectors/alaska_plot_data/v1'
f = 'flux_tower_zona_enivronmental_data.geojson'
gj = gpd.read_file(Path(d) / f)

In [9]:
# Display the column labels of the Flux Tower table loaded above.
gj.columns

Index(['id', 'field_releve_number', 'general_location', 'specific_location',
       'survey_date', 'microtopography', 'latitude', 'longitude',
       'gps_accuracy_m', 'community_name', 'soil_ph',
       'organic_layer_thickness_cm', 'depth_of_standing_water_cm',
       'vegetation_mean_height_cm', 'herb_layer_height_cm', 'moss_height_cm',
       'cover_tree', 'cover_shrub', 'cover_tall_shrub', 'cover_low_shrub',
       'cover_dwarf_shrub', 'cover_prostrate_dwarf_shrub', 'cover_graminoids',
       'cover_tussock_graminoids', 'cover_forb',
       'cover_seedless_vascular_plant', 'cover_moss', 'cover_lichen',
       'cover_crustose_lichen', 'cover_algae', 'cover_soil', 'cover_rock',
       'cover_water', 'cover_litter', 'cover_total_vegetation',
       'location_remarks', 'geometry'],
      dtype='object')

In [10]:
# Drop cover_shrub and cover_total_vegetation: the individual shrub columns sum to cover_shrub.
# The two lichen columns are independent of each other, and so are the two graminoid columns.

# Oumalik

In [11]:
# Oumalik: load the v1 environmental-data GeoJSON.
d = '/mnt/poseidon/remotesensing/arctic/data/vectors/alaska_plot_data/v1'
f = 'oumalik_environmental_data.geojson'
gj = gpd.read_file(Path(d) / f)

In [12]:
# Display the column labels of the Oumalik table loaded above.
# Several labels carry trailing spaces (e.g. 'cover_water '), as the output shows.
gj.columns

Index(['id', 'plot_number', 'date', 'latitude', 'longitude', 'community',
       'plant_community_name', 'microrelief_type ', 'microrelief_ht ',
       'releve_area', 'releve_shape', 'cover_tree_layer', 'cover_shrub_layer',
       'cover_tall_shrubs', 'cover_low_shrubs', 'cover_erect_dwarf_shrubs',
       'cover_prostrate_dwarf_shrubs', 'cover_graminoids',
       'cover_tussock_graminoids', 'cover_forbs',
       'cover_seedless_vascular_plants', 'cover_mosses_liverworts',
       'cover_lichen_layer', 'cover_crust', 'cover_algae_layer',
       'cover_bare_soil ', 'cover_bare_rock', 'cover_water ',
       'cover_litter_layer', 'cover_total', 'canopy_ht', 'tree_layer_ht',
       'shrub_layer_ht', 'herb_layer_ht', 'moss_layer_ht', 'thaw_depth ',
       'aspect', 'slope', 'water_depth', 'site_moisture ', 'summer_air_temp ',
       'duration_snow', 'wind_regime ', 'age_surface ', 'surface_stability ',
       'cryoturbation ', 'disturbance_type', 'disturbance_intensity',
       'disturbance_s

In [13]:
# cover_total never exceeds 100, yet the sum of the individual cover columns
# can exceed 100 — the reason for this mismatch is not yet understood.
# cover_total definitely needs to be dropped.
# cover_shrub_layer is always null; drop it as well to be safe.

# Tundra Fire

In [14]:
# Tundra Fire: load the v1 environmental-data GeoJSON.
d = '/mnt/poseidon/remotesensing/arctic/data/vectors/alaska_plot_data/v1'
f = 'tundra_fire_veg_plots_environmental_data.geojson'
gj = gpd.read_file(Path(d) / f)

In [16]:
# this has a lot of columns but it's literally just all null
# only litter and soil

# Happy Valley

In [4]:
# Happy Valley: load the v1 environmental-data GeoJSON.
d = '/mnt/poseidon/remotesensing/arctic/data/vectors/alaska_plot_data/v1'
f = 'Happy_Valley_Environmental_Data.geojson'
gj = gpd.read_file(Path(d) / f)

In [6]:
# Display the column labels of the Happy Valley table loaded above.
gj.columns

Index(['id', 'field_plot_number', 'principal_habitat',
       'preliminary_community_name', 'plant_community', 'microsite',
       'date_sampled', 'latitude', 'longitude', 'landform', 'landform_desc',
       'surficial_geology', 'surficial_geology_desc',
       'surficial_geomorphology', 'surficial_geomorphology_desc',
       'microsite_code', 'microsite_desc', 'glacial_geology',
       'glacial_geology_desc', 'topographic_position',
       'topographic_position.1', 'soil_units', 'soil_units_desc', 'slope',
       'aspect', 'thaw_depth',
       'thaw_depth_standard_deviation_where_calculated', 'site_moisture',
       'site_moisture_desc', 'soil_moisture', 'soil_moisture_desc', 'exposure',
       'exposure_desc', 'estimated_snow_duration',
       'estimated_snow_duration_desc', 'animal_human_disturbance',
       'animal_human_disturbance_desc', 'stability', 'stability_desc',
       'plot_size', 'cover_tall_shrubs', 'cover_low_shrubs',
       'dwarf_shrub_cover', 'cover_evergreen_shrubs'

# Frost Boils

In [None]:
# I will also re-assess this one's shrub columns.
# They all seem to add up, 
# but I want to make sure evergreen/deciduous aren't parents