In [1]:
# Import libraries
import os
import datetime
import requests
import io
from glob import glob

import earthpy as et
import numpy as np
import pandas as pd

In [3]:
# Build the path to the 'earth-analytics/data/kba_th_sa' directory
# (kba_th_sa stands for KBA Threshold Sensitivity Analysis)
path_parts = [et.io.HOME, 'earth-analytics', 'data', 'kba_th_sa']
data_path = os.path.normpath(os.path.join(*path_parts))

# Create the 'kba_th_sa' directory when it is missing, report which case
# applied, then make 'earth-analytics/data/kba_th_sa' the working directory
if not os.path.exists(data_path):
    print("Creating new directory; working directory set to newly created "
          "'earth-analytics/data/kba_th_sa'.")
    os.makedirs(data_path)
else:
    print('Working directory is set to earth-analytics/data/kba_th_sa.')

os.chdir(data_path)


Working directory is set to earth-analytics/data/kba_th_sa.


In [4]:
# Download the csv file stored in the GitHub repository (contains info on
# selected ecosystems taken from the LF_EVT_2020_README file, with an added
# 'Short_Name' field that is used as the index)
url_ecoinfo = "https://raw.githubusercontent.com/csandberg303/kba-threshold-sensitivity-analysis/main/assets/data/from_LF_EVT_2020_README.csv"

# The timeout keeps the cell from hanging indefinitely on a stalled
# connection; raise_for_status() stops here on an HTTP error instead of
# handing an error page to read_csv (which would fail later on 'Short_Name')
response = requests.get(url_ecoinfo, timeout=30)
response.raise_for_status()
ecoinfo = response.content

# Read the downloaded bytes as UTF-8 text and turn them into a pandas
# dataframe indexed by 'Short_Name'
ecoinfo_df = pd.read_csv(io.StringIO(ecoinfo.decode('utf-8'))).set_index("Short_Name")

ecoinfo_df

Unnamed: 0_level_0,OID,Value,Count_30m,US_hectare,US_km2,EVT_Name_1,LFRDB,elcode,element_gl,NatureServ,...,A3_FINAL,B1_FINAL,B2_FINAL,C3_FINAL,D3_FINAL,RLE_FINAL,GRANK_EQUI,RED,GREEN,BLUE
Short_Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
prairie,132,7142,106116,9550,96,Columbia Basin Palouse Prairie,7142,CES304.792,722880,Columbia Basin Palouse Prairie,...,CR,LC,LC,EN,VU,CR,G1,204,252,105
foothill,137,7147,4546277,409165,4092,Western Great Plains Foothill and Piedmont Gra...,7147,CES303.817,722856,Western Great Plains Foothill and Piedmont Gra...,...,VU,LC,LC,NT,LC,VU,G3,218,238,243
mesic,224,7322,1090956,98186,982,Crowley's Ridge Mesic Loess Slope Forest,7322,CES203.079,798100,Crowley's Ridge Mesic Loess Slope Forest,...,LC,EN,EN,VU,CR,CR (EN-CR),G1G2,144,201,143
bluff,229,7327,2050154,184514,1845,East Gulf Coastal Plain Northern Loess Bluff F...,7327,CES203.481,723105,East Gulf Coastal Plain Northern Loess Bluff F...,...,EN,LC,LC,VU-EN,EN,EN,G2,149,143,26
pine,244,7346,5015841,451426,4514,Atlantic Coastal Plain Fall-line Sandhills Lon...,7346,CES203.254,723231,Atlantic Coastal Plain Fall-line Sandhills Lon...,...,EN,LC,LC,VU,CR,EN (EN-CR),G1G2,70,96,32
tallgrass,314,7421,10225903,920331,9203,Central Tallgrass Prairie,7421,CES205.683,722976,Central Tallgrass Prairie,...,CR,LC,LC,DD,CR,CR,G1,243,201,28
dune,323,7431,19717,1775,18,Southwest Florida Dune and Coastal Grassland,7431,CES203.539,723063,Southwest Florida Dune and Coastal Grassland,...,DD,LC,LC,NE,CR,CR,G1,245,252,179
dome,335,7447,900234,81021,810,South Florida Cypress Dome,7447,CES411.365,723151,South Florida Cypress Dome,...,DD,VU,LC,DD,LC,VU,G3,54,163,120
marsh,676,9197,1634510,147106,1471,Northern Atlantic Coastal Plain Tidal Salt Marsh,9197,CES203.519,723073,Northern Atlantic Coastal Plain Tidal Salt Marsh,...,EN,LC,LC,DD,EN,EN,G2,131,173,223


In [5]:
# Add 2 columns to 'ecoinfo_df'

# 1st column - 'Type' (needed for the CLUZ add-in input file 'targets.csv';
# might not be needed for marxanconpy). np.select assigns a number based upon
# the string seen in the 'RLE_FINAL' column:
#   Type = 1 if 'CR', 'CR (EN-CR)', 'EN' or 'EN (EN-CR)'
#   Type = 2 if 'VU'
# Any other status receives np.select's default value of 0.
rle_status = ecoinfo_df['RLE_FINAL']
type_1_statuses = ['CR', 'CR (EN-CR)', 'EN', 'EN (EN-CR)']
ecoinfo_df['Type'] = np.select(
    [rle_status.isin(type_1_statuses), rle_status == 'VU'],
    [1, 2])

# 2nd column - 'Current_IUCN_TH'. np.select assigns a threshold fraction
# based upon the column 'Type' (5% if 1, 10% if 2; 0 for any other Type)
eco_type = ecoinfo_df['Type']
ecoinfo_df['Current_IUCN_TH'] = np.select(
    [eco_type == 1, eco_type == 2],
    [.05, .10])

ecoinfo_df

Unnamed: 0_level_0,OID,Value,Count_30m,US_hectare,US_km2,EVT_Name_1,LFRDB,elcode,element_gl,NatureServ,...,B2_FINAL,C3_FINAL,D3_FINAL,RLE_FINAL,GRANK_EQUI,RED,GREEN,BLUE,Type,Current_IUCN_TH
Short_Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
prairie,132,7142,106116,9550,96,Columbia Basin Palouse Prairie,7142,CES304.792,722880,Columbia Basin Palouse Prairie,...,LC,EN,VU,CR,G1,204,252,105,1,0.05
foothill,137,7147,4546277,409165,4092,Western Great Plains Foothill and Piedmont Gra...,7147,CES303.817,722856,Western Great Plains Foothill and Piedmont Gra...,...,LC,NT,LC,VU,G3,218,238,243,2,0.1
mesic,224,7322,1090956,98186,982,Crowley's Ridge Mesic Loess Slope Forest,7322,CES203.079,798100,Crowley's Ridge Mesic Loess Slope Forest,...,EN,VU,CR,CR (EN-CR),G1G2,144,201,143,1,0.05
bluff,229,7327,2050154,184514,1845,East Gulf Coastal Plain Northern Loess Bluff F...,7327,CES203.481,723105,East Gulf Coastal Plain Northern Loess Bluff F...,...,LC,VU-EN,EN,EN,G2,149,143,26,1,0.05
pine,244,7346,5015841,451426,4514,Atlantic Coastal Plain Fall-line Sandhills Lon...,7346,CES203.254,723231,Atlantic Coastal Plain Fall-line Sandhills Lon...,...,LC,VU,CR,EN (EN-CR),G1G2,70,96,32,1,0.05
tallgrass,314,7421,10225903,920331,9203,Central Tallgrass Prairie,7421,CES205.683,722976,Central Tallgrass Prairie,...,LC,DD,CR,CR,G1,243,201,28,1,0.05
dune,323,7431,19717,1775,18,Southwest Florida Dune and Coastal Grassland,7431,CES203.539,723063,Southwest Florida Dune and Coastal Grassland,...,LC,NE,CR,CR,G1,245,252,179,1,0.05
dome,335,7447,900234,81021,810,South Florida Cypress Dome,7447,CES411.365,723151,South Florida Cypress Dome,...,LC,DD,LC,VU,G3,54,163,120,2,0.1
marsh,676,9197,1634510,147106,1471,Northern Atlantic Coastal Plain Tidal Salt Marsh,9197,CES203.519,723073,Northern Atlantic Coastal Plain Tidal Salt Marsh,...,LC,DD,EN,EN,G2,131,173,223,1,0.05


In [6]:
# Create lists that will be used later in iteration loops

# Threshold multipliers to test
test_threshold = [1.0, 0.75, 0.50, 0.25]

# FUTURE UPDATE (CREATE ISSUE?) This can be accommodated once we can create
# hex and shp files for Landfire defined ecosystems on the fly, rather than
# using the files that Lana created in ArcGIS:
# Ask for user input matching the 'Value' column, as a way of selecting
# ecosystems from the full Landfire 2020 EVT data, and assign that input to
# the list variable 'value_filter'.
# Also ask for a one-word 'Short_Name' value for each ecosystem being
# analyzed, which will be used for file naming.

# IN THE MEANTIME, 'Short_Name' values have been hardcoded, using the values
# Lana chose when creating her ArcGIS files.  These were added manually to the
# csv file uploaded to GitHub. When/if the 'Short_Name' comes from user input,
# the source data in repo assets (and links to source files in this code) will
# need to be updated to the full version of the LF_2020_EVT_README file.

# 'value_filter' is hard coded to the 'Value' codes of the three ecosystems
# whose shp and hex files (Lana's) were initially uploaded to the GitHub
# repository - 'dome', 'dune', and 'mesic'
value_filter = [7431, 7322, 7447]

# Keep only the records whose 'Value' appears in value_filter
is_selected = ecoinfo_df['Value'].isin(value_filter)
eco_subset_df = ecoinfo_df[is_selected]

# Alphabetical list of the ecosystems to be analyzed, taken from the
# 'Short_Name' index of eco_subset_df
eco_list = sorted(eco_subset_df.index.values.tolist())
eco_list

['dome', 'dune', 'mesic']

In [7]:
# create function to write targets.csv files, for each threshold test value

# NOT SURE IF THIS WILL ULTIMATELY BE NEEDED IN ITS CURRENT FORM, AS THE 
#TARGETS INPUT FILE MIGHT ONLY BE USED BY THE QGIS ADDIN CLUZ, RATHER THAN 
# MARXAN/MARXANCONPY ITSELF

def create_targets_files(eco, thresholds_test, eco_info, path):
    """Write one 'targets_<threshold>.csv' file per tested threshold value.

    These targets files are the input expected by the CLUZ add-in in QGIS
    (they may not be needed by Marxan/marxanconpy itself).

    Each file holds a single row for the ecosystem with the columns
    'Id' (index), 'Name', 'Type' and 'Target', where
    Target = threshold test value * US_km2 * Current_IUCN_TH.

    Parameters
    ----------
    eco : str
        Name of the ecosystem to be analyzed; must be a label in the index
        of `eco_info`. It is also written to the 'Name' column.

    thresholds_test : list
        Threshold multipliers to be tested for the ecosystem; one csv file
        is written per value, named 'targets_' + str(value) + '.csv'.

    eco_info : dataframe
        Source of info for each ecosystem, indexed by ecosystem name, with
        the columns 'OID' (written as 'Id'), 'Type', 'US_km2' and
        'Current_IUCN_TH'.

    path : filepath
        Existing directory in which the targets files are saved.

    Returns
    -------
    None
        The csv files are written to `path` as a side effect.

    Raises
    ------
    KeyError
        If `eco` is not in the index of `eco_info`, or a required column
        is missing.
    """
    # These values do not depend on the threshold being tested, so they are
    # looked up once, with a single .loc[row, column] access per value.
    eco_id = eco_info.loc[eco, 'OID']
    eco_type = eco_info.loc[eco, 'Type']
    base_target = (eco_info.loc[eco, 'US_km2']
                   * eco_info.loc[eco, 'Current_IUCN_TH'])

    for val in thresholds_test:
        target_df = pd.DataFrame(data={'Id': [eco_id],
                                       'Name': [eco],
                                       'Type': [eco_type],
                                       'Target': [val * base_target]}
                                 ).set_index('Id')
        outpath = os.path.join(path, 'targets_' + str(val) + '.csv')
        target_df.to_csv(outpath)
    return
   

In [9]:
# MAIN WORKFLOW BEGINS HERE, WITH CREATING THE FILE DIRECTORIES AND INPUT FILES 
# NEEDED FOR MARXAN ANALYSIS

# Each time the code runs, a new timestamped directory is created, that will
# contain subdirectories created from the 'Short_Name' value of selected 
# ecosystems. Each of these ecosystem subdirectories will have a named input 
# and output folder. 

# Functions will be written to create the specific input files needed for 
# Marxan analysis. Initially the formulas will use Lana's sample shp and hex 
# files that have been uploaded to the GitHub repository.  Eventually they 
# might use files that are obtained programmatically using the Landfire API
# (or a future user's own source files)

# STEP 1:
# Create a timestamped directory, with a subdirectory for each ecosystem 
# selected in 'value_filter'.  Each ecosystem directory includes the input and 
# output directories (currently also creates the 'targets.csv' files for each 
# value in 'test_threshold' list variable, which are used by CLUZ in QGIS. 
# These files are created using the function 'create_targets_files', but may 
# not ultimately be needed by Marxan/marxanconpy)

newdir = os.path.normpath(os.path.join(data_path, 
                                       datetime.datetime.now().strftime(
                                           '%Y%m%d_%H%M%S')))
os.makedirs(newdir)
# The notebook's working directory becomes the new timestamped directory
os.chdir(newdir)
for eco in eco_list:
    # Paths are built explicitly from 'newdir' so the loop does not have to
    # change the working directory for every ecosystem
    eco_data_path = os.path.normpath(os.path.join(newdir, eco))
    # os.makedirs also creates the ecosystem directory itself
    os.makedirs(os.path.join(eco_data_path, 'input'))
    # IN THE FUTURE - write add'l functions to create, inside 'input', the
    # input files needed for later analysis using marxanconpy.  Lana has
    # generated these input files with ArcGIS using the add-in ArcMarxan.  
    # we may be able to get the python code for creating these input files 
    # from QGIS using the add-in QMarxan - which is the open source version of 
    # the ArcMarxan tool Lana used.
    os.makedirs(os.path.join(eco_data_path, 'output'))
    create_targets_files(eco, test_threshold, ecoinfo_df, eco_data_path)
    
os.getcwd()

'C:\\Users\\cwsnd\\earth-analytics\\data\\kba_th_sa\\20220517_110602'

In [10]:
# FURTHER STEPS:
# Once directories & input files are set up as needed for Marxan analysis,
# the next section of code needed is the set of functions that run
# marxanconpy itself and handle the results
# (store files, add collected info (and summary info?) to a dataframe, which 
# will ultimately be saved as a .csv output file)

In [11]:
# THIS CELL SHOWS ORIGINAL WORKFLOW IDEA FOR REFERENCE - CURRENT CODE IS ABOVE

# GET LIST OF ECOSYSTEMS TO ANALYZE 
#     (FROM PROVIDED 'LF_EVT_2020' FILE)
    
# SET UP FOLDER STRUCTURE FOR EACH ECOSYSTEM (CREATE INPUT/OUTPUT FOLDERS ETC)
#     (I'VE GOT SOME INITIAL CODE FOR THIS IN A JUPYTER NOTEBOOK ON OUR 
#     NATURESERVE REPOSITORY)
     
# GET TWO INITIAL SHAPEFILES FOR EACH ECOSYSTEM THAT WILL BE NEEDED FOR MARXAN 
# ANALYSIS, SAVE TO APPROPRIATE FOLDER 
#     (USE LANA'S EXISTING FILES FROM ARCGIS WITH PAT'S 2020 DATA, OR 
#     INVESTIGATE LANDFIRE API FOR 2016 DATA)
     
# SET UP LOGIC OF CURRENT KBA THRESHOLDS
#     (FOR IUCN A2:THREATENED ECOSYSTEM TYPE
#          IF ECOSYSTEM'S 'LF_EVT_2020[RLE_FINAL]' VALUE = CR, CR (EN-CR), 
#                  EN or EN (EN-CR)
#              THRESHOLD = >= 5% OF ECOSYSTEM TOTAL EXTENT 
#                 (USE 30M, US-hectare, or US_km2 COLUMN OF 'LF_EVT_2020')
#          IF ECOSYSTEM'S 'LF_EVT_2020[RLE_FINAL]' VALUE = VU
#              THRESHOLD = >= 10% OF ECOSYSTEM TOTAL EXTENT)

# CREATE LIST FOR THRESHOLD TEST (1.0, 0.75, 0.50, 0.25)
     
# *INITIAL OUTPUT* - create table of 4 threshold values for each ecosystem    
#      (from Pat's Study Plan - "This will result in a tabular summary of 4 
#          threshold values per ecosystem type, depending on its RLE status")
     
# ASSIGN EACH ECOSYSTEM TO A SPATIAL CHARACTERISTIC TYPE (PATCHY, LINEAR or 
#     MATRIX) - THIS IS BASED UPON OUR VISUAL ASSESSMENT, WHICH WILL BE 
#     APPROVED BY PAT
     
# ITERATION LOOP:
#     FOR EACH ECOSYSTEM IN LIST
#          RUN MARXANCONPY ANALYSIS (***see note below) FOR THE APPROPRIATE 
#             KBA THRESHOLD, BASED ON 'RLE_FINAL'
#          COMPLETE SEPARATE RUNS FOR EACH TEST LEVEL (1.0, 0.75, 0.50, 0.25)
#          SAVE RESULTS FROM EACH RUN TO DATAFRAME
#     SAVE DATAFRAME TO TABLE (INCL. THE ASSIGNED SPATIAL CHARACTERISTIC INFO)

# SAVE FINAL DATAFRAME AS CSV
     
     
# *** USING MARXANCONPY FOR FURTHER ANALYSIS
     
#      INFO FROM MARXANCONPY DOCUMENTATION - 
#         https://remi-daigle.github.io/marxanconpy/
#         The first step is to import the package and create a MarCon dictionary.
#              import marxanconpy
#              import os
#              project = marxanconpy.marcon.new_project()
     
#      INFO FROM https://marxanconnect.ca/tutorial.html#4_marxanconpy_Python_Module
#          Using the spatial dependencies approach using landscape-based data
#          https://marxanconnect.ca/targets.html
     


     
