In [1]:
# Import libraries
import os
import csv
import datetime
import io
import pathlib
from pathlib import Path
import requests
import shutil
import time
from glob import glob

# from qgis.core import *
# import qmarxan_utils as qmu # import runMarxanOnce
# import marxanconpy as mx

import contextily as cx
import earthpy as et
import earthpy.plot as ep
import geopandas as gpd
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from PIL import Image
from rasterio.crs import CRS
from rasterio.plot import plotting_extent
import rioxarray as rxr
import seaborn as sns
import subprocess

import kba_thresh_sa_scripts as ks

# set global cache override variable
CACHE_OVERRIDE = False

In [2]:
# # TEST CELL
# # bigrunpath = os.path.normpath(os.path.join(G:\CHRISTY\fromHarddrive\kba_thresh_sa\20220619_144619rm_3r_1000000i_10000))
# bigfile = open(r"G:/CHRISTY/fromHarddrive/kba_thresh_sa/20220619_144619rm_3r_1000000i_10000/final_summary.csv", "r")
# bigfile = pd.read_csv(bigfile)
# file_mv = bigfile.loc[bigfile['Missing_Values'] == 0]
# file_mv = file_mv.sort_values("Score")
# file_mv


#### Check for 'earth-analytics/data/kba_thresh_sa' directory
* If it exists, it will be set as the working directory.
* If it doesn't exist, user is prompted to return to first notebook in workflow.

#### *IN THE FUTURE -* 
* Should there also be a check to verify that the 'hex_shp' dir exists, and/or 
that the 'hex_shp' directory actually contains shapefiles? 

In [3]:
# Define a filepath to 'earth-analytics/data/kba_thresh_sa' directory
data_path = os.path.normpath(os.path.join(et.io.HOME, 
                                          'earth-analytics', 
                                          'data', 
                                          'kba_thresh_sa'))

# Check if 'kba_thresh_sa' directory exists.  If it doesn't, prompt user to 
# return to the first notebook to begin workflow.  If it does, change working
# directory to 'earth-analytics/data/kba_thresh_sa', and define the path to 
# hex files directory that was created in the first notebook.
if os.path.exists(data_path):
 print('Working directory is set to earth-analytics/data/kba_thresh_sa.')
 os.chdir(data_path)
 # define the path to the hexfiles that was created in the 1st notebook
 shp_data_path = os.path.normpath(os.path.join(data_path, 'hex_shp'))
else:
 print("Please go to first notebook in workflow to set up initial'\
      'directories")
 

Working directory is set to earth-analytics/data/kba_thresh_sa.


In [4]:
# Define path to Marxan.exe executable file has been manually copied over to 
# 'kba_thresh_sa' directory (maybe it can be copied to there from repo?

# v4.0.6 (might be causing 'target2' crash? use 2.43 instead)
marxan_path = os.path.join(data_path, "Marxan_x64.exe")

# v2.43
marxan_243_path = os.path.join(data_path, 'Marxan_x64_243.exe')

# v1.8.10
marxan_1810_path = os.path.join(data_path, 'Marxan_1_8_10.exe')

#### Save table of information to 'earth-analytics/data/kba_thresh_sa'  

* The workflow requires an associated table, with information about the 
ecosystems to be analyzed *(Need to provide more detail about what specific 
information this table requires... Or will we simply work with the full Landfire
readme info, which is saved to the repo?  If so, do we need to add a unique 
one-word 'Short_Name' to each ecosystem listed, or change the file-naming system 
to use one of the existing numerical unique identifiers - like 'OID' or 
'Value'?)*.  

* This file will be saved locally to the 'earth-analytics/data/kba_thresh_sa' 
directory.  

* In our inital workflow, we are using the 'LF_EVT_2020_README' file that was
provided along with the Landfire raster.  
&nbsp; I've manually edited this file to  
&emsp; 1. show only the rows for the nine ecosystems selected for initial 
analysis.  
&emsp; 2. add a new column to show the one word short name Lana used when creating 
her initial files in ArcGIS.  

* This file has been manually uploaded to our GitHub repo as 'Assets/Data/
from_LF_EVT_2020_README.csv'. 

* The code below will download that file from URL to a pandas dataframe, then
save that dataframe locally as a csv.  

#### *IN THE FUTURE -* 
* This existing code could be reused if the user were prompted for a url where 
they have their table stored?
* Or,  
&emsp; 1. prompt user to save their table to 'earth-analytics/data/kba_thresh_sa' 
as specifically named 'ecosystem_info.csv'  
&emsp; 2. Then check for 'ecosystem_info.csv' in 
'earth-analytics/data/kba_thresh_sa'  
&emsp; 3. If found, load to dataframe  
&emsp; &emsp; If not found, prompt user to "Save ecosystm_info.csv' to 'earth-analytics
/data/kba_thresh_sa' directory, then rerun notebook" 
        
           

In [5]:
# Download the csv file stored on GitHub repository 
# (contains info on selected ecosystems taken from LF_EVT_2020_README 
# file, with an added 'Short_Name' field that is used as index)

# Provide the URL (using raw content at GitHub)
ecoinfo_url = ("https://raw.githubusercontent.com/csandberg303/"
               "kba-threshold-sensitivity-analysis/main/assets/data/"
               "from_LF_EVT_2020_README.csv")

# Create local cache overide variable
cache_override = True or CACHE_OVERRIDE

# Provide the path to local directory
ecoinfo_path = os.path.normpath(
    os.path.join(data_path, 'from_LF_EVT_2020_README.csv'))

# Create dataframe from information at provided URL
ecoinfo_df = pd.read_csv(ecoinfo_url).set_index('Short_Name')

# Check for csv in local directory and create from df if needed
if not os.path.exists(ecoinfo_path) or cache_override:
    # Read csv at URL into pandas dataframe, using 'Short_Name' col as index
    ecoinfo_df.to_csv(ecoinfo_path)
    
ecoinfo_df

Unnamed: 0_level_0,OID,Value,Count_30m,US_hectare,US_km2,EVT_Name_1,LFRDB,elcode,element_gl,NatureServ,...,A3_FINAL,B1_FINAL,B2_FINAL,C3_FINAL,D3_FINAL,RLE_FINAL,GRANK_EQUI,RED,GREEN,BLUE
Short_Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
prairie,132,7142,106116,9550,96,Columbia Basin Palouse Prairie,7142,CES304.792,722880,Columbia Basin Palouse Prairie,...,CR,LC,LC,EN,VU,CR,G1,204,252,105
foothill,137,7147,4546277,409165,4092,Western Great Plains Foothill and Piedmont Gra...,7147,CES303.817,722856,Western Great Plains Foothill and Piedmont Gra...,...,VU,LC,LC,NT,LC,VU,G3,218,238,243
mesic,224,7322,1090956,98186,982,Crowley's Ridge Mesic Loess Slope Forest,7322,CES203.079,798100,Crowley's Ridge Mesic Loess Slope Forest,...,LC,EN,EN,VU,CR,CR (EN-CR),G1G2,144,201,143
bluff,229,7327,2050154,184514,1845,East Gulf Coastal Plain Northern Loess Bluff F...,7327,CES203.481,723105,East Gulf Coastal Plain Northern Loess Bluff F...,...,EN,LC,LC,VU-EN,EN,EN,G2,149,143,26
pine,244,7346,5015841,451426,4514,Atlantic Coastal Plain Fall-line Sandhills Lon...,7346,CES203.254,723231,Atlantic Coastal Plain Fall-line Sandhills Lon...,...,EN,LC,LC,VU,CR,EN (EN-CR),G1G2,70,96,32
tallgrass,314,7421,10225903,920331,9203,Central Tallgrass Prairie,7421,CES205.683,722976,Central Tallgrass Prairie,...,CR,LC,LC,DD,CR,CR,G1,243,201,28
dune,323,7431,19717,1775,18,Southwest Florida Dune and Coastal Grassland,7431,CES203.539,723063,Southwest Florida Dune and Coastal Grassland,...,DD,LC,LC,NE,CR,CR,G1,245,252,179
dome,335,7447,900234,81021,810,South Florida Cypress Dome,7447,CES411.365,723151,South Florida Cypress Dome,...,DD,VU,LC,DD,LC,VU,G3,54,163,120
marsh,676,9197,1634510,147106,1471,Northern Atlantic Coastal Plain Tidal Salt Marsh,9197,CES203.519,723073,Northern Atlantic Coastal Plain Tidal Salt Marsh,...,EN,LC,LC,DD,EN,EN,G2,131,173,223


In [6]:
# Add 2 columns to 'ecoinfo_df'

# 1st column - Add column 'Type' (needed for CLUZ addin input file 
#'targets.csv'; might not be needed for marxanconpy) Uses np.select to assign 
# a number (1 or 2), based upon the string seen in the 'RLE_FINAL' column
# (Type = 1 if 'CR', 'CR (CR-EN)', 'EN (CR-EN) or 'EN'; Type = 2 if 'VU')

# create a list of conditions
type_conditions = [(ecoinfo_df['RLE_FINAL'] == 'CR'), 
                  (ecoinfo_df['RLE_FINAL'] == 'CR (EN-CR)'),
                  (ecoinfo_df['RLE_FINAL'] == 'EN'),
                  (ecoinfo_df['RLE_FINAL'] == 'EN (EN-CR)'),
                  (ecoinfo_df['RLE_FINAL'] == 'VU')]

# create a list of the values to assign for each condition
type_values = [1, 1, 1, 1, 2]

# create new column using np.select to assign values using lists as arguments
ecoinfo_df['Type'] = np.select(type_conditions, type_values)

# 2nd column - Add column 'Current_IUCN_TH'. Uses np.select to assign a 
# threshold percentage, based upon the column 'Type' (5% if 1, 10% if 2)

# create a list of conditions
current_threshold_conditions = [(ecoinfo_df['Type'] == 1), 
                               (ecoinfo_df['Type'] == 2)]

# create a list of the values to assign for each condition
current_threshold_values = [.05, .10]

# create new column using np.select to assign values using lists as arguments
ecoinfo_df['Current_IUCN_TH'] = np.select(
    current_threshold_conditions, current_threshold_values)

ecoinfo_df

Unnamed: 0_level_0,OID,Value,Count_30m,US_hectare,US_km2,EVT_Name_1,LFRDB,elcode,element_gl,NatureServ,...,B2_FINAL,C3_FINAL,D3_FINAL,RLE_FINAL,GRANK_EQUI,RED,GREEN,BLUE,Type,Current_IUCN_TH
Short_Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
prairie,132,7142,106116,9550,96,Columbia Basin Palouse Prairie,7142,CES304.792,722880,Columbia Basin Palouse Prairie,...,LC,EN,VU,CR,G1,204,252,105,1,0.05
foothill,137,7147,4546277,409165,4092,Western Great Plains Foothill and Piedmont Gra...,7147,CES303.817,722856,Western Great Plains Foothill and Piedmont Gra...,...,LC,NT,LC,VU,G3,218,238,243,2,0.1
mesic,224,7322,1090956,98186,982,Crowley's Ridge Mesic Loess Slope Forest,7322,CES203.079,798100,Crowley's Ridge Mesic Loess Slope Forest,...,EN,VU,CR,CR (EN-CR),G1G2,144,201,143,1,0.05
bluff,229,7327,2050154,184514,1845,East Gulf Coastal Plain Northern Loess Bluff F...,7327,CES203.481,723105,East Gulf Coastal Plain Northern Loess Bluff F...,...,LC,VU-EN,EN,EN,G2,149,143,26,1,0.05
pine,244,7346,5015841,451426,4514,Atlantic Coastal Plain Fall-line Sandhills Lon...,7346,CES203.254,723231,Atlantic Coastal Plain Fall-line Sandhills Lon...,...,LC,VU,CR,EN (EN-CR),G1G2,70,96,32,1,0.05
tallgrass,314,7421,10225903,920331,9203,Central Tallgrass Prairie,7421,CES205.683,722976,Central Tallgrass Prairie,...,LC,DD,CR,CR,G1,243,201,28,1,0.05
dune,323,7431,19717,1775,18,Southwest Florida Dune and Coastal Grassland,7431,CES203.539,723063,Southwest Florida Dune and Coastal Grassland,...,LC,NE,CR,CR,G1,245,252,179,1,0.05
dome,335,7447,900234,81021,810,South Florida Cypress Dome,7447,CES411.365,723151,South Florida Cypress Dome,...,LC,DD,LC,VU,G3,54,163,120,2,0.1
marsh,676,9197,1634510,147106,1471,Northern Atlantic Coastal Plain Tidal Salt Marsh,9197,CES203.519,723073,Northern Atlantic Coastal Plain Tidal Salt Marsh,...,LC,DD,EN,EN,G2,131,173,223,1,0.05


#### *IN THE FUTURE -* 

Currently our code will work with the ecosystem raster and hex files that Lana 
created in ArcGIS using the ArcMarxan plugin.  Ultimately we hope to work directly
with the full Landfire EVT 2020 raster, but the file is proving too large to 
effectively manage with our personal laptops. A solution may be found using the 
2016 Landfire data which has an available API (the 2020 data is scheduled to be 
published to the API later this year). An alternitive solution may be found using 
Dask, or possibly Amazon Web Services.

If/When our code can access the full CONUS raster, the source data in repo assets 
(and links that file in this code) will need to be updated to the full version of 
the raster's LF_2020_EVT_README file. Once that occurs, we could ask for user 
input to get entries matching the 'Values' column in that file, as a way of 
selecting specific ecosystems from the full Landfire EVT 2020 data. That user 
input would be assigned to a list variable 'value_filter'. 

The user would then be prompted for a one-word 'Short_Name' value for each 
ecosystem being analyzed (ex. mesic, dune, dome), to be used in file naming. This 
abbreviated name would be added to the ecoinfo_df.  

Currently the 'Short_Name' values have been hardcoded, to match what Lana chose 
when creating her ArcGis files. The 'value_filter' variable will also be 
hardcoded, to match the values seen in the LF_2020_EVT_README file for the three 
ecosystems we are using as test data (Crowley's Ridge Mesic Loess Slope Forest, 
Southwest Florida Dune and Coastal Grassland, and South Florida Cypress Dome).


In [7]:
# CREATE LISTS THAT WILL BE USED LATER IN ITERATION LOOPS

# Create list of threshold values to test
test_threshold = [1.0, 0.25, 0.50, 0.75] 

# Define list variable 'value_filter' to show the values matching the 'Values' 
# column of 'ecoinfo_df' for the three ecosystems which have shp and hex files 
# uploaded to the GitHub repository - 'dome', 'dune', and 'mesic', 
value_filter = [
    7431, # dune
    7322, # mesic
    7447 # dome
    ]

# use value_filter to create a new df with only matching records
eco_subset_df = ecoinfo_df[ecoinfo_df['Value'].isin(value_filter)]

# Create alphabetical list of ecosystems to be analyzed, taken from the 
# 'Short_Name' column of eco_subset_df
eco_list = eco_subset_df.index.values.tolist()
eco_list.sort()

# print(eco_list)
eco_list

['dome', 'dune', 'mesic']

In [8]:
# DEFINE VARIABLES TO BE USED IN MARXAN RUN

# provide a testrun_basename (will appear in filename for final summary files)
testrun_basename = 'fullrun_2plots'

# ESPG value to set as CRS for raster and shapefile
espg = '5070'

# # Set prop for spec.dat (default value = 30% of total extent) 
# (must be between 0 and 1, Lana tutorial suggested 0.3)
prop = 0.3

# Species Penalty Factor - more detail needed... we're using default val of 1
spf = 10

# Number of repeat runs (or solutions) - orig value in qmarxan = 100
numreps = 100

# Number of iterations for annealing 
# orig value 1000000
# (RUNMODE 1 & 3 did not complete successfully with numitins=10 (or 1000?)
numitns = 10000

# Set blm (default value from qmarxan code = 1)
blm = 100
    
# test runmode
# runmode_ls = [1 , 3] # runmode_ls used for test loop only, no longer needed
runmode = 1

#### Loop through the eco_list, create directories and input files needed by Marxan.

Each time the code below runs, a new timestamped diretory is created. Inside will
be subdirectories created from the 'Short_Name' value of the selected ecosystems 
seen in the 'eco_subset' variable.

Each of these ecosystem subdirectories will have the following named 
subdirectories -
* input - where files needed by marxan analysis are stored (bound.dat, pu.dat, 
puvsp.dat, spec.dat)
* output - where files generated by marxan analysis are stored
* pu - pu and report seen in qmarxan setup (purpose tbd)
* report - pu and report seen in qmarxan setup (purpose tbd)
* source data - where the rasters and PU hex_shp files are moved to, after they
are copied from the 'r_tif' and 'hex_shp' folders

A fifth input file 'input.dat' is created and placed in the main ecosystem 
directory.

The code below will also create a 'targets.csv' file for each value in 
the 'test_threshold' list variable.  This is done using the function 
'create_targets_files'. These 'targets.csv' files are used by the CLUZ 
plugin in QGIS.  They are not used by the QMarxan QGIS plugin, and ultimately may
not be needed for our marxanconpy workflow.  *Perhaps the loop inside
the function that uses the 'test_threshold' variable could be reused for another 
purpose?*

Currently, we are using the input files that Lana created using ArcGIS, that have 
been saved to the repo.  The code below will simply copy those files from the repo 
and save them into each ecosystem's 'input' folder.

#### *IN THE FUTURE -* 
* Our project sponsor has said that the set of input files are commonly prepared 
using GIS tools.  If that is the practice we will continue, new files for 
additional ecosystmes will be generated using QGIS/QMarxan.
* Another option may be to create a new function in this workflow for each 
specific input file (input.dat, bound.dat, pu.dat, puvsp.dat, spec.dat.).  A 
function has been written to create the input.dat file, using code seen in the 
qmarxan repository from Apropos Information Systems (used under the GPL-2.0 
license). Creating the input files programatically rather than in GIS may allow 
for easier manipulation of the files within the workflow to perform the 
sensitivity analysis of the KBA threshold values.

 #### Conducting a Sensitivity Analysis of the IUCN TH - spec.dat 'target2'
 
 The KBA threshold can be tested using the 'target2' column in the spec.dat 
 input file.  
'target2' sets a minimum size for an identified area to count against 
the target value.  If a patch of selected hexcells does not meet that
minimum value, it won't appear in a final solution. 
This value is calculated for each ecosystem -
iterate by - test_threshold = [1.0, 0.75, 0.50, 0.25]
'target2' = (total area * 'Current_IUCN_TH') x test_threshold



In [9]:
# ********* NEW TEST FOR WHILE LOOP *********
# USE THIS CELL FOR MARXAN v4.06 AND MARXAN v2.43 (CURRENTLY USING 2.43)

# RUN THIS CELL TO BEGIN AUTOMATED WORKFLOW 
# (1ST CELL OF TWO - BEGIN MARXAN ANALYSIS)

# checks to see if a directory based upon provided 'testrun_basename' has 
# already been made. If so, a number will be added to the end 
# 'testrun_basename' before creating new directory (so that each named 
# directory will be identifiably unique).
testrun_basename_ck = glob(os.path.join(data_path, '*' + testrun_basename))
if testrun_basename_ck:
    expand = 1
    while True:
        expand +=1
        new_tr_bn = testrun_basename + str(expand)
        testrun_basename_ck = glob(os.path.join(data_path, '*' + new_tr_bn))
        if testrun_basename_ck:
            continue
        else:
            testrun_basename = new_tr_bn
            break
print('testrun_basename: ' + testrun_basename)

# set new directory name, based upon timestamp and provided 'testrun_basename'
new_dir = os.path.normpath(
    os.path.join(data_path, datetime.datetime.now().strftime('%Y%m%d_%H%M%S') 
                 + '_' + testrun_basename))
os.makedirs(new_dir)
print(new_dir + '\n')

# Set 'heurtype' - Determined by runmode entry in input.dat 
# if RUNMODE = 3 then use heurtype = 1 (greedy), else -1 (not used)
# (NOTE: this variable is used for RUNMODE 3 only, and currently this
# multiloop workflow is using RUNMODE 1. Keeping it in notebook in case that 
# may ever change)

if runmode == 3:
    heurtype = 1
else:
    heurtype = -1
print('runmode: ' + str(runmode) +'\nheurtype: ' + str(heurtype) +'\n')


### 1ST LOOP BEGINS HERE 

# LOOP THROUGH ECOSYSTEMS (in 'ecolist') 
for eco in eco_list:
    os.chdir(new_dir)
    # create directory for each ecosystem selected for analysis
    os.makedirs('eco_' + eco)
    os.chdir('eco_' + eco)
    eco_data_path = os.path.normpath(os.path.join(new_dir, 'eco_' + eco))
    # create 'source_data' directory to store ArcGIS shp and tif files
    os.makedirs('source_data')
    os.chdir('source_data')
    source_data_path = (new_dir, 'eco_' + eco, 'source_data')
    # copy source files that were stored locally to the 'hex_shp' and 'r_tif' 
    # directories after running 1st notebook. Our workflow is currently  
    # using the files Lana created manually using ArcGIS
    ks.get_source_files_targetloops(os.path.join(data_path, "hex_shp"), eco)
    ks.get_source_files_targetloops(os.path.join(data_path, "r_tif"), eco)
     
    # create 'orig_input_files' directory to store the 5 .dat files) for the 
    # ecosystem so that each Marxan analysis loop will pick them up from this 
    # location (since much info for the analysis runs will remain constant as 
    # the KBA threshold size is tested) 
    os.chdir(eco_data_path)
    os.makedirs('orig_input_files')
    os.chdir('orig_input_files')
    orig_input_data_path = os.path.normpath(os.path.join(data_path, 
                                                         new_dir, 
                                                         'eco_' + eco, 
                                                         'orig_input_files'))
    
    # CREATE INPUT FILES THAT WILL REMAIN CONSTANT DESPITE TEST LEVELS 
    # (pu.dat, puvsp.dat, bound.dat).  
    
    # CREATE INITIAL PU.DAT FROM ORIGINAL FORMULA 
    # Provides a record of each planning unit hex cell in the .shp file,  
    # using a default uniform cost of '1', and  a status of '0' which 
    # indicates that unit is avaialable to Marxan for selection. As the loops 
    # continue until set proportion target of 30% overall extent is reached, 
    # this pu.dat file will be updated so that selected cells will show a 
    # status value of '3' for unavailable/locked-out.
    ks.create_pu_dat_targetloops(eco, 
                                 eco_data_path)  
    orig_pu_dat_path = os.path.normpath(os.path.join(
        orig_input_data_path, 'pu.dat'))
    pu_dat = pd.read_csv(orig_pu_dat_path)

    # CREATE 'updated_pu_dat' DF; BASED UPON 'pu.dat'
    # This df will be used to track selected cells as loops progress, so that 
    # those cells will be locked out of selection in future loops.
    # (THIS BECOMES THE NEW 'pu.dat' INPUT FILE IN FUTURE LOOPS)
    updated_pu_dat = pu_dat.set_index('id')

    # CREATE 'pu_selected' DF BASED UPON 'pu.dat'. 
    # This df will be used to keep an overall record of which cell was 
    # selected in each loop, so that each loop's selection can be seen and 
    # measured independently.
    pu_selected = pu_dat.set_index('id')
    # Initial value of 'select' column is set to 'not selected'
    pu_selected['select'] = 'not selected'
     
    # USE 'get_marxan_input_files' FUNCTION TO COPY IN ANY REMAINING .DAT 
    # FILES NEEDED THAT ARE CREATED IN ArcGIS/QGIS RATHER THAN PYTHON.
    # This formula currently is used for 'bound.dat' and 'puvsp.dat'.
    # Formula will copy files that have been created using ArcMarxan tool 
    # in ArcGIS then saved to the repository.
    ks.get_marxan_input_files_targetloops(eco, 
                                          ['bound.dat', 
    #                                     "pu.dat", 
                                           'puvsp.dat', 
    #                                     "spec.dat"
                                          ])
    bound_dat_path = os.path.normpath(os.path.join(
        orig_input_data_path, 'bound.dat'))
    bound_dat = pd.read_csv(bound_dat_path)
    puvsp_dat_path = os.path.normpath(os.path.join(
        orig_input_data_path, 'puvsp.dat'))
    puvsp_dat = pd.read_csv(puvsp_dat_path)
    
    os.chdir(eco_data_path)
    
    # create empty list variable that will be used to collect summary info 
    # from loops
    select_summary_ls = []
        
    # LOOP THROUGH EACH KBA THRESHOLD SIZE TEST 
    # (values in 'test_threshold', defined in initial 'CREATE LISTS' cell)
    for test in test_threshold:
        # set 'target2' variable 
        # to equal KBA Threshold Value for Ecosystem at Test Level
        # target2 = US_m2 x Current_IUCN_TH x threshold test level
        current_iucn_th = eco_subset_df.at[eco,'Current_IUCN_TH']
        us_km2 = eco_subset_df.at[eco,'US_km2']
        us_m2 = us_km2 * 1000000

        target2 = test * current_iucn_th * us_m2 
        target2 = round(target2)

        # set the 'target' used in spec.dat file to equal 'target2'
        target = target2
        
        # create Scenario ID from 'eco' & 'test' 
        # (used as prefix in filenames, so any '.' seen in 'test' are removed) 
        scen_id = (eco + str(test).translate(
            str.maketrans('', '', '.')) + '_run')
        
        # CREATE INPUT FILES IN THIS LOOP WHEN THEY REQUIRE TEST LEVEL INFO 
        # (input.dat, spec.dat)
             
        # CREATE 'input.dat' FILE USING FORMULA ADAPTED FROM 'qmarxan_toolbox' 
        # (including the 'formatAsME' format as Marxan Exponent function)
        # Some input parameters are provided to the formula, to replace the 
        # default values provided in the formula code. 
        ks.create_input_dat(orig_input_data_path, 
                            blm, numreps, 
                            numitns, 
                            runmode, 
                            heurtype, 
                            scen_id)
        input_dat_path = os.path.normpath(os.path.join(
            orig_input_data_path, "input.dat"))
        input_dat = pd.read_csv(input_dat_path)
        
        # CREATE THE 'spec.dat' FILE FROM v4 FORMULA (includes 'target' only)
        os.chdir(orig_input_data_path)
        ks.create_spec_dat_v4_targetloops(eco_subset_df, eco, target, spf)
        spec_dat_path = os.path.normpath(os.path.join(
            orig_input_data_path, 'spec.dat'))

        # Print initial info statement for test loop and begin creating the 
        # needed directories
        print("Begin: " + scen_id)
        os.chdir(eco_data_path)
        os.makedirs(scen_id) 
        ecotest_data_path = os.path.normpath(os.path.join(data_path, new_dir, 
                                                          'eco_' + eco, 
                                                          scen_id))

        # SET 'end_count' VALUE TO END MULTILOOP
        # Based upon given 'prop' value of 30% and 'Current_IUCN_TH' 
        # 1st ex: if prop = 30% & eco is VU(KBA 10%); then 3 x 10% KBA = 30% 
        # end_count = 3 (@ 1.0 test), 6 @ 0.50 test and 12 @ 0.25 test
        # 2nd ex: if prop = 30% & eco is CR/EN(KBA 5%); then 6 x 5% KBA = 30% 
        # end_count = 6 (@ 1.0 test), 12 @ 0.50 test and 24 @ 0.25 test
        end_count = round(prop/(ecoinfo_df['Current_IUCN_TH'] * test))
        count = 1
        
        # create path for 'pu_selected' file in 'scen_id' directory
        pu_selected_path = os.path.normpath(
            os.path.join(new_dir, 'eco_' + eco, scen_id, 
                         scen_id + '_pu_selected.csv'))
        pu_selected.to_csv(pu_selected_path)
        
        # create path for 'updated_pu_dat' file in 'scen_id' directory
        updated_pu_dat_path = os.path.normpath(
            os.path.join(new_dir, 'eco_' + eco, scen_id, 
                         scen_id + '_updated_pu.dat'))
        updated_pu_dat.to_csv(updated_pu_dat_path)
        
        # BEGIN MULTILOOP FOR EACH TEST IN EACH ECOSYSTEM DIRECTORY        
        while count <= end_count[eco]:
            # create directory for loop, to store Marxan input/output files
            os.chdir(ecotest_data_path)
            loop_count = 'loop_' + str(f"{count:02d}")
            os.makedirs(loop_count)
            loop_count_path = os.path.normcase(os.path.join(
                ecotest_data_path, loop_count))
            os.chdir(loop_count_path)
            
            # COPY IN INPUT FILE FROM 'orig_input_files' DIRECTORY
            shutil.copy(input_dat_path, os.getcwd())

            # CREATE INPUT DIRECTORY
            # which is where the four remaining .dat files will be stored
            os.makedirs('input')
            eco_input_data_path = os.path.normpath(os.path.join(
                ecotest_data_path, loop_count, 'input'))
            os.chdir(eco_input_data_path)
            
            # COPY IN THE 3 DAT FILES THAT WILL REMAIN UNCHANGED AS LOOPCOUNT
            # PROGRESSES (bound.dat, puvsp.dat and spec.dat) 
            unchanged_dat_files = (bound_dat_path, 
                                   puvsp_dat_path, 
                                   spec_dat_path)
            for file in unchanged_dat_files:
                shutil.copy(file, os.getcwd())
        
            # GET APPROPRIATE 'pu.dat' FILE FOR LOOPCOUNT
            # This is where the loopcount determines if the original 'pu.dat' 
            # file should be used (if Loop 1), or if the 'updated pu.dat' file
            # generated from the previous loops should be used (Loops 2-End) 
            if count == 1:
                shutil.copy(orig_pu_dat_path, os.getcwd())
                pu_dat = pd.read_csv(orig_pu_dat_path)
            else:
                shutil.copy(updated_pu_dat_path, os.getcwd())
                os.rename(scen_id + '_updated_pu.dat','pu.dat')
            
            # create remaining directories
            os.chdir(loop_count_path)
            os.makedirs('output')
            os.makedirs('report')
            os.makedirs('pu')      

            # BEGIN MARXAN ANALYSIS RUN
            print('\n\n' + scen_id + " " + loop_count + 
                  ': MARXAN ANALYSIS INITIATED')   
            # call on marxan executable (currently using v2.43)
            os.startfile(marxan_243_path)
            
            # DEFINE A PAUSE FOR MARXAN EXECUTION, USING ONE OF TWO METHODS
            # This is needed to allow Marxan time to finish writing 
            # output files before the workflow tries to locate them            
            # NOTE: ONE OPTION MUST BE COMMENTED OUT BEFORE RUNNING THE CELL 
            
            # OPTION 1: Hit Enter to Continue 
            # Wait for Marxan pop-up execution to complete, then press 'Enter'
            # at prompt in screen output window after 'The End' is seen
            # (overall quickest, but requires attention)
#             def pause():
#                 programPause = input("Press the <ENTER> key to continue...")
#             pause()
#             print('Wait to see 'The End' at bottom of Marxan execution '
#                  'pop-up before pressing Enter')

            # OPTION 2: Set sleep timer length
            # Define a sleep timer so that Python will simply count down that 
            # number of seconds before moving on. Need to ensure that the 
            # sleep time set > filewriting/execution, or errors in reading 
            # output files will occur
            # (automates the workflow, but takes longer time overall)
            print('time.sleep(15) applied to pause workflow execution while '
                  'Marxan output files are written')
            time.sleep(2) # Sleep for 10 seconds
          
            # WHEN MARXAN COMPLETES, GET BEST RUN SOLUTION AND UPDATE PU.DAT

            # 1- test for output files, to see if run completed successfully
            # open '_best' file created by Marxan and saved to 'output' dir
            globfile_best = glob(os.path.normpath(os.path.join(
                ecotest_data_path, loop_count, 'output', '*_best.csv')))            
            # if no file is found, print error message to screen
            if globfile_best == []:
                output = print (scen_id + ": ERROR: 'pu_selected' file not "
                                "found - check output/log. \nWill need to "
                                "resolve error and rerun Marxan if final "
                                "output files haven't completed successfully")  
            else:
                #Create list of the selected cells from 'best_run' output file
                best_run_file = pd.read_csv(globfile_best[0])
                selected_df = best_run_file[best_run_file['SOLUTION'] == 1]
                selected_cells = selected_df['PUID'].tolist()
                print(selected_cells)
                for puid in selected_cells:
                    # Update the status of those cells in pu.dat from 
                    # '0'-available to '3'-unavailable/locked-out
                    updated_pu_dat.at[puid,'status']=3
                    # Update the status of those cells in pu_select's select 
                    # column to show in which run they were selected
                    pu_selected.at[puid, 'select'] = (
                        'Select_' + str(f"{count:02d}"))
                # add additional summary info to 'pu_selected'
                pu_selected['dir_path'] = new_dir
                pu_selected['Short_Name'] = eco               
                pu_selected['current_test_level'] = test
                pu_selected['Current_IUCN_TH'] = current_iucn_th
                pu_selected['US_km2'] = us_km2
                pu_selected['US_m2'] = us_m2
                pu_selected['30% of US_m2'] = us_m2*prop
                pu_selected['KBA @ current test (m_2)'] = target2
                pu_selected['BLM'] = blm
                pu_selected['SPF'] = spf
                # save updated 'updated_pu_dat' file for next loop
                updated_pu_dat.to_csv(updated_pu_dat_path)

            # add +1 to count, and continue 'while' loop
            count = count+1
            os.chdir(eco_data_path)
            
            # save 'pu_selected' file at ecotest_path level (ex mesic025)
            pu_selected.to_csv(pu_selected_path) 

            print(scen_id + ('info from ' + scen_id + 
                             ' will be added to final summary\n'))
            # Append 'pu_selected' to 'select_summary_df' for final summary df
            select_summary_ls.append(pu_selected)
        
# save info from loop stored in 'select_summary_ls' to '_final_summary.csv'
final_summary_df = pd.concat(select_summary_ls)
final_summary_df.to_csv(os.path.normpath(
os.path.join(new_dir, 'final_summary.csv')), index=False)
print("\n'final_summary.csv' saved to " + new_dir)
        
os.getcwd()
print('\nloop completed successfully')


testrun_basename: fullrun_2plots
C:\Users\cwsnd\earth-analytics\data\kba_thresh_sa\20220628_103352_fullrun_2plots

runmode: 1
heurtype: -1

finished copying source files from C:\Users\cwsnd\earth-analytics\data\kba_thresh_sa\hex_shp
finished copying source files from C:\Users\cwsnd\earth-analytics\data\kba_thresh_sa\r_tif
pu.dat file created successfully
bound.dat successfully copied from url
puvsp.dat successfully copied from url
orig_input_files: input.dat created successfully
spec.dat file created successfully (v4)
Begin: dome10_run


dome10_run loop_01: MARXAN ANALYSIS INITIATED
time.sleep(15) applied to pause workflow execution while Marxan output files are written
[387, 388, 389, 438, 439, 440, 441, 445, 448, 449, 450, 451, 484, 489]
dome10_runinfo from dome10_run will be added to final summary



dome10_run loop_02: MARXAN ANALYSIS INITIATED
time.sleep(15) applied to pause workflow execution while Marxan output files are written
[181, 182, 190, 249, 250, 251, 255, 256, 257, 258,

[1, 4, 13, 14, 18, 19, 20, 21, 22, 63, 83, 105, 144, 205, 207, 210, 211, 275, 276, 278, 279, 284, 342, 343, 405, 406, 461, 462, 466, 467, 468, 469, 474, 475, 504, 505, 510, 526, 529, 551, 554, 557, 558, 559, 560, 561, 562, 563]
dome075_runinfo from dome075_run will be added to final summary

finished copying source files from C:\Users\cwsnd\earth-analytics\data\kba_thresh_sa\hex_shp
finished copying source files from C:\Users\cwsnd\earth-analytics\data\kba_thresh_sa\r_tif
pu.dat file created successfully
bound.dat successfully copied from url
puvsp.dat successfully copied from url
orig_input_files: input.dat created successfully
spec.dat file created successfully (v4)
Begin: dune10_run


dune10_run loop_01: MARXAN ANALYSIS INITIATED
time.sleep(15) applied to pause workflow execution while Marxan output files are written
[225, 236]
dune10_runinfo from dune10_run will be added to final summary



dune10_run loop_02: MARXAN ANALYSIS INITIATED
time.sleep(15) applied to pause workflow execu

[170, 204, 228, 237, 239, 240, 241, 249, 310]
dune05_runinfo from dune05_run will be added to final summary



dune05_run loop_06: MARXAN ANALYSIS INITIATED
time.sleep(15) applied to pause workflow execution while Marxan output files are written
[14, 23, 157, 159, 163, 276, 301, 318, 333]
dune05_runinfo from dune05_run will be added to final summary



dune05_run loop_07: MARXAN ANALYSIS INITIATED
time.sleep(15) applied to pause workflow execution while Marxan output files are written
[55, 60, 62, 64, 65, 66, 165, 251, 252, 254, 255, 256, 257, 261]
dune05_runinfo from dune05_run will be added to final summary



dune05_run loop_08: MARXAN ANALYSIS INITIATED
time.sleep(15) applied to pause workflow execution while Marxan output files are written
[7, 11, 12, 52, 153, 208, 258, 263, 281, 284, 286, 287, 288]
dune05_runinfo from dune05_run will be added to final summary



dune05_run loop_09: MARXAN ANALYSIS INITIATED
time.sleep(15) applied to pause workflow execution while Marxan output fi

[44, 46]
mesic025_runinfo from mesic025_run will be added to final summary



mesic025_run loop_08: MARXAN ANALYSIS INITIATED
time.sleep(15) applied to pause workflow execution while Marxan output files are written
[18, 21]
mesic025_runinfo from mesic025_run will be added to final summary



mesic025_run loop_09: MARXAN ANALYSIS INITIATED
time.sleep(15) applied to pause workflow execution while Marxan output files are written
[152, 160]
mesic025_runinfo from mesic025_run will be added to final summary



mesic025_run loop_10: MARXAN ANALYSIS INITIATED
time.sleep(15) applied to pause workflow execution while Marxan output files are written
[54, 55]
mesic025_runinfo from mesic025_run will be added to final summary



mesic025_run loop_11: MARXAN ANALYSIS INITIATED
time.sleep(15) applied to pause workflow execution while Marxan output files are written
[166, 177]
mesic025_runinfo from mesic025_run will be added to final summary



mesic025_run loop_12: MARXAN ANALYSIS INITIATED
time.sleep

[125, 249]
mesic075_runinfo from mesic075_run will be added to final summary



mesic075_run loop_07: MARXAN ANALYSIS INITIATED
time.sleep(15) applied to pause workflow execution while Marxan output files are written
mesic075_run: ERROR: 'pu_selected' file not found - check output/log. 
Will need to resolve error and rerun Marxan if final output files haven't completed successfully
mesic075_runinfo from mesic075_run will be added to final summary



mesic075_run loop_08: MARXAN ANALYSIS INITIATED
time.sleep(15) applied to pause workflow execution while Marxan output files are written
mesic075_run: ERROR: 'pu_selected' file not found - check output/log. 
Will need to resolve error and rerun Marxan if final output files haven't completed successfully
mesic075_runinfo from mesic075_run will be added to final summary


'final_summary.csv' saved to C:\Users\cwsnd\earth-analytics\data\kba_thresh_sa\20220628_103352_fullrun_2plots

loop completed successfully


In [10]:
# inserting line to break execution betweeen workflow loops
# (so that file writing can complete before final summary process begins)
break_here

NameError: name 'break_here' is not defined

In [23]:
# 2ND PART OF CURRENT WORKFLOW - MARXAN 2.43 or 4.06 ONLY

# THIS WILL MERGE INFORMATION FROM THE 'pu_selected' FILE CREATED IN 1ST 
# WORKFLOW TO THE HEX SHAPEFILE, IN ORDER TO BE ABLE TO SHOW WHICH HEXES WERE 
# SELECTED.  THEN THE 'puvsp.dat' INPUT FILE WILL BE MERGED TO THE SHAPEFILE, 
# TO PROVIDE THE AMOUNT OF ECOSYSTEM (in m2) CONTAINED IN EACH HEX.  THIS 
# ALLOWS THE AREA OF THE SELECTION TO BE MEASURED AND PLOTTED, TO DETERMINE 
# IF THE SOLUTION MEETS REQUIREMENTS.  SUMMARY INFORMATION FROM THE RUNS WILL 
# ALSO BE COLLECTED AND SAVED TO A .CSV FILE FOR FURTHER ANALYSIS.

# Create empty lists outside the loop to store information:
# for plot images 
plot_im_list = []

# for info from shapefile, after its merged with 'puvsp.dat' and 'pu_selected'
shp_summary_ls = []

os.chdir(new_dir)
print('\nnew_dir ' + new_dir)

# Define 'ecotestdirs' glob list, to find all directories ending in '*_run' 
# (ex. 'dome025_run')
ecotestdirs = sorted(glob(os.path.join(data_path, '*' + testrun_basename, '*', 
                                       '*_run')))
print('ecotestdirs include ' + str(len(ecotestdirs)) + ' directories')
print('ecotest_data_path ' + ecotest_data_path)

count = 0

for ecotestdir in ecotestdirs:

    scen_id = os.path.split(ecotestdirs[count])[1]
    print('\nBegin loop for scen_id: ' + scen_id)
    os.chdir(ecotestdir)
    print(ecotestdir)
    get_eco = (''.join([i for i in scen_id if not i.isdigit()]))
    eco = get_eco.replace(get_eco[(len(get_eco)-4):], '')
    eco_data_path = os.path.normpath(os.path.join(new_dir, 'eco_' + eco))
    ecotest_data_path = os.path.normpath(os.path.join(new_dir, 'eco_' + eco, 
                                                      scen_id))
    
    # create 'selected_plot' dir if it doesn't already exist
    # (this is where the plots showing selected hexes will be stored)
    selected_plot_dir_path = os.path.normpath(os.path.join(ecotestdir, 
                                                          'selected_plot'))
    if os.path.isdir(selected_plot_dir_path):
        os.chdir(selected_plot_dir_path)
    else:
        os.makedirs('selected_plot')
        os.chdir('selected_plot')
    print('cwd ' + os.getcwd())
   
    # try to open 'pu_selected' file created in first workflow loop cell 
    globfile_selected = glob(os.path.normpath(
        os.path.join(ecotestdir, '*pu_selected*')))
    print('globfile_selected contains: ' + globfile_selected[0])
    if globfile_selected == []:
        output = print (scen_id + ": ERROR: 'pu_selected' file not found")
    else:
        # If found, merge 'pu_selected' with reprojected copy of the shp file
        # check if reprojected shp in 'source_data' already exists; create if
        # not found
        shp_layer_crs_path = os.path.normpath(os.path.join(
            new_dir, 
            'eco_' + eco, 
            'source_data', 
            eco + "_espg_" + espg +'.shp'))
        print('shp_layer_crs_path: ' + shp_layer_crs_path)
        print('eco: ' + eco)
        if glob(os.path.normpath(os.path.join(
            new_dir, '*', 'source_data', eco + "_espg_" + espg +'.shp'))):
            print('reprojected shp file check = PASS')
        else:
            # open the original shp file from 'eco_data_path/source_data' 
            orig_shp_data_path = glob(os.path.join(new_dir, '*', 
                                                   "source_data", 
                                                   eco + '.shp'))[0]
            print('\n reprojecting source shapefile;\norig_shp_data_path ' 
                  + orig_shp_data_path)
            orig_shp_layer = gpd.read_file(orig_shp_data_path)
            # reproject CRS of shp
            shp_layer_crs = orig_shp_layer.to_crs(epsg=espg)
            # create new .shp file
            shp_layer_crs.to_file(shp_layer_crs_path, index=False)

        # open reprojected shp layer and prepare to merge with other files
        merged_shp = gpd.read_file(shp_layer_crs_path)
        # merge reprojected shp file with 'pu_selected' & 'puvsp_dat' dfs                
        # add 'id' index to enable merge with other files
        merged_shp.insert(0, 'id', range(1, 1 + len(merged_shp)))
        merged_shp.set_index('id')
        # get 'pu_selected' file from 'globfile_selected' list
        pu_selected_path = globfile_selected[0]
        pu_selected = pd.read_csv(pu_selected_path).set_index('id')
        # merge 'pu_selected' to shp layer (adds 'select' column, & more *)
        merged_shp = merged_shp.merge(pu_selected, on='id')
        # open 'puvsp.dat' from input directory
        puvsp_path = glob(os.path.normpath(os.path.join(ecotestdir, '*', 
                                                        'input', 
                                                        'puvsp.dat')))[0]
        # merge with shp layer to get 'amount' from puvsp
        puvsp_dat = pd.read_csv(puvsp_path)
        puvsp_dat = puvsp_dat.rename(columns={'pu': 'id'}).set_index('id')
        merged_shp = merged_shp.merge(puvsp_dat, on='id')
        
        # use 'amount' value to calculate 'percent_of_total' 
        # (the proportion of total ecosystem extent found in each hexcell)
        merged_shp['percent_of_total'] = (
            merged_shp['amount']/merged_shp['US_m2'])
#         print('percent of total: ' + merged_shp[percent_of_total])
#         print(merged_shp)

        # save merged shapefile as new file
        # check if file already exists, if not create it
        merged_shp_layer_path = os.path.normpath(os.path.join(
            ecotest_data_path,
            'selected_plot', 
            scen_id + "_merged.shp"))
        if os.path.exists(merged_shp_layer_path):
            print(scen_id + "_merged.shp file check = PASS")
        else:
            # save merged shp with add'l 'selected' info as new shape file
            # THIS WOULD BE THE TIME TO CHECK FOR COLUMN NAMES >10 CHARS
            merged_shp.to_file(merged_shp_layer_path, index=False)
            print (scen_id + ': ' + eco + '.shp merged with ' + scen_id + 
                   "'pu_selected' and 'puvsp.dat', saved as " + scen_id +
                   "merged.shp") 
        # verify shp file exists, and print update to screen
        if os.path.exists(merged_shp_layer_path):
            print(scen_id + " : merged shapefile saved to 'selected_plot' "
                  "directory") 
        else:
            print(scen_id + (": Error: reprojected shapefile was not able"
               " to be saved"))

        # check if reprojected tif in 'source_data' exists, if not create it
        tif_layer_crs_path = os.path.normpath(os.path.join(
            new_dir, 'eco_' + eco, 'source_data', 
            eco + "_espg_" + espg +'.tif'))
        if os.path.exists(tif_layer_crs_path):
            print('reprojected tif file check = PASS')
        else:
            # open the tif file saved at 'eco_data_path/source_data' location
            tif_data_path = os.path.join(new_dir, 'eco_' + eco, 'source_data', 
                                         eco + '.tif')
            tif_layer = rxr.open_rasterio(tif_data_path, 
                                          masked=True).squeeze()
            # reproject CRS of tif - 
            # first create a rasterio crs object
            crs_espg = CRS.from_string('EPSG:' + espg)
            # then reproject tif using the crs object
            tif_layer_crs = tif_layer.rio.reproject(crs_espg)
            # create new .tif file
            tif_layer_crs.rio.to_raster(tif_layer_crs_path)
            # verify tif file exits, and print update to screen
            if os.path.exists(tif_layer_crs_path):
                print(scen_id + ': Raster reprojected to ESPG: ' + espg + 
                      " and saved to 'source_data' directory")
            else:
                print(scen_id + (": Error: reprojected raster was not "
                                 "able to be saved"))

        # get data from merged shp, to include in 'final_summary.csv'  
        merged_shp_df = merged_shp[['id', 
                                    'amount',
                                    'percent_of_total',
                                    'select',
                                    'Short_Name',
                                    'Current_IUCN_TH',
                                    'current_test_level',
                                    'KBA @ current test (m_2)',
                                    'US_km2',
                                    'US_m2',
                                    '30% of US_m2',
                                    'BLM',
                                    'SPF',
                                    'dir_path',]].copy()
                
        merged_shp_df.set_index('id')

        merged_shp_df.to_csv(scen_id + '_merged_results.csv')
        print (scen_id + ': ' + scen_id + ("'_merged_results.csv' saved "
                                           "to 'selected_plot' dir"))
        ###    

        # CREATE PLOT SHOWING MULITPLE LOOP'S SELECTIONS OVER THE RASTER
        # * VISUALIZATIONS SHOWING HEXCELL SELECTION FROM BEST RUN AND 
        # HEATMAP OF HEXCELL EXTENT AS A PROPORTION OF TOTAL EXTENT
        # solution, and save each as a .png image file
        print ('preparing plots...')

        # define raster extent for plotting
        raster_extent = plotting_extent(tif_layer_crs,
                                         tif_layer_crs.rio.transform())
        
        # get metrics to include in figtitle
        # total amount (m2) of ecosystem included in selection
        selected_m = merged_shp.query(
            "select!='not selected'")['amount'].sum()
        selected_km = selected_m/1000000
        selected_m_string = str("{:,.2f}".format(selected_m))
        selected_km_string = str("{:,.2f}".format(selected_km))

        # get total extent of ecosystem (from the amount column, in puvsp.dat)
        eco_extent_km = eco_subset_df.at[eco,'US_km2']
        eco_extent_m = eco_extent_km * 1000000
        eco_extent_km_string = str("{:,.2f}".format(eco_extent_km))
        eco_extent_m_string = str("{:,.2f}".format(eco_extent_m))
        
        # get Conservation Target value (currently 30% x total extent)
        conserv_tgt_km = prop * eco_extent_km
        conserv_tgt_km_string = str("{:.2f}".format(conserv_tgt_km))
        
        # get current test level
#         test_level = merged_shp_df['current_test_level']
#         test_level_string = str("{:.0%}".format(test_level.mean())) 
        
        # get selected proporion of total
        selected_prop = selected_km / eco_extent_km
        sel_prop_string = str("{:.2%}".format(selected_prop))
        
        # set target2 value as string, for inclusion in figure title
#         us_m2 = eco_subset_df.at[eco,'US_km2']*1000000
        test_level = merged_shp_df['current_test_level'].mean()
        test_level_string = str("{:.0%}".format(test_level))
        current_iucn_th = eco_subset_df.at[eco,'Current_IUCN_TH']
        target2_m = (test_level * current_iucn_th * eco_extent_m).mean()
        target2_km = target2_m/1000000
        target2_m_string = str("{:,.2f}".format(target2_m))
        target2_km_string = str("{:,.2f}".format(target2_km))
        
        # print figure title info to screen for validation
        print('selected_km: ' + str(selected_km) + 
              '\neco_extent_km: ' + str(eco_extent_km) + 
              '\nconserv_tgt_km: ' + str(conserv_tgt_km) +
              '\nselected_prop: ' + str(selected_prop) +
              '\ntarget2_km: ' + str(target2_km) + 
              '\ntarget2_m: ' + str(target2_m) + 
              '\ntest_level: ' + str(test_level) +
              '\ncurrent_iucn_th: ' + str(current_iucn_th) +
              '\neco_extent_m: ' + str(eco_extent_m))
    
        # create strings for individual lines in figtitle
        ft1 = (scen_id.upper() + " - Searching for KBA @ " + test_level_string 
               + " Current IUCN Value\n")
        ft2 = ("Total Ecosytem Extent: " + eco_extent_km_string + " sq km\n")
        ft3 = ("Conservation Target: " + conserv_tgt_km_string + " sq km\n")
        ft3 = ("KBA target size: " + target2_km_string + " sq km\n")
        ft4 = ("Total Selected Ecosystem " + selected_km_string + "sq km\n(" +
               sel_prop_string + " of Total Extent)")
        
        selected_title_txt = ft1 + ft2 + ft3 + ft4
        
        # PLOT (3 LAYERS) - BEST SELECTION, RASTER, AND BASEMAP
        fig, ax = plt.subplots(figsize=(10, 10))
        merged_shp.plot(column='select',
                        cmap='nipy_spectral_r', # orig used viridis 
                        ax=ax, 
                        alpha=0.50, 
                        legend=True)
                          
        ax.set(title=selected_title_txt)
#         ax.set_axis_off()
        ax.axes.xaxis.set_visible(False)
        ax.axes.yaxis.set_visible(False)
        ax.patch.set_edgecolor('black')
        cx.add_basemap(ax=ax, crs=shp_layer_crs.crs)
        ax.imshow(tif_layer_crs, cmap='jet', extent=raster_extent, 
                  interpolation='nearest')
        plt.savefig((scen_id + '_pu_selections_over_raster.png'), 
                    facecolor='w', edgecolor='k', dpi=600)
        plt.close(fig)
        print(scen_id + ": _pu_selections_over_raster saved as .png\n")

        # convert 'selected' plot to image and add to 'plot_im_list', so 
        # that it'll be included in final pdf of plot images
        plot_im = glob(os.path.normpath(os.path.join(
            os.getcwd(), scen_id + "_pu_selections_over_raster.png")))
        plot_im = Image.open(plot_im[0])
        plot_im = plot_im.convert('RGB')
        plot_im_list.append(plot_im) 
        print("\nand will be included in 'final_plots.pdf'")
        print(scen_id + (
            'info from ' + scen_id + ' will be added to final summary\n'))
        
        count = count+1

for eco in eco_list:
    print('\nbegin loop for ' + eco)
    eco_data_path = os.path.normpath(os.path.join(new_dir, 'eco_' + eco))
#     ecotest_data_path = os.path.normpath(os.path.join(new_dir, 'eco_' + eco, 
#                                                       scen_id))
    os.chdir(eco_data_path)
    
    # define paths needed to get correct info for eco plot (no test level data needed)
    merged_shp_layer_path = glob(os.path.normpath(os.path.join(
        '*','selected_plot', '*_merged.shp')))[0]   
    merged_shp = gpd.read_file(merged_shp_layer_path)
    
    # define raster extent for plotting
    tif_layer_crs_path = os.path.normpath(os.path.join(
            os.getcwd(), 'source_data', eco + "_espg_" + espg +'.tif'))
#     tif_layer_crs_path = os.path.normpath(os.path.join(
#             new_dir, 'eco_' + eco, 'source_data', 
#             eco + "_espg_" + espg +'.tif'))
    tif_layer_crs = rxr.open_rasterio(tif_layer_crs_path, 
                                      masked=True).squeeze()
    raster_extent = plotting_extent(tif_layer_crs,
                                    tif_layer_crs.rio.transform())
    
    # PLOT (3 LAYERS) - 'PERCENT_OF_TOTAL', RASTER, AND BASEMAP
    fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(10, 10))
    fig.suptitle(t=("Percentage of the Total Ecosystem Extent "
                 "Containined in Each Hexcell\n"))
    plt.subplots_adjust(hspace=0.5)
    merged_shp.plot(column='percent_of', 
                    cmap='RdYlGn', 
                    ax=ax, 
                    alpha=0.65, 
                    legend=True)
    ax.set(title=(eco + ": Percentage of Total Ecosystem Extent "
                  "Containined in Each Hexcell\n"))
    cx.add_basemap(ax=ax, crs=merged_shp.crs, 
                    source=cx.providers.CartoDB.Positron) 
    ax.imshow(tif_layer_crs, cmap='jet', extent=raster_extent,
      interpolation='nearest')
    ax.axes.xaxis.set_visible(False)
    ax.axes.yaxis.set_visible(False)
    ax.set_title(eco.upper())
    plt.savefig((eco + '_hexcell_as%_total_extent.png'), facecolor='w', 
                edgecolor='k', dpi=600)
    print(eco + "_hexcell_as%_total_extent saved as .png'\n")
    plt.close(fig)
    
    # THE FINAL PLOTS PDF ENDS UP GETTING 3 COPIES OF MESIC CURRENTLY?
    
    # convert '% of total' plot to image and add to 'plot_im_list', so 
    # that it'll be included in final pdf of plot images
    plot_im = glob(os.path.normpath(os.path.join(
    os.getcwd(), eco + "_hexcell_as%_total_extent.png")))
    plot_im = Image.open(plot_im[0])
    plot_im = plot_im.convert('RGB')
    plot_im_list.append(plot_im) 
    print("\nand will be included in 'final_plots.pdf'")
    
#         ax.imshow(tif_layer_crs, cmap='jet', extent=raster_extent, 
#                   interpolation='nearest')
#         plt.savefig((scen_id + '_pu_selections_over_raster.png'), 
#                     facecolor='w', edgecolor='k', dpi=600)
#         plt.close(fig)
#         print(scen_id + ": _pu_selections_over_raster saved as .png\n")

    # Append 'merged_shp_df' to 'shp_summary_ls' for final summary df
    shp_summary_ls.append(merged_shp_df)
        
# combine all dfs stored in the shp_summary_ls list into one pandas dataframe,
# and then save that dataframe as a .csv file
shp_summary_df = pd.concat(shp_summary_ls)
shp_summary_df.to_csv(os.path.normpath(
    os.path.join(new_dir, 'shp_summary.csv')), index=False)
print("\n'shp_summary.csv' saved to " + new_dir)

# save plot images to pdf (*currently duplicates the 1st image, needs fix)
plots_pdf_path = os.path.normpath(os.path.join(new_dir, 
                                               'combined_plots.pdf'))
plot_im_list[0].save(plots_pdf_path, save_all=True, 
                     append_images=plot_im_list)

print("\n'final_plots.pdf' saved to " + new_dir)

print('\nloop completed successfully')


new_dir C:\Users\cwsnd\earth-analytics\data\kba_thresh_sa\20220628_103352_fullrun_2plots
ecotestdirs include 12 directories
ecotest_data_path C:\Users\cwsnd\earth-analytics\data\kba_thresh_sa\20220628_103352_fullrun_2plots\eco_mesic\mesic10_run

Begin loop for scen_id: dome025_run
C:\Users\cwsnd\earth-analytics\data\kba_thresh_sa\20220628_103352_fullrun_2plots\eco_dome\dome025_run
cwd C:\Users\cwsnd\earth-analytics\data\kba_thresh_sa\20220628_103352_fullrun_2plots\eco_dome\dome025_run\selected_plot
globfile_selected contains: C:\Users\cwsnd\earth-analytics\data\kba_thresh_sa\20220628_103352_fullrun_2plots\eco_dome\dome025_run\dome025_run_pu_selected.csv
shp_layer_crs_path: C:\Users\cwsnd\earth-analytics\data\kba_thresh_sa\20220628_103352_fullrun_2plots\eco_dome\source_data\dome_espg_5070.shp
eco: dome
reprojected shp file check = PASS
dome025_run_merged.shp file check = PASS
dome025_run : merged shapefile saved to 'selected_plot' directory
reprojected tif file check = PASS
dome025_run

dune075_run: _pu_selections_over_raster saved as .png


and will be included in 'final_plots.pdf'
dune075_runinfo from dune075_run will be added to final summary


Begin loop for scen_id: dune10_run
C:\Users\cwsnd\earth-analytics\data\kba_thresh_sa\20220628_103352_fullrun_2plots\eco_dune\dune10_run
cwd C:\Users\cwsnd\earth-analytics\data\kba_thresh_sa\20220628_103352_fullrun_2plots\eco_dune\dune10_run\selected_plot
globfile_selected contains: C:\Users\cwsnd\earth-analytics\data\kba_thresh_sa\20220628_103352_fullrun_2plots\eco_dune\dune10_run\dune10_run_pu_selected.csv
shp_layer_crs_path: C:\Users\cwsnd\earth-analytics\data\kba_thresh_sa\20220628_103352_fullrun_2plots\eco_dune\source_data\dune_espg_5070.shp
eco: dune
reprojected shp file check = PASS
dune10_run_merged.shp file check = PASS
dune10_run : merged shapefile saved to 'selected_plot' directory
reprojected tif file check = PASS
dune10_run: dune10_run'_merged_results.csv' saved to 'selected_plot' dir
preparing plots...
selected_