In [1]:
# Import libraries
import os
import csv
import datetime
import io
import pathlib
import requests
import shutil
import time
from glob import glob

# from qgis.core import *
# import qmarxan_utils as qmu # import runMarxanOnce
# import marxanconpy as mx

import contextily as cx
import earthpy as et
import geopandas as gpd
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from PIL import Image
from rasterio.crs import CRS
from rasterio.plot import plotting_extent
import rioxarray as rxr
import subprocess

import kba_thresh_sa_scripts as ks

# Notebook-wide cache override flag. Cells that cache downloaded data locally
# consult this flag when deciding whether to overwrite an existing local copy.
CACHE_OVERRIDE = False

#### Check for 'earth-analytics/data/kba_thresh_sa' directory
* If it exists, it will be set as the working directory.
* If it doesn't exist, user is prompted to return to first notebook in workflow.

#### *IN THE FUTURE -* 
* Should there also be a check to verify that the 'hex_shp' dir exists, and/or 
that the 'hex_shp' directory actually contains shapefiles? 

In [2]:
# Define a filepath to the 'earth-analytics/data/kba_thresh_sa' directory
data_path = os.path.normpath(os.path.join(et.io.HOME,
                                          'earth-analytics',
                                          'data',
                                          'kba_thresh_sa'))

# If the 'kba_thresh_sa' directory exists, make it the working directory and
# define the path to the hex files directory created in the first notebook.
# Otherwise, prompt the user to return to the first notebook in the workflow.
# NOTE: 'shp_data_path' is only defined when the directory exists.
if os.path.exists(data_path):
    print('Working directory is set to earth-analytics/data/kba_thresh_sa.')
    os.chdir(data_path)
    # define the path to the hexfiles that was created in the 1st notebook
    shp_data_path = os.path.normpath(os.path.join(data_path, 'hex_shp'))
else:
    # Adjacent string literals (not a backslash continuation inside a single
    # literal) so the message prints without stray quotes or padding spaces.
    print("Please go to first notebook in workflow to set up initial "
          "directories")
 

Working directory is set to earth-analytics/data/kba_thresh_sa.


In [3]:
# Paths to the Marxan executables, which have been manually copied into the
# 'kba_thresh_sa' directory (maybe they could be copied there from the repo?).
# In order:
#   marxan_path      - v4.0.6 (might be causing 'target2' crash? use 2.43
#                      instead)
#   marxan_243_path  - v2.43
#   marxan_1810_path - v1.8.10
marxan_path, marxan_243_path, marxan_1810_path = (
    os.path.join(data_path, exe_name)
    for exe_name in ("Marxan_x64.exe",
                     'Marxan_x64_243.exe',
                     'Marxan_1_8_10.exe')
)

#### Save table of information to 'earth-analytics/data/kba_thresh_sa'  

* The workflow requires an associated table, with information about the 
ecosystems to be analyzed *(Need to provide more detail about what specific 
information this table requires... Or will we simply work with the full Landfire
readme info, which is saved to the repo?  If so, do we need to add a unique 
one-word 'Short_Name' to each ecosystem listed, or change the file-naming system 
to use one of the existing numerical unique identifiers - like 'OID' or 
'Value'?)*.  

* This file will be saved locally to the 'earth-analytics/data/kba_thresh_sa' 
directory.  

* In our initial workflow, we are using the 'LF_EVT_2020_README' file that was
provided along with the Landfire raster.  
&nbsp; I've manually edited this file to  
&emsp; 1. show only the rows for the nine ecosystems selected for initial 
analysis.  
&emsp; 2. add a new column to show the one word short name Lana used when creating 
her initial files in ArcGIS.  

* This file has been manually uploaded to our GitHub repo as 'Assets/Data/
from_LF_EVT_2020_README.csv'. 

* The code below will download that file from URL to a pandas dataframe, then
save that dataframe locally as a csv.  

#### *IN THE FUTURE -* 
* This existing code could be reused if the user were prompted for a url where 
they have their table stored?
* Or,  
&emsp; 1. prompt user to save their table to 'earth-analytics/data/kba_thresh_sa' 
as specifically named 'ecosystem_info.csv'  
&emsp; 2. Then check for 'ecosystem_info.csv' in 
'earth-analytics/data/kba_thresh_sa'  
&emsp; 3. If found, load to dataframe  
&emsp; &emsp; If not found, prompt user to "Save 'ecosystem_info.csv' to the 
'earth-analytics/data/kba_thresh_sa' directory, then rerun notebook" 
        
           

In [4]:
# Load the ecosystem info table (info on selected ecosystems taken from the
# LF_EVT_2020_README file, with an added 'Short_Name' field used as index).
# The table is downloaded from the GitHub repository and cached as a local
# csv; the local copy is reused on later runs unless an override is requested.

# Provide the URL (using raw content at GitHub)
ecoinfo_url = ("https://raw.githubusercontent.com/csandberg303/"
               "kba-threshold-sensitivity-analysis/main/assets/data/"
               "from_LF_EVT_2020_README.csv")

# Local cache override: follows the notebook-wide CACHE_OVERRIDE flag.
# (The previous expression `True or CACHE_OVERRIDE` was always True, so the
# global flag had no effect.) Set this to True to force a fresh download.
cache_override = CACHE_OVERRIDE

# Provide the path to the locally cached copy
ecoinfo_path = os.path.normpath(
    os.path.join(data_path, 'from_LF_EVT_2020_README.csv'))

if os.path.exists(ecoinfo_path) and not cache_override:
    # Reuse the cached csv - no network access needed
    ecoinfo_df = pd.read_csv(ecoinfo_path).set_index('Short_Name')
else:
    # Read csv at URL into pandas dataframe, using 'Short_Name' col as index,
    # then (re)write the local cache
    ecoinfo_df = pd.read_csv(ecoinfo_url).set_index('Short_Name')
    ecoinfo_df.to_csv(ecoinfo_path)

ecoinfo_df

Unnamed: 0_level_0,OID,Value,Count_30m,US_hectare,US_km2,EVT_Name_1,LFRDB,elcode,element_gl,NatureServ,...,A3_FINAL,B1_FINAL,B2_FINAL,C3_FINAL,D3_FINAL,RLE_FINAL,GRANK_EQUI,RED,GREEN,BLUE
Short_Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
prairie,132,7142,106116,9550,96,Columbia Basin Palouse Prairie,7142,CES304.792,722880,Columbia Basin Palouse Prairie,...,CR,LC,LC,EN,VU,CR,G1,204,252,105
foothill,137,7147,4546277,409165,4092,Western Great Plains Foothill and Piedmont Gra...,7147,CES303.817,722856,Western Great Plains Foothill and Piedmont Gra...,...,VU,LC,LC,NT,LC,VU,G3,218,238,243
mesic,224,7322,1090956,98186,982,Crowley's Ridge Mesic Loess Slope Forest,7322,CES203.079,798100,Crowley's Ridge Mesic Loess Slope Forest,...,LC,EN,EN,VU,CR,CR (EN-CR),G1G2,144,201,143
bluff,229,7327,2050154,184514,1845,East Gulf Coastal Plain Northern Loess Bluff F...,7327,CES203.481,723105,East Gulf Coastal Plain Northern Loess Bluff F...,...,EN,LC,LC,VU-EN,EN,EN,G2,149,143,26
pine,244,7346,5015841,451426,4514,Atlantic Coastal Plain Fall-line Sandhills Lon...,7346,CES203.254,723231,Atlantic Coastal Plain Fall-line Sandhills Lon...,...,EN,LC,LC,VU,CR,EN (EN-CR),G1G2,70,96,32
tallgrass,314,7421,10225903,920331,9203,Central Tallgrass Prairie,7421,CES205.683,722976,Central Tallgrass Prairie,...,CR,LC,LC,DD,CR,CR,G1,243,201,28
dune,323,7431,19717,1775,18,Southwest Florida Dune and Coastal Grassland,7431,CES203.539,723063,Southwest Florida Dune and Coastal Grassland,...,DD,LC,LC,NE,CR,CR,G1,245,252,179
dome,335,7447,900234,81021,810,South Florida Cypress Dome,7447,CES411.365,723151,South Florida Cypress Dome,...,DD,VU,LC,DD,LC,VU,G3,54,163,120
marsh,676,9197,1634510,147106,1471,Northern Atlantic Coastal Plain Tidal Salt Marsh,9197,CES203.519,723073,Northern Atlantic Coastal Plain Tidal Salt Marsh,...,EN,LC,LC,DD,EN,EN,G2,131,173,223


In [5]:
# Add 2 columns to 'ecoinfo_df'

# 1st column - 'Type' (needed for the CLUZ addin input file 'targets.csv';
# might not be needed for marxanconpy). np.select assigns a number based upon
# the string seen in the 'RLE_FINAL' column:
#   Type = 1 if 'CR', 'CR (EN-CR)', 'EN' or 'EN (EN-CR)'
#   Type = 2 if 'VU'
# Rows matching none of the conditions receive np.select's default of 0.
rle_final = ecoinfo_df['RLE_FINAL']

# one condition per recognised status string, paired by position with the
# value to assign in 'type_values'
type_conditions = [
    rle_final.eq(status)
    for status in ('CR', 'CR (EN-CR)', 'EN', 'EN (EN-CR)', 'VU')
]
type_values = [1, 1, 1, 1, 2]

ecoinfo_df['Type'] = np.select(type_conditions, type_values)

# 2nd column - 'Current_IUCN_TH'. np.select assigns a threshold percentage
# based upon the column 'Type' (5% if 1, 10% if 2)
eco_type = ecoinfo_df['Type']

current_threshold_conditions = [eco_type.eq(1), eco_type.eq(2)]
current_threshold_values = [.05, .10]

ecoinfo_df['Current_IUCN_TH'] = np.select(
    current_threshold_conditions, current_threshold_values)

ecoinfo_df

Unnamed: 0_level_0,OID,Value,Count_30m,US_hectare,US_km2,EVT_Name_1,LFRDB,elcode,element_gl,NatureServ,...,B2_FINAL,C3_FINAL,D3_FINAL,RLE_FINAL,GRANK_EQUI,RED,GREEN,BLUE,Type,Current_IUCN_TH
Short_Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
prairie,132,7142,106116,9550,96,Columbia Basin Palouse Prairie,7142,CES304.792,722880,Columbia Basin Palouse Prairie,...,LC,EN,VU,CR,G1,204,252,105,1,0.05
foothill,137,7147,4546277,409165,4092,Western Great Plains Foothill and Piedmont Gra...,7147,CES303.817,722856,Western Great Plains Foothill and Piedmont Gra...,...,LC,NT,LC,VU,G3,218,238,243,2,0.1
mesic,224,7322,1090956,98186,982,Crowley's Ridge Mesic Loess Slope Forest,7322,CES203.079,798100,Crowley's Ridge Mesic Loess Slope Forest,...,EN,VU,CR,CR (EN-CR),G1G2,144,201,143,1,0.05
bluff,229,7327,2050154,184514,1845,East Gulf Coastal Plain Northern Loess Bluff F...,7327,CES203.481,723105,East Gulf Coastal Plain Northern Loess Bluff F...,...,LC,VU-EN,EN,EN,G2,149,143,26,1,0.05
pine,244,7346,5015841,451426,4514,Atlantic Coastal Plain Fall-line Sandhills Lon...,7346,CES203.254,723231,Atlantic Coastal Plain Fall-line Sandhills Lon...,...,LC,VU,CR,EN (EN-CR),G1G2,70,96,32,1,0.05
tallgrass,314,7421,10225903,920331,9203,Central Tallgrass Prairie,7421,CES205.683,722976,Central Tallgrass Prairie,...,LC,DD,CR,CR,G1,243,201,28,1,0.05
dune,323,7431,19717,1775,18,Southwest Florida Dune and Coastal Grassland,7431,CES203.539,723063,Southwest Florida Dune and Coastal Grassland,...,LC,NE,CR,CR,G1,245,252,179,1,0.05
dome,335,7447,900234,81021,810,South Florida Cypress Dome,7447,CES411.365,723151,South Florida Cypress Dome,...,LC,DD,LC,VU,G3,54,163,120,2,0.1
marsh,676,9197,1634510,147106,1471,Northern Atlantic Coastal Plain Tidal Salt Marsh,9197,CES203.519,723073,Northern Atlantic Coastal Plain Tidal Salt Marsh,...,LC,DD,EN,EN,G2,131,173,223,1,0.05


#### *IN THE FUTURE -* 

Currently our code will work with the ecosystem raster and hex files that Lana 
created in ArcGIS using the ArcMarxan plugin.  Ultimately we hope to work directly
with the full Landfire EVT 2020 raster, but the file is proving too large to 
effectively manage with our personal laptops. A solution may be found using the 
2016 Landfire data which has an available API (the 2020 data is scheduled to be 
published to the API later this year). An alternative solution may be found using 
Dask, or possibly Amazon Web Services.

If/When our code can access the full CONUS raster, the source data in repo assets 
(and the links to that file in this code) will need to be updated to the full 
version of the raster's LF_EVT_2020_README file. Once that occurs, we could ask 
for user input to get entries matching the 'Value' column in that file, as a way 
of selecting specific ecosystems from the full Landfire EVT 2020 data. That user 
input would be assigned to a list variable 'value_filter'. 

The user would then be prompted for a one-word 'Short_Name' value for each 
ecosystem being analyzed (ex. mesic, dune, dome), to be used in file naming. This 
abbreviated name would be added to the ecoinfo_df.  

Currently the 'Short_Name' values have been hardcoded, to match what Lana chose 
when creating her ArcGIS files. The 'value_filter' variable will also be 
hardcoded, to match the values seen in the LF_EVT_2020_README file for the three 
ecosystems we are using as test data (Crowley's Ridge Mesic Loess Slope Forest, 
Southwest Florida Dune and Coastal Grassland, and South Florida Cypress Dome).


In [6]:
# CREATE LISTS THAT WILL BE USED LATER IN ITERATION LOOPS

# Threshold levels to test
test_threshold = [1.0, 0.75, 0.50, 0.25]

# 'value_filter' holds the entries of the 'Value' column of 'ecoinfo_df' for
# the three ecosystems which have shp and hex files uploaded to the GitHub
# repository: 7431 = dune, 7322 = mesic, 7447 = dome
value_filter = [7431, 7322, 7447]

# Keep only the records whose 'Value' appears in value_filter
eco_subset_df = ecoinfo_df.loc[ecoinfo_df['Value'].isin(value_filter)]

# Alphabetical list of the ecosystems to be analyzed, taken from the
# 'Short_Name' index of eco_subset_df
eco_list = sorted(eco_subset_df.index.tolist())

eco_list

['dome', 'dune', 'mesic']

In [7]:
# DEFINE VARIABLES TO BE USED IN MARXAN RUN

# Base name for this test run. It is appended to the timestamped run
# directory name (and, per the original note, will appear in the filename of
# the final summary files).
testrun_basename = 'mx1810summarytest'

# EPSG code to set as CRS for raster and shapefile. (The variable is spelled
# 'espg'; the name is left as-is because later cells pass it by that name.)
espg = '5070'

# Proportion for spec.dat (default value = 30% of total extent)
# (must be between 0 and 1, Lana tutorial suggested 0.3)
prop = 0.3

# Species Penalty Factor - more detail needed.
# NOTE(review): an earlier comment said the default value of 1 was in use, but
# the value set here is 10 - confirm which is intended.
spf = 10

# Number of repeat runs (or solutions) - orig value in qmarxan = 100
numreps = 100

# Number of iterations for annealing
# orig value 1000000
# (RUNMODE 1 & 3 did not complete successfully with numitns=10 (or 1000?))
numitns = 10000

# Boundary length modifier (default value from qmarxan code = 1; set to 10 here)
# blmtest = [0.1, 0.5, 1, 5, 10] # blmtest used for test loop only, no longer needed
blm = 10

# Marxan run mode (later in the notebook, runmode 3 selects heurtype 1)
# runmode_ls = [1 , 3] # runmode_ls used for test loop only, no longer needed
runmode = 1

#### Loop through the eco_list, create directories and input files needed by Marxan.

Each time the code below runs, a new timestamped directory is created. Inside will
be subdirectories named from the 'Short_Name' value of each selected ecosystem 
(see 'eco_subset_df') combined with the test threshold level.

Each of these ecosystem subdirectories will have the following named 
subdirectories -
* input - where files needed by marxan analysis are stored (bound.dat, pu.dat, 
puvsp.dat, spec.dat)
* output - where files generated by marxan analysis are stored
* pu - pu and report seen in qmarxan setup (purpose tbd)
* report - pu and report seen in qmarxan setup (purpose tbd)
* source_data - where the rasters and PU hex_shp files are moved to, after they
are copied from the 'r_tif' and 'hex_shp' folders

A fifth input file 'input.dat' is created and placed in the main ecosystem 
directory.

The code below will also create a 'targets.csv' file for each value in 
the 'test_threshold' list variable.  This is done using the function 
'create_targets_files'. These 'targets.csv' files are used by the CLUZ 
plugin in QGIS.  They are not used by the QMarxan QGIS plugin, and ultimately may
not be needed for our marxanconpy workflow.  *Perhaps the loop inside
the function that uses the 'test_threshold' variable could be reused for another 
purpose?*

Currently, we are using the input files that Lana created using ArcGIS, that have 
been saved to the repo.  The code below will simply copy those files from the repo 
and save them into each ecosystem's 'input' folder.

#### *IN THE FUTURE -* 
* Our project sponsor has said that the set of input files is commonly prepared 
using GIS tools.  If that is the practice we will continue, new files for 
additional ecosystems will be generated using QGIS/QMarxan.
* Another option may be to create a new function in this workflow for each 
specific input file (input.dat, bound.dat, pu.dat, puvsp.dat, spec.dat).  A 
function has been written to create the input.dat file, using code seen in the 
qmarxan repository from Apropos Information Systems (used under the GPL-2.0 
license). Creating the input files programmatically rather than in GIS may allow 
for easier manipulation of the files within the workflow to perform the 
sensitivity analysis of the KBA threshold values.

 #### Conducting a Sensitivity Analysis of the IUCN TH - spec.dat 'target2'
 
 The KBA threshold can be tested using the 'target2' column in the spec.dat 
 input file.  
'target2' sets a minimum size for an identified area to count against 
the target value.  If a patch of selected hexcells does not meet that
minimum value, it won't appear in a final solution. 
This value is calculated for each ecosystem, once for every level in 
test_threshold = [1.0, 0.75, 0.50, 0.25]:  
'target2' = (total area x 'Current_IUCN_TH') x test threshold level



In [None]:
# USE THIS CELL FOR MARXAN v4.06 AND MARXAN v2.43 (FIND CELL BELOW FOR V1.8.10)

# RUN THIS CELL TO BEGIN AUTOMATED WORKFLOW (1ST CELL OF TWO - BEGIN MARXAN ANALYSIS)

# Make 'testrun_basename' unique: if a directory whose name ends in
# 'testrun_basename' already exists under data_path, try the base name with
# 2, 3, 4, ... appended until no existing directory matches.
# NOTE(review): 'testrun_basename' itself is overwritten with the suffixed
# name, so re-running this cell in the same kernel appends a further digit to
# the already-suffixed name (e.g. 'name2' -> 'name22') - confirm this is
# acceptable.
testrun_basename_ck = glob(os.path.join(data_path, '*' + testrun_basename))

expand = 1
new_tr_bn = testrun_basename
while testrun_basename_ck:
    expand += 1
    new_tr_bn = testrun_basename + str(expand)
    testrun_basename_ck = glob(os.path.join(data_path, '*' + new_tr_bn))
testrun_basename = new_tr_bn

print('testrun_basename: ' + testrun_basename)

# Create the directory for this run, named '<timestamp>_<testrun_basename>',
# and make it the working directory
run_stamp = datetime.datetime.now().strftime('%Y%m%d_%H%M%S')
new_dir = os.path.normpath(
    os.path.join(data_path, run_stamp + '_' + testrun_basename))

os.makedirs(new_dir)
os.chdir(new_dir)
print(new_dir + '\n')

# heurtype is determined by runmode:
# RUNMODE 3 uses heurtype = 1 (greedy); any other runmode uses -1 (not used)
heurtype = 1 if runmode == 3 else -1
print('runmode: ' + str(runmode) +'\nheurtype: ' + str(heurtype) +'\n')


### 1ST LOOP BEGINS HERE

# LOOP THROUGH ECOSYSTEMS (in 'eco_list') FOR EACH TEST (in 'test_threshold').
# For every (test, eco) pair this builds a scenario directory inside
# 'new_dir', writes/copies the Marxan input files into it, and launches the
# Marxan executable from that directory. The loop relies on the current
# working directory, which is changed several times per iteration.
for test in test_threshold:
    for eco in eco_list:

        # create Scenario ID - used as a prefix in the output files.
        # The '.' is stripped from the threshold, e.g. eco 'dome' with test
        # 1.0 gives 'dome10' and with test 0.75 gives 'dome075'.
        scen_id = eco + str(test).translate(str.maketrans('', '', '.'))

        # Print first info statement, begin creating needed directories
        print("Begin run: " + scen_id)

        # created relative to the current working directory (new_dir)
        os.makedirs(scen_id) 
        ecotest_data_path = os.path.normpath(os.path.join(data_path, new_dir, 
                                                          scen_id))
        print(ecotest_data_path)
        os.chdir(ecotest_data_path)
        os.makedirs('source_data')
        os.chdir('source_data')

        # set 'target2' variable 
        # to equal KBA Threshold Value for Ecosystem at Test Level
        # target2 = US_m2 x Current_IUCN_TH x threshold test level
        current_iucn_th = eco_subset_df.at[eco,'Current_IUCN_TH']
        us_km2 = eco_subset_df.at[eco,'US_km2']
        # convert km2 to m2
        us_m2 = us_km2 * 1000000

        target2 = test * current_iucn_th * us_m2
        target2 = round(target2)

        # set the 'target' used in spec.dat file to equal 'target2'
        # (for target to equal 30% proportion, use target = us_m2 * 0.3)
        # In this cell 'target' is only printed; the active spec.dat call
        # below is passed 'prop' and 'target2'.
        target = target2

        # print variables in run to screen for reference
        print('\n US-km2: ' + str(us_km2) + 
              '\n US_m2: ' + str(us_m2) + 
              '\n target: ' + str(target) + 
              '\n Current_IUCN_TH: ' + str(current_iucn_th) + 
              '\n threshold test level: ' + str(test) +
              '\n\n target2 = US_m2 x Current_IUCN_TH x threshold test level'
              '\n target2: ' + str(target2) + '\n\n'
              '\n BLM: ' + str(blm) + '\n' +
              '\n SPF: ' + str(spf) + '\n')

        # copy source files that were stored to the 'hex_shp' and 'r_tif' 
        # directories after running 1st notebook. Our workflow is currently  
        # using the files Lana created manually using ArcGIS.
        # (called while the working directory is 'source_data')
        ks.get_source_files(os.path.join(data_path, "hex_shp"), eco, scen_id)
        ks.get_source_files(os.path.join(data_path, "r_tif"), eco, scen_id)
        os.chdir(ecotest_data_path)

        # create 'input.dat' from formula adapted from 'qmarxan_toolbox' code,
        # which includes the 'formatAsME' - format as Marxan Exponent format
        # function. Some input parameters are provided, to replace the default
        # values provided in the qmarxan code. 
        # (At times the input.dat values have been edited directly in the 
        # input.dat formula, when a specific setting was needed for workflow (ex. ???).
        ks.create_input_dat(ecotest_data_path, 
                            blm, numreps, 
                            numitns, 
                            runmode, 
                            heurtype, 
                            scen_id)
        # read the file back only to print it to the screen for reference
        input_dat_path = os.path.normpath(os.path.join(
            ecotest_data_path, "input.dat"))
        input_dat = pd.read_csv(input_dat_path)
        print(input_dat)

        # create input directory, which is where four additional .dat files 
        # will be stored
        os.makedirs('input')
        eco_input_data_path = os.path.normpath(os.path.join(
            ecotest_data_path, 'input'))
        os.chdir(eco_input_data_path)

        # create pu.dat from original formula. 
        # Provides a record of each planning unit hex cell in the .shp file,
        # using a default uniform cost of '1', and a status of '0' (which 
        # indicates that unit is available to Marxan for selection).
        ks.create_pu_dat(eco, 
                         ecotest_data_path, 
                         scen_id)  
        # read the file back only to display its .info() summary
        pu_dat_path = os.path.normpath(os.path.join(
            ecotest_data_path, 'input', "pu.dat"))
        pu_dat = pd.read_csv(pu_dat_path)
        pu_dat.info()

        # Only one of the four spec.dat variants below is active (v2).
        # create spec.dat from v1 formula (includes 'prop' only)
#         ks.create_spec_dat_v1(eco_subset_df, eco, prop, spf, scen_id)
        
        # create spec.dat from v2 formula (includes 'prop' and 'target2')
        ks.create_spec_dat_v2(eco_subset_df, prop, target2, spf, eco, scen_id)
        
        # create spec.dat from v3 formula (includes 'target' and 'target2') - use with v1810
#         ks.create_spec_dat_v3(eco_subset_df, target_1810, target2, spf, eco, scen_id)
        
        # create spec.dat from v4 formula (includes 'target', no 'target2')
#         ks.create_spec_dat_v4(eco_subset_df, eco, target, spf, scen_id)

        # read the file back only to display its .info() summary.
        # NOTE: 'spec_dat_path' and 'spec_dat' remain defined after this cell
        # finishes, holding the values from the last scenario processed.
        spec_dat_path = os.path.normpath(os.path.join(
            ecotest_data_path, 'input', "spec.dat"))
        spec_dat = pd.read_csv(spec_dat_path)
        spec_dat.info()

        # use 'get_marxan_input_files' function to copy in any remaining .dat 
        # files needed (until formulas can be written to create these files). 
        # This formula currently is used for 'bound.dat' and 'puvsp.dat'.
        # Formula will copy files that have been created using ArcMarxan tool 
        # in ArcGIS then saved to the repository.
        ks.get_marxan_input_files(eco, 
                                  ["bound.dat", 
        #                                     "pu.dat", 
                                  "puvsp.dat", 
        #                                     "spec.dat"
                                  ],
                                 scen_id)

        # create remaining directories
        os.chdir(ecotest_data_path)
        os.makedirs('output')
        os.makedirs('report')
        os.makedirs('pu')

        # call on marxan executable to run analysis (currently using v2.43).
        # os.startfile is Windows-only and returns as soon as the program has
        # been launched, so the loop moves on to the next scenario without
        # waiting for this Marxan run to finish. It is called while the
        # working directory is this scenario's directory.
        os.startfile(marxan_243_path)
        
        print (scen_id + ": MARXAN ANALYSIS INITIATED")
        # return to the run directory so the next scenario directory is
        # created alongside this one
        os.chdir(new_dir)

# display the working directory once every scenario has been launched
os.getcwd()

In [None]:
# Deliberate stop: creates a break in notebook execution (e.g. under
# "Run All") while Marxan runs. An explicit exception with instructions
# replaces the previous reliance on a NameError from an undefined name.
raise RuntimeError(
    "Intentional stop: wait until 'The End' is seen in all Marxan run "
    "screens, then run the next cell manually to collect the summary info.")

In [None]:
# USE THIS CELL FOR 2ND PART OF WORKFLOW - MARXAN 4.06 or MARXAN 2.43 ONLY
# (USE CELL BELOW FOR 1.8.10)

# WAIT FOR MARXAN RUNS TO COMPLETE, THEN RUN THIS CELL TO COLLECT SUMMARY INFO 
# (monitor marxan run screens; run this cell after 'The End' is seen in all)

# Run directories to summarise: every path under data_path whose name ends
# in testrun_basename
dirs = glob(os.path.join(data_path, '*' + testrun_basename))

# Accumulators filled inside the loop below (each one a separate empty list):
# per-scenario summary frames, per-scenario best-run frames, error messages,
# and the plot images collected for the final pdfs
summary_info, best_info, error_list = [], [], []
bestimage_list, ssolnimage_list = [], []

# LOOP #2: LOOP THROUGH DIRECTORIES IN DIRS TO CREATE SUMMARY FILES AND PLOTS
for dir in dirs:
    # repeat 1st loop sequence that returns variables and screen output 
    os.chdir(dir)
    print(os.getcwd())
    for test in test_threshold:
        for eco in eco_list:
            print(os.getcwd())
            # NOT SURE IF THIS STEP IS STILL NEEDED, WITHOUT EXTRA LOOPS FOR RUNMODE/BLM TESTS??
            scen_id = eco + str(test).translate(str.maketrans('', '', '.'))
            ecotest_data_path = os.path.normpath(os.path.join(data_path, dir, 
                                                              scen_id))
            current_iucn_th = eco_subset_df.at[eco,'Current_IUCN_TH']
            us_km2 = eco_subset_df.at[eco,'US_km2']
            us_m2 = us_km2 * 1000000

            target2 = test * current_iucn_th * us_m2
            target2 = round(target2)
            

            # change to output directory to generate original output info 
            # Currently 4 items - 
            #1) mxrunsummary, 
            #2) plots of 'best_run' and 'ssoln', 
            #3) combined '_sum.csv' file with add'l columns)
            #4) combined 'best_run' .csv file, 
            os.chdir(os.path.normpath(os.path.join(
                ecotest_data_path, 'output')))
            print(os.getcwd())

            # 1 - create expanded summary of info based on the '*_sen.dat' 
            # scenario details file Marxan generates
            ks.create_mxrun_summary(ecotest_data_path, espg, prop, blm, 
                                    target2, spf, scen_id, eco, eco_subset_df)
            print(os.getcwd())
            
            
            # 2 - get plots from 'best_run' file info and 'ssoln' file, if 
            # those files are available
            print(scen_id + ": Begin plotting")
            # 'get_output_plots' function will set crs of source files, check
            # if the 'best_run' and 'ssoln' files are avaialble, and if so the  
            # 'best_run' and/or 'ssoln' info will be plotted and saved to an
            # image which will be saved to pdf file in the 'new_dir' directory
            
            #USE THIS FUNCTION FOR MARXAN 243 AND 406
            ks.get_output_plots (ecotest_data_path, eco, espg, target2, scen_id, us_m2)       

            
            # convert 'best_run' plot to image and add to bestimage_list, so 
            # that it'll be added to final pdf of best plot images.
            # The plot may be missing (e.g. the run did not produce a
            # 'best_run' file), so guard against an empty glob result rather
            # than letting an IndexError abort the whole summary loop.
            best_plot_paths = glob(os.path.normpath(os.path.join(
                ecotest_data_path,
                'output',
                '*_best_plot_w_bestrun_over_raster.png')))
            if best_plot_paths:
                # convert() returns a new, fully loaded image, so the source
                # file can be closed as soon as the conversion is done
                with Image.open(best_plot_paths[0]) as best_plot_file:
                    best_im = best_plot_file.convert('RGB')
                bestimage_list.append(best_im)
                print(scen_id + (": '_best_plot_w_bestrun_over_raster.png' image added to 'bestimage_list' " 
                      "to be included in 'final best_plot pdf'"))
            else:
                print(scen_id + (": no '_best_plot_w_bestrun_over_raster.png' "
                                 "image found; added to error list\n"))
                error_list.append([(scen_id + (": no "
                                               "'_best_plot_w_bestrun_over_raster.png' "
                                               "image found"))])

            # IGNORING 'SSOLN' PLOTS FOR NOW
            # convert 'ssoln' plot to image and add to ssolnimage_list, so 
#             # that it'll be added to final pdf of ssoln plot images
#             ssoln_im = glob(os.path.normpath(os.path.join(ecotest_data_path, 
#                                                          'output', 
#                                                          '*ssoln_plot.png')))
#             ssoln_im = Image.open(ssoln_im[0])
#             ssoln_im = ssoln_im.convert('RGB')
#             ssolnimage_list.append(ssoln_im)
#             print(scen_id + (": 'ssoln_plot' image added to 'ssolnimage_list'" 
#                   " to be included in 'final ssoln_plot pdf'"))  
                                 
#             os.chdir(dir)
#             print(("About to save the summary info, incl pdf plots... \n"
#                   "cwd = " + os.getcwd()))

            # 3 - save summary info from '_sum.csv' file (if the file exists)
            # include 'amount's from best solution plot 
            out_sum_path = os.path.normpath(os.path.join(ecotest_data_path, 
                                                         'output', 
                                                         scen_id + 
                                                         '_sum.csv'))
            # check if the '_sum.csv' file was successfully created by Marxan:
            # IF SO, create df from file and add columns 'scen_id', 'runmode',
            # 'blm', 'spf' 'prop/target value' (and target2?  THIS COULD BE REVISED/UPDATED)
            if os.path.exists(out_sum_path):
                os.getcwd()
                out_sum_df = pd.read_csv(out_sum_path)
                out_sum_df['scen_id'] = scen_id
                out_sum_df['runmode'] = runmode
                out_sum_df['blm'] = blm
                out_sum_df['spf'] = spf
#                 spec_dat_path = os.path.normpath(os.path.join(ecootest_data_path, 'input', "spec.dat"))
                spec_dat_df = pd.read_csv(spec_dat_path)
#                 out_sum_df["spec.dat 'target' value"] = spec_dat.columns[1]
                out_sum_df['spec.dat target value'] = spec_dat.iat[0,1] # STILL NOT RIGHT
#                 data_from_best_dbf = glob(os.path.normpath(os.path.join(ecotest_data_path, 'output', '*w_best_and_ssoln.dbf')))
# #             best_im = glob(os.path.normpath(os.path.join(ecotest_data_path, 
# #                                                          'output', 
# #                                                          '*best_plot.png')))
#                 best_results = data_from_best_dbf[0]
#                 best_results_df = pd.read_csv(best_results)
#                 out_sum_df['total extent'] = best_results['amount'].sum()
#                 out_sum_df['selected amount'] = [best_results['SOLUTION'] == 1, 'amount'].sum()
                current_iucn_th = eco_subset_df.at[eco,'Current_IUCN_TH']
                out_sum_df['Current_IUCN_TH'] = current_iucn_th
                out_sum_df['KBA m2 @ Current_IUCN_TH'] = (eco_subset_df.at[eco,'US_km2']*1000000)*current_iucn_th               

                print(scen_id + (": summary info will be added to "
                                 "'final_summary.csv'\n"))
                # Append the data to 'out_sum_df'
                summary_info.append(out_sum_df)
            # IF IT DOESN'T EXIST, 
            # print error message and add 'scen_id' to 'error_list'
            else:
                print(os.getcwd())
                print(scen_id + (': run did not complete with a successful '
                                 'run summary; added to error list\n'))
                error_list.append([(scen_id + (": run completed in error, no "
                                               "'_sum' file found"))])

            # 3 - save best run info from '_best.csv' file (if file exists)
            out_best_path = os.path.normpath(os.path.join(ecotest_data_path, 
                                                          'output', scen_id + 
                                                          '_best.csv'))
            # check if '_best.csv' file exists:
            # IF SO, create df from file and add columns 'scen_id', 'runmode' 
            # 'blm', 'spf' 'prop/target value' (and target2?)
            if os.path.exists(out_best_path):
                print(os.getcwd())
                out_best_df = pd.read_csv(out_best_path)
                out_best_df['scen_id'] = scen_id
                out_best_df['runmode'] = runmode
                out_best_df['blm'] = blm
                out_best_df['spf'] = spf
                spec_dat_path
                out_best_df['spec.dat prop or target'] = spec_dat.columns[1]
#                 out_best_df['prop/target value'] = spec_dat.iat[0,1]
                print(scen_id + (": best run info will be added to "
                                 "'final_best_runs.csv'\n"))
                # Append this scenario's best-run dataframe to 'best_info'
                best_info.append(out_best_df)
            # IF IT DOESN'T EXIST, 
            # print error message and add 'scen_id' to 'error_list'
            else:
                print(os.getcwd())
                print(scen_id + (": run did not complete with a successful "
                                 "'_best' file; added to error list\n"))
                error_list.append([(scen_id + (": run completed in error, no"
                                    "'_best' file found"))])
                

                 
    # When ecotest loop completes, check if 'summary_info' is an empty list 
    # (empty list = False)
    # If it isn't an empty list, save summary_info to 'final_summary.csv'
    if summary_info:
        print(os.getcwd())
        # Combine all the dataframes stored in 'summary_info' list during the 
        # loop into one pandas dataframe
        final_summary_df = pd.concat(summary_info, axis=0)
        final_summary_df.to_csv(testrun_basename + 'final_summary.csv')
        print("review summary of non-error runs in 'final_summary.csv in " + dir)    
    # if 'summary_info' is an empty list, print error message on screen and
    # add to errorlog
    else: 
        print(os.getcwd())
        print("entire run completed without a successful run summary; added to error list\n")
        error_list.append([("entire (add detail about which run??) run "
                            "completed without a '_sum' file for any "
                            "ecosystem at any test level")])

    # check if 'best_info' is an empty list (empty list = False)
    # If it isn't, add best_info to 'final_best_runs.csv' 
    if best_info:
        print(os.getcwd())
        # Combine all the dataframes stored in the 'best_info' list during the
        # loop into one pandas dataframe
        pd.concat(best_info)
        final_best_df = pd.concat(best_info, axis=0)
        final_best_df.to_csv(testrun_basename + 'final_best_runs.csv')
        print("combined 'best run' info can be found in 'final_best_runs.csv in " + dir)
    # If 'best_info' is an empty list, print error message on screen and add
    # to errorlog
    else: 
        print(os.getcwd())
        print("entire run did not complete with a successful 'best_run'; added to error list\n")
        error_list.append([("entire (add detail about which run??) run "
                            "completed without a 'best_run' file for any "
                            "ecosystem at any test level")])

    # save error_list to 'final_errorlog.csv'
    error_list_df = pd.DataFrame(error_list)
    error_list_df.to_csv(testrun_basename + 'final_errorlog.csv')
    print("\n'final_errorlog.csv' saved to " + os.getcwd())
              
    # save plot images to pdf
    bestimage_pdf_path = os.path.normpath(os.path.join(
        testrun_basename + '_combined_best_plots.pdf'))
    bestimage_list[0].save(
        bestimage_pdf_path, save_all=True, append_images=bestimage_list)
    print("\n'_combined_best_plots.pdf' saved to " + os.getcwd())
    
    # ignoring ssoln images for now
#     ssolnimage_pdf_path = os.path.normpath(os.path.join(
#         testrun_basename + '_combined_ssoln_plots.pdf'))
#     ssolnimage_list[0].save(
#         ssolnimage_pdf_path, save_all=True, append_images=ssolnimage_list)
#     print("\n'_combined_ssoln_plots.pdf' saved to " + os.getcwd())
              
print('\nSUMMARY ANALYSIS RUN COMPLETED')

In [None]:
# NOTE(review): `break_here` is not assigned anywhere in the visible part of
# this notebook, so evaluating it presumably raises NameError on purpose to
# stop "Run All" before the scratch/test cells below -- confirm.
break_here

In [None]:
# TEST CELL
# Locate the '*_best.dat' file from the marxan1810 output so it can later be
# merged with puvsp to get total amounts per selected hexcell.
# (best from marxan1810 shows single column on selected cells only, ~13 cells
# out of 500 in dome ex.)

ecotest_data_path = os.path.normpath(os.path.join(
    data_path, '20220621_224409_mx1810summarytest222', 'dome10'))

# glob() returns a list of matching paths; the list is empty when nothing
# matches the pattern.
best_marxan_1810_path = glob(os.path.normpath(os.path.join(
    ecotest_data_path, 'output', '*_best.dat')))

# Check for "no match" BEFORE indexing: `or` evaluates left to right, so
# testing the file size first would raise IndexError on an empty list and the
# emptiness check would never be reached.
if not best_marxan_1810_path or os.stat(best_marxan_1810_path[0]).st_size == 0:
    # NOTE(review): the meaning of '143' is not shown here -- confirm.
    print('143')
# best_marxan_1810 = pd.read_csv(best_marxan_1810_path[0], header=None, index_col=False)
# best_marxan_1810.columns = ['pu']

# puvsp_path = os.path.normpath(os.path.join(ecotest_data_path, 'input', 'puvsp.dat'))
# puvsp_dat = pd.read_csv(puvsp_path)
# puvsp_dat

# best_mx1810_w_amt = pd.merge(puvsp_dat, best_marxan_1810, on ='pu', how ='inner')
# best_mx1810_w_amt
                                        

In [None]:
# Check whether the first matched '_best.dat' file is empty.  The path must be
# passed as the variable itself: the quoted form "best_marxan_1810_path[0]" is
# a literal file name that does not exist, so getsize() raised an error.
first_best_path = best_marxan_1810_path[0]
test = os.path.getsize(first_best_path) == 0

# The only visible assignment of 'best_marxan_1810' is commented out in the
# test cell above, so build it here using that same load.  An empty file is
# represented by an empty frame so read_csv is never called on it.
if test:
    best_marxan_1810 = pd.DataFrame(columns=['pu'])
else:
    best_marxan_1810 = pd.read_csv(first_best_path, header=None,
                                   index_col=False)
    # assumes the file holds a single column of planning-unit ids -- TODO confirm
    best_marxan_1810.columns = ['pu']
best_marxan_1810

In [None]:
# TEST CELL - try to merge info from .dbf/gpd with pandas dataframe 'out_sum_df' for expanded final summary
# Reads names that must already exist from earlier cells: ecotest_data_path,
# eco, out_sum_path, scen_id, runmode, blm, spf, spec_dat_path, eco_subset_df.

# Explicit path to this ecosystem's combined best/ssoln attribute table
data_from_best_dbf = os.path.normpath(os.path.join(
    ecotest_data_path, 'output', eco + '_w_best_and_ssoln.dbf'))

# Start from the run summary and tag it with the scenario parameters
out_sum_df = pd.read_csv(out_sum_path)
out_sum_df['scen_id'] = scen_id
out_sum_df['runmode'] = runmode
out_sum_df['blm'] = blm
out_sum_df['spf'] = spf

# Use the frame that was just read ('spec_dat_df'); the original referenced
# 'spec_dat', which this cell never assigns.
spec_dat_df = pd.read_csv(spec_dat_path)
# first row, second column of spec.dat
out_sum_df['spec.dat target value'] = spec_dat_df.iat[0, 1]

# Keep the result in its own name: assigning it to 'glob' would overwrite the
# imported glob() function and break every later glob(...) call in the kernel.
best_dbf_matches = glob(os.path.normpath(os.path.join(
    ecotest_data_path, 'output', '*w_best_and_ssoln.dbf')))
if not best_dbf_matches:
    raise FileNotFoundError(
        "no '*w_best_and_ssoln.dbf' file found in "
        + os.path.join(ecotest_data_path, 'output'))
best_results = best_dbf_matches[0]

# A .dbf is a binary dBASE table, so it is read with geopandas rather than
# pd.read_csv.
best_results_df = gpd.read_file(best_results)

# Aggregate on the dataframe (not on the path string 'best_results')
out_sum_df['total extent'] = best_results_df['amount'].sum()
out_sum_df['selected amount'] = best_results_df.loc[
    best_results_df['SOLUTION'] == 1, 'amount'].sum()

current_iucn_th = eco_subset_df.at[eco, 'Current_IUCN_TH']
out_sum_df['Current_IUCN_TH'] = current_iucn_th
# 'US_km2' * 1000000 -- presumably a km2 -> m2 conversion, per the column names
out_sum_df['KBA m2 @ Current_IUCN_TH'] = (
    eco_subset_df.at[eco, 'US_km2'] * 1000000) * current_iucn_th

In [None]:
# Read the combined best/ssoln .dbf attribute table into a dataframe.
# 'pathlib' is already imported in the notebook's first cell, so it is not
# re-imported here.  The class is 'pathlib.Path' (capital P), and geopandas is
# bound to the alias 'gpd' in this notebook.
path_object = pathlib.Path(data_from_best_dbf)
df = gpd.read_file(path_object)

In [None]:
# Build the expanded summary for the current scenario.
# Reads names that must already exist from earlier cells: ecotest_data_path,
# eco, out_sum_path, scen_id, runmode, blm, spf, spec_dat_path, eco_subset_df.

# Start from the run summary and tag it with the scenario parameters
out_sum_df = pd.read_csv(out_sum_path)
out_sum_df['scen_id'] = scen_id
out_sum_df['runmode'] = runmode
out_sum_df['blm'] = blm
out_sum_df['spf'] = spf

# Use the frame that was just read ('spec_dat_df'); the original referenced
# 'spec_dat', which this cell never assigns.
spec_dat_df = pd.read_csv(spec_dat_path)
# first row, second column of spec.dat
out_sum_df['spec.dat target value'] = spec_dat_df.iat[0, 1]

# Keep the result in its own name: assigning it to 'glob' would overwrite the
# imported glob() function, so re-running this cell (or any later glob(...)
# call) would fail with "'list' object is not callable".
best_dbf_matches = glob(os.path.normpath(os.path.join(
    ecotest_data_path, 'output', '*w_best_and_ssoln.dbf')))
if not best_dbf_matches:
    raise FileNotFoundError(
        "no '*w_best_and_ssoln.dbf' file found in "
        + os.path.join(ecotest_data_path, 'output'))
best_results = best_dbf_matches[0]

# A .dbf is a binary dBASE table, so it is read with geopandas rather than
# pd.read_csv.
best_results_df = gpd.read_file(best_results)

# Aggregate on the dataframe (not on the path string 'best_results')
out_sum_df['total extent'] = best_results_df['amount'].sum()
out_sum_df['selected amount'] = best_results_df.loc[
    best_results_df['SOLUTION'] == 1, 'amount'].sum()

current_iucn_th = eco_subset_df.at[eco, 'Current_IUCN_TH']
out_sum_df['Current_IUCN_TH'] = current_iucn_th
# 'US_km2' * 1000000 -- presumably a km2 -> m2 conversion, per the column names
out_sum_df['KBA m2 @ Current_IUCN_TH'] = (
    eco_subset_df.at[eco, 'US_km2'] * 1000000) * current_iucn_th

In [None]:
# Output path for the combined ssoln plots PDF (relative to the current
# working directory).  os.path.join() with a single argument is a no-op, so
# only normpath() is applied.
ssolnimage_pdf_path = os.path.normpath(
    testrun_basename + '_combined_ssoln_plots.pdf')

# list.sort() sorts in place and returns None, so assigning its result back
# replaced the list with None.  sorted() returns the ordered list instead.
# NOTE(review): assumes the list elements are orderable (e.g. path strings);
# confirm what 'ssolnimage_list' holds.
ssolnimage_list = sorted(ssolnimage_list)

In [None]:
# Inspect 'bestimage_list': as the last expression in the cell, its value is
# shown as the cell output.
bestimage_list