## Metadata
Prepare NO3 data for interpolation

Author: Adrian Wiegman

Date Modified: 08/24/2024

## Setup Environment

In [22]:
# IPython options
# delete all variables in the workspace (-f skips the confirmation prompt)
%reset -f
# places plots inline
%matplotlib inline
# automatically reloads modules if they are changed
%load_ext autoreload 
%autoreload 2
# This code block sets up the environment by running a separate setup notebook.
# Per the output printed by this cell, that notebook loads the Python modules
# and the user-defined `fn_*` helper functions.
# NOTE(review): absolute, user-specific path — the cell only runs on a machine
# with this exact folder layout.
setup_notebook = "C:/Users/Adrian.Wiegman/Documents/GitHub/Wiegman_USDA_ARS/Cran_Q_C/2_gis/scripts/_Setup.ipynb"
# NOTE(review): on a line-magic line the text after the file name is presumably
# passed to %run as extra arguments rather than ignored as a Python comment —
# confirm this is harmless for the setup notebook.
%run $setup_notebook # magic command to run the notebook

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
***
loading python modules...

  `module_list` contains names of all loaded modules

...module loading complete

***
loading user defined functions...

type `fn_`+TAB to for autocomplete suggestions

 the object `def_list` contains user defined function names:
   fn_get_info
   fn_arcgis_table_to_df
   fn_arcgis_table_to_np_to_pd_df
   fn_run_script_w_propy_bat
   fn_try_mkdir
   fn_hello
   fn_recursive_glob_search
   fn_regex_search_replace
   fn_regex_search_0
   fn_arcpy_table_to_excel
   fn_agg_sum_df_on_group
   fn_add_prefix_suffix_to_selected_cols
   fn_calc_pct_cover_within_groups
   fn_buildWhereClauseFromList
   fn_FA_to_Q
   fn_alter_field_double
   fn_return_float
   fn_classify_wetlands

 use ??{insert fn name} to inspect
 for example running `??fn_get_info` returns:
[1;31mSignature:[0m [0mfn_get_info[0m[1;33m([0m[0mname[0m[1;33m=[0m[1;34m'fn_get_info'[0m[1;33m)[0m[1;33m

## Load NO3 Data

In [23]:
# Input CSV of merged NO3/TN samples and the point feature class created from it.
no3_tn_csv = r"C:\Users\Adrian.Wiegman\Documents\GitHub\Wiegman_USDA_ARS\Cran_Q_C\1_prep\df_NO3_TN_merged_SE_imputed.csv"
no3_tn_points_fc = r"C:\Workspace\Geodata\Cran_Q_C\Default.gdb\df_NO3_TN_merged_SE_imputed"

# Spatial reference string for the coordinates (GCS_WGS_1984, degrees).
wgs84_spatial_ref = 'GEOGCS["GCS_WGS_1984",DATUM["D_WGS_1984",SPHEROID["WGS_1984",6378137.0,298.257223563]],PRIMEM["Greenwich",0.0],UNIT["Degree",0.0174532925199433]];-400 -400 1000000000;-100000 10000;-100000 10000;8.98315284119521E-09;0.001;0.001;IsHighPrecision'

# Create point features from the "Long" (x) and "Lat" (y) columns; no z field is used.
NO3_TN_all_imputed = arcpy.management.XYTableToPoint(
    in_table=no3_tn_csv,
    out_feature_class=no3_tn_points_fc,
    x_field="Long",
    y_field="Lat",
    z_field=None,
    coordinate_system=wgs84_spatial_ref,
)

In [24]:
# Make a working copy of the imported points; the cells that follow edit and
# select from this copy through the variable `_`.
# NOTE(review): `_` is also the name IPython uses for the most recent cell
# output, so a descriptive variable name would be safer if the downstream
# cells are updated together.
model_fc_path = r"C:\Workspace\Geodata\Cran_Q_C\Default.gdb\df_NO3_TN_imputed_model"
_ = arcpy.management.CopyFeatures(
    in_features=NO3_TN_all_imputed,
    out_feature_class=model_fc_path,
)

In [25]:
# Helper functions for the two NO3 calculations below. The code block is passed
# to the tool as text, so every function an expression calls is defined in it.
no3_code_block = """def fn_return_float(x):
    # Return x as a float, or None (null) when it cannot be converted.
    try:
        return float(x)
    except (TypeError, ValueError):
        return None

def fn_zeros_to_null(x):
    # Return x as a float when it is strictly positive, otherwise None (null).
    # Converting first keeps a null or non-numeric value from raising on the
    # `> 0` comparison (None > 0 is a TypeError in Python 3).
    try:
        value = float(x)
    except (TypeError, ValueError):
        return None
    return value if value > 0 else None
"""

# convert NO3 from text to float
# NOTE(review): per the Calculate Field documentation, field_type is only used
# when the field does not already exist — confirm NO3 really ends up as DOUBLE.
arcpy.management.CalculateField(
    in_table=_,
    field="NO3",
    expression="fn_return_float(!NO3!)",
    expression_type="PYTHON3",
    code_block=no3_code_block,
    field_type="DOUBLE",
    enforce_domains="NO_ENFORCE_DOMAINS")

# and set zero or negative values to null
arcpy.management.CalculateField(
    in_table=_,
    field="NO3",
    expression="fn_zeros_to_null(!NO3!)",
    expression_type="PYTHON3",
    code_block=no3_code_block,
    field_type="DOUBLE",
    enforce_domains="NO_ENFORCE_DOMAINS")

In [26]:
# Helper functions for the two TN calculations below. The code block is passed
# to the tool as text, so every function an expression calls is defined in it.
tn_code_block = """def fn_return_float(x):
    # Return x as a float, or None (null) when it cannot be converted.
    try:
        return float(x)
    except (TypeError, ValueError):
        return None

def fn_zeros_to_null(x):
    # Return x as a float when it is strictly positive, otherwise None (null).
    # Converting first keeps a null or non-numeric value from raising on the
    # `> 0` comparison (None > 0 is a TypeError in Python 3).
    try:
        value = float(x)
    except (TypeError, ValueError):
        return None
    return value if value > 0 else None
"""

# convert TN from text to float
# NOTE(review): per the Calculate Field documentation, field_type is only used
# when the field does not already exist — confirm TN really ends up as DOUBLE.
arcpy.management.CalculateField(
    in_table=_,
    field="TN",
    expression="fn_return_float(!TN!)",
    expression_type="PYTHON3",
    code_block=tn_code_block,
    field_type="DOUBLE",
    enforce_domains="NO_ENFORCE_DOMAINS")

# set zero or negative TN values to null
# Fix: the expression previously called fn_return_float here, so the
# fn_zeros_to_null helper defined for this step was never applied.
arcpy.management.CalculateField(
    in_table=_,
    field="TN",
    expression="fn_zeros_to_null(!TN!)",
    expression_type="PYTHON3",
    code_block=tn_code_block,
    field_type="DOUBLE",
    enforce_domains="NO_ENFORCE_DOMAINS")

In [27]:
# calculate derived fields: NO3/TN ratio and natural logs of TN and NO3
#
# The helpers are defined in the code block itself so the expressions do not
# depend on names that exist only in the notebook session, and so null,
# non-numeric, zero or negative inputs yield null instead of raising.
derived_code_block = """import math

def fn_to_float(x):
    # Return x as a float, or None (null) when it cannot be converted.
    try:
        return float(x)
    except (TypeError, ValueError):
        return None

def fn_safe_ratio(numerator, denominator):
    # numerator / denominator, or None when either is null or the denominator is 0.
    num = fn_to_float(numerator)
    den = fn_to_float(denominator)
    if num is None or den is None or den == 0:
        return None
    return num / den

def fn_safe_log(x):
    # Natural log of x, or None when x is null, zero or negative.
    value = fn_to_float(x)
    if value is None or value <= 0:
        return None
    return math.log(value)
"""

arcpy.management.CalculateField(
    in_table=_,
    field="NO3_TN_ratio",
    expression="fn_safe_ratio(!NO3!, !TN!)",
    expression_type="PYTHON3",
    code_block=derived_code_block,
    field_type="DOUBLE",
    enforce_domains="NO_ENFORCE_DOMAINS")

arcpy.management.CalculateField(
    in_table=_,
    field="lnTN",
    expression="fn_safe_log(!TN!)",
    expression_type="PYTHON3",
    code_block=derived_code_block,
    field_type="DOUBLE",
    enforce_domains="NO_ENFORCE_DOMAINS")

arcpy.management.CalculateField(
    in_table=_,
    field="lnNO3",
    expression="fn_safe_log(!NO3!)",
    expression_type="PYTHON3",
    code_block=derived_code_block,
    field_type="DOUBLE",
    enforce_domains="NO_ENFORCE_DOMAINS")

In [43]:
# Clip the working copy (`_`) to the domain polygon and save the result
# as a new feature class.
domain_poly_fc = r"C:\Workspace\Geodata\Cran_Q_C\Default.gdb\domain_poly"
model_domain_fc = r"C:\Workspace\Geodata\Cran_Q_C\Default.gdb\df_NO3_TN_imputed_model_domain"

arcpy.analysis.Clip(
    in_features=_,
    clip_features=domain_poly_fc,
    out_feature_class=model_domain_fc,
    cluster_tolerance=None,
)

## Split the data into multiple groups

In [37]:
# Select every record that is not in the PSW or WPP datasets and copy the
# selection to its own feature class.
dataset = "MEP_CCR_BBC"

# Fix: the previous clause joined the two inequalities with OR
# (dataset <> 'PSW' OR dataset <> 'WPP'), which is true for every non-null
# value and therefore excluded nothing. NOT IN requires both to hold.
# NOTE(review): the WPP cell of this notebook filters on 'DWMWPP' rather than
# 'WPP' — confirm which code actually occurs in the `dataset` field.
# NOTE(review): this cell references the layer by name, while the other
# selection cells pass the `_` result object — confirm that is intended.
selection = arcpy.management.SelectLayerByAttribute(
    in_layer_or_view="df_NO3_TN_imputed_model",
    selection_type="NEW_SELECTION",
    where_clause="dataset NOT IN ('PSW', 'WPP')",
    invert_where_clause=None
)

arcpy.management.CopyFeatures(
    in_features=selection,
    out_feature_class=r"C:\Workspace\Geodata\Cran_Q_C\Default.gdb\df_NO3_{}".format(dataset))

In [38]:
# MEP records with a positive NO3 value, copied to df_NO3_MEP.
dataset = "MEP"
mep_where = "dataset = 'MEP' And NO3 > 0"
mep_out_fc = r"C:\Workspace\Geodata\Cran_Q_C\Default.gdb\df_NO3_" + dataset

selection = arcpy.management.SelectLayerByAttribute(
    in_layer_or_view=_,
    selection_type="NEW_SELECTION",
    where_clause=mep_where,
    invert_where_clause=None,
)

arcpy.management.CopyFeatures(in_features=selection, out_feature_class=mep_out_fc)

In [39]:
# Buzzards Bay and Cape Cod rivers: records from the 'CCRO' and 'BBC' datasets
# with a positive NO3 value.
dataset = "CCR_BBC"
ccr_bbc_out_fc = r"C:\Workspace\Geodata\Cran_Q_C\Default.gdb\df_NO3_%s" % dataset

selection = arcpy.management.SelectLayerByAttribute(
    _,
    selection_type="NEW_SELECTION",
    where_clause="dataset IN ('CCRO', 'BBC') And NO3 > 0",
    invert_where_clause=None,
)

arcpy.management.CopyFeatures(
    in_features=selection,
    out_feature_class=ccr_bbc_out_fc,
)

In [40]:
# Records from the 'DWMWPP' and 'PSW' datasets with a positive NO3 value,
# written out under the group name "WPP".
dataset = "WPP"

wpp_query = {
    "selection_type": "NEW_SELECTION",
    "where_clause": "dataset IN ('DWMWPP', 'PSW') And NO3 > 0",
    "invert_where_clause": None,
}
selection = arcpy.management.SelectLayerByAttribute(in_layer_or_view=_, **wpp_query)

arcpy.management.CopyFeatures(
    in_features=selection,
    out_feature_class=r"C:\Workspace\Geodata\Cran_Q_C\Default.gdb\df_NO3_{}".format(dataset),
)

In [41]:
# Everything except the 'MEP' and 'PSW' datasets (no NO3 filter on this group).
dataset = "not_MEP_PSW"
not_mep_psw_where = "dataset NOT IN ('MEP', 'PSW')"
not_mep_psw_fc = r"C:\Workspace\Geodata\Cran_Q_C\Default.gdb\df_NO3_TN_imputed_" + dataset

selection = arcpy.management.SelectLayerByAttribute(
    in_layer_or_view=_,
    selection_type="NEW_SELECTION",
    where_clause=not_mep_psw_where,
    invert_where_clause=None,
)

arcpy.management.CopyFeatures(
    in_features=selection,
    out_feature_class=not_mep_psw_fc,
)

In [42]:
# Everything except the 'PSW' dataset; saved as df_NO3_TN_imputed_not_PSW.
dataset = "not_PSW"
not_psw_fc = r"C:\Workspace\Geodata\Cran_Q_C\Default.gdb\df_NO3_TN_imputed_%s" % dataset

selection = arcpy.management.SelectLayerByAttribute(
    in_layer_or_view=_,
    selection_type="NEW_SELECTION",
    where_clause="dataset NOT IN ('PSW')",
    invert_where_clause=None,
)

arcpy.management.CopyFeatures(in_features=selection, out_feature_class=not_psw_fc)

In [43]:
# Clip the not_PSW feature class to the domain polygon.
not_psw_clip_args = dict(
    in_features=r"C:\Workspace\Geodata\Cran_Q_C\Default.gdb\df_NO3_TN_imputed_not_PSW",
    clip_features=r"C:\Workspace\Geodata\Cran_Q_C\Default.gdb\domain_poly",
    out_feature_class=r"C:\Workspace\Geodata\Cran_Q_C\Default.gdb\df_NO3_TN_imputed_not_PSW_domain",
    cluster_tolerance=None,
)
arcpy.analysis.Clip(**not_psw_clip_args)

In [None]:
# Everything except the 'PSW' and 'BBC' datasets.
dataset = "not_BBC_PSW"
not_bbc_psw_where = "dataset NOT IN ('PSW','BBC')"

selection = arcpy.management.SelectLayerByAttribute(
    in_layer_or_view=_,
    selection_type="NEW_SELECTION",
    where_clause=not_bbc_psw_where,
    invert_where_clause=None,
)

# Save the selection as df_NO3_TN_imputed_<dataset>.
not_bbc_psw_fc = r"C:\Workspace\Geodata\Cran_Q_C\Default.gdb\df_NO3_TN_imputed_{}".format(dataset)
arcpy.management.CopyFeatures(in_features=selection, out_feature_class=not_bbc_psw_fc)

In [43]:
# Clip the feature class for the current `dataset` to the domain polygon.
# This relies on `dataset` having been assigned by a previously executed cell.
subset_fc = r"C:\Workspace\Geodata\Cran_Q_C\Default.gdb\df_NO3_TN_imputed_{}".format(dataset)
subset_domain_fc = subset_fc + "_domain"

arcpy.analysis.Clip(
    in_features=subset_fc,
    clip_features=r"C:\Workspace\Geodata\Cran_Q_C\Default.gdb\domain_poly",
    out_feature_class=subset_domain_fc,
    cluster_tolerance=None,
)

In [44]:
# Export the clipped not_PSW table to an .xls file, using field names as
# column headers ("NAME") and coded values ("CODE").
not_psw_domain_fc = r"C:\Workspace\Geodata\Cran_Q_C\Default.gdb\df_NO3_TN_imputed_not_PSW_domain"
not_psw_domain_xls = r"C:\Workspace\Geodata\Cran_Q_C\outputs\df_NO3_TN_imputed_not_PSW_domain.xls"

arcpy.conversion.TableToExcel(
    Input_Table=not_psw_domain_fc,
    Output_Excel_File=not_psw_domain_xls,
    Use_field_alias_as_column_header="NAME",
    Use_domain_and_subtype_description="CODE",
)

## APPENDIX 

unused code
- the code below is for running statistical models

## Load Explanatory Variable Rasters

## Older NO3 datasets

## Format Data Table