# This notebook classifies each HUC basin by its *Gi\** value

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import geopandas as gpd
import glob
from tqdm import tqdm

%matplotlib inline
import statsmodels as sm
from statsmodels.imputation import mice
import warnings

warnings.filterwarnings("ignore")

In [None]:
# Lower-case chemical symbols of the elements handled by this notebook.
# Kept as a whitespace-separated string for compactness; the order of the
# resulting list is the order in which the symbols are written here.
element_list = (
    "ag al as au b ba be bi ca cd ce co cr cs cu dy er eu fe ga gd ge "
    "hf hg ho k la li lu mg mn mo na nb nd ni os p pb pd pr pt rb re rh "
    "ru sb sc se sm sn sr ta tb te th ti tl tm u v w y yb zn zr"
).split()

All rock *Gi\** values should be calculated in ArcMap and exported to an `xls` file for each element. The next cell reads in those Excel files for the rock hotspots, and reads in the sediment hotspots that were calculated in `03 Sediment hotspots.ipynb`. It then combines the two for each element and classifies each basin as `known`, `missed`, `potential`, `below`, or `background`.

In [None]:
# Batch job: for every element in `element_list`, build the rock and sediment
# hotspot layers, classify each HUC12 basin, and write three shapefiles
# (<element>_rock_heatmap, <element>_sed_heatmap, <element>_classes).

# Attribute columns of the ArcMap spatial-join export that are discarded
# before the Gi* z-scores are summed per basin.
rock_unused_columns = [
    "Join_Count",
    "TARGET_FID",
    "AREA",
    "PERIMETER",
    "WY_HU12_",
    "WY_HU12_ID",
    "HUC_8",
    "HUC_10",
    "ACRES",
    "STATES",
    "NCONTRB_A",
    "HU_10_DS",
    "HU_10_NAME",
    "HU_10_MOD",
    "HU_10_TYPE",
    "HU_12_DS",
    "HU_12_NAME",
    "HU_12_MOD",
    "HU_12_TYPE",
    "HU_2_NAME",
    "HU_4_NAME",
    "HU_6_NAME",
    "HU_8_NAME",
    "Shape_Leng",
    "SOURCE_ID",
    "GiPValue",
    "NNeighbors",
    "Gi_Bin",
    "Shape_Length",
    "Shape_Area",
]

# NOTE(review): the {"init": ...} CRS form is kept unchanged from the original;
# newer pyproj versions flag it as deprecated -- confirm before modernising.
crs = {"init": "epsg:3732"}

# The basin polygons are the same for every element, so they are read and
# reprojected once here rather than once per loop iteration.
huc12 = gpd.read_file(r"J:\Geology\WSGS\GIS\huc_12.shp").to_crs(crs)

for element in element_list:
    try:
        # ---- rock samples: sum the Gi* z-scores per basin ----------------
        frame = pd.read_excel(
            "J:/Geology/WSGS/Projects/Critical Minerals/hotspot analysis/rock hotspot joined with hucs/"
            + element
            + ".xls",
            index_col=[0],
            sheet_name=0,  # was `sheet=0`, which is not a read_excel keyword
        )
        frame = frame.drop(columns=rock_unused_columns)
        summary = frame.groupby("HUC_12", as_index=False).sum()
        middle = huc12.merge(summary.astype(object), on="HUC_12", how="right")
        # The key and score are cast to str before joining back onto the
        # basin polygons; basins left with missing values are then dropped.
        combined = huc12.merge(
            middle[["HUC_12", "GiZScore"]].astype(str), on="HUC_12", how="left"
        ).dropna()
        combined["zscore"] = combined.GiZScore.astype(float).values
        combined.to_file(
            driver="ESRI Shapefile",
            filename="/results/" + element + "_rock_heatmap.shp",
        )

        # ---- sediment samples: same treatment ----------------------------
        frame = pd.read_csv(
            "/sed hotspot joined with hucs/" + element + "_ppm.csv"
        )
        summary = frame.groupby("HUC_12", as_index=False).sum()
        middle = huc12.merge(summary.astype(object), on="HUC_12", how="right")
        sed_comb = (
            huc12.merge(
                middle[["HUC_12", "z_score"]].astype(str),
                on="HUC_12",
                how="left",
            )
            .dropna()
            .to_crs(crs)
        )
        sed_comb["zscore"] = sed_comb.z_score.astype(float).values
        sed_comb.to_file(
            driver="ESRI Shapefile",
            filename="/results/" + element + "_sed_heatmap.shp",
        )

        # ---- classification ----------------------------------------------
        # missed: sediment score >= 2 in a basin whose rock score is < 2.
        missed = sed_comb[
            (sed_comb["zscore"] >= 2)
            & sed_comb["HUC_12"].isin(combined[combined["zscore"] < 2].HUC_12)
        ].drop(columns=["z_score"])
        missed["class"] = "missed"

        # known: rock score >= 2.
        known = combined[combined["zscore"] >= 2].drop(columns=["GiZScore"])
        known["class"] = "known"

        # potential: sediment score >= 2 in a basin that is neither missed
        # nor known.
        potential = sed_comb[
            (sed_comb["zscore"] >= 2)
            & (~sed_comb["HUC_12"].isin(missed.HUC_12))
            & (~sed_comb["HUC_12"].isin(known.HUC_12))
        ].drop(columns=["z_score"])
        potential["class"] = "potential"

        # below: sediment score <= -2 in a basin whose rock score is <= -2.
        below = sed_comb[
            (sed_comb["zscore"] <= -2)
            & sed_comb["HUC_12"].isin(combined[combined["zscore"] <= -2].HUC_12)
        ].drop(columns=["z_score"])
        below["class"] = "below"

        # background: every basin that received none of the classes above.
        classified_ids = pd.concat(
            [below.HUC_12, potential.HUC_12, known.HUC_12, missed.HUC_12]
        )
        bg = huc12[~huc12["HUC_12"].isin(classified_ids)].copy()
        bg["class"] = "background"

        classes = pd.concat([missed, known, potential, bg, below], sort=True)
        classes.to_file(
            driver="ESRI Shapefile",
            filename="/results/" + element + "_classes.shp",
        )
    except Exception as exc:
        # Keep going with the remaining elements, but report why this one
        # failed instead of hiding the cause.
        print(
            element + " was not successfully written to shapefile: " + repr(exc)
        )

In [None]:
element = "co"
# This cell builds and plots the rock Gi* heatmap for the single element
# selected above.

# Read the HUC basin polygons and reproject them.
huc12 = gpd.read_file(r"\huc_12.shp").to_crs({"init": "epsg:3732"})

frame = pd.read_excel(
    "/rock hotspot joined with hucs/"
    + element
    + "_SpatialJoin_TableToExcel.xls",
    index_col=[0],
    sheet_name=element + "_SpatialJoin_TableToExcel",
)
# Discard the spatial-join attribute columns that are not needed before the
# z-scores are summed per basin.
frame = frame.drop(
    columns=[
        "Join_Count",
        "TARGET_FID",
        "AREA",
        "PERIMETER",
        "WY_HU12_",
        "WY_HU12_ID",
        "HUC_8",
        "HUC_10",
        "ACRES",
        "STATES",
        "NCONTRB_A",
        "HU_10_DS",
        "HU_10_NAME",
        "HU_10_MOD",
        "HU_10_TYPE",
        "HU_12_DS",
        "HU_12_NAME",
        "HU_12_MOD",
        "HU_12_TYPE",
        "HU_2_NAME",
        "HU_4_NAME",
        "HU_6_NAME",
        "HU_8_NAME",
        "Shape_Leng",
        "SOURCE_ID",
        "GiPValue",
        "NNeighbors",
        "Gi_Bin",
        "Shape_Length",
        "Shape_Area",
    ]
)
summary = frame.groupby("HUC_12", as_index=False).sum()
middle = huc12.merge(summary.astype(object), on="HUC_12", how="right")
# Join the summed scores back onto the basin polygons and drop basins that
# are left with missing values.
combined = huc12.merge(
    middle[["HUC_12", "GiZScore"]].astype(str), on="HUC_12", how="left"
).dropna()

# GiZScore was cast to str for the merge; keep a numeric copy for plotting
# and for the classification done in later cells.
combined["zscore"] = combined.GiZScore.astype(float).values

# GeoDataFrame.plot creates its own figure when `figsize` is given, so no
# separate plt.figure() call is made (it would only render an empty figure).
ax = combined.plot(
    column="zscore",
    cmap="Greens",
    legend=True,
    figsize=(20, 10),
)
ax.set_title("Rock Sample Gi* Scores");

In [None]:
# Sediment counterpart of the rock heatmap cell: sum the sediment z-scores per
# basin, attach them to the HUC polygons, and plot the result.
frame = pd.read_csv(f"/sed hotspot joined with hucs/{element}_ppm.csv")
summary = frame.groupby("HUC_12", as_index=False).sum()
middle = huc12.merge(summary.astype(object), how="right", on="HUC_12")

crs = {"init": "epsg:3732"}
# Join the summed scores onto the basins, discard rows with missing values,
# then reproject.
sed_comb = (
    huc12.merge(
        middle[["HUC_12", "z_score"]].astype(str),
        how="left",
        on="HUC_12",
    )
    .dropna()
    .to_crs(crs)
)

sed_comb.plot(
    figsize=(20, 10),
    legend=True,
    cmap="Oranges",
    column=pd.to_numeric(sed_comb["z_score"]),
)
plt.title("Sediment Sample Gi* Scores")

# Numeric copy of the (string) z_score column, used by the classification.
sed_comb["zscore"] = sed_comb["z_score"].astype(float).to_numpy()

We can then create a map of each class and assign values to each HUC based on its cumulative *Gi\** value.

In [None]:
# Classify every basin for the single element loaded in the cells above,
# using the rock scores in `combined` and the sediment scores in `sed_comb`.

# missed: sediment score >= 2 in a basin whose rock score is < 2.
missed = sed_comb[
    (sed_comb["zscore"] >= 2)
    & sed_comb["HUC_12"].isin(combined[combined["zscore"] < 2].HUC_12)
].drop(columns=["z_score"])
missed["class"] = "missed"

# known: rock score >= 2.
known = combined[combined["zscore"] >= 2].drop(columns=["GiZScore"])
known["class"] = "known"

# potential: sediment score >= 2 in a basin that is neither missed nor known.
potential = sed_comb[
    (sed_comb["zscore"] >= 2)
    & (~sed_comb["HUC_12"].isin(missed.HUC_12))
    & (~sed_comb["HUC_12"].isin(known.HUC_12))
].drop(columns=["z_score"])
potential["class"] = "potential"

# below: sediment score <= -2 in a basin whose rock score is also <= -2.
below = sed_comb[
    (sed_comb["zscore"] <= -2)
    & sed_comb["HUC_12"].isin(combined[combined["zscore"] <= -2].HUC_12)
].drop(columns=["z_score"])
below["class"] = "below"

# background: every basin that received none of the classes above.
bg = huc12[
    (~huc12["HUC_12"].isin(below.HUC_12))
    & (~huc12["HUC_12"].isin(potential.HUC_12))
    & (~huc12["HUC_12"].isin(known.HUC_12))
    & (~huc12["HUC_12"].isin(missed.HUC_12))
].copy()
bg["class"] = "background"

classes = pd.concat([missed, known, potential, bg, below], sort=True)

In [None]:
# Map only the basins whose class is "missed", coloured by z-score.
classes.loc[classes["class"].eq("missed")].plot(
    figsize=(20, 10),
    legend=True,
    cmap="viridis",
    column="zscore",
)
plt.gca().set_title("Missed Areas Gi* Scores")