# This notebook calculates the Getis-Ord $G_i^*$ values for the sediment samples

### Note: the rock-sample $G_i^*$ values were calculated in `ArcMap`

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

%matplotlib inline
import geopandas as gpd
import itertools, glob
from tqdm import tqdm

In [None]:
# Read the shapefile into a GeoDataFrame. The path is relative, so it is
# resolved against the notebook's current working directory.
# The cells below read its "HUC_12", "HU_12_DS" and "geometry" columns.
imputed = gpd.read_file(r"max_nure_values.shp")

In [None]:
# `hucs` is a second name for the same object as `imputed` (no copy is made),
# so every column added to `hucs` below is also visible through `imputed`.
hucs = imputed

In [None]:
# Store each row's index label in an "FID" column; later cells use these values
# as basin ids. NOTE(review): presumably 0..n-1 for a freshly read file - confirm.
hucs["FID"] = hucs.index.values

In [None]:
# The "FID" column, taken from index label 0 through the last row; these ids
# become the keys of the `neighbors` dictionary built further down.
keys = hucs.loc[0:].FID  # huc basin number

In [None]:
# downstream basin list
# ds[i] is an array with the FID(s) of the basin whose HUC_12 code equals the
# HU_12_DS code of basin i (huc basin downstream of huc basin); the array is
# empty when no basin in the frame carries that code.
ds = [
    hucs.loc[hucs["HUC_12"] == downstream_code, "FID"].values
    for downstream_code in hucs["HU_12_DS"]
]

In [None]:
# upstream basin list
# us[i] is an array with the FIDs of every basin whose HU_12_DS code equals the
# HUC_12 code of basin i; the array is empty when no basin points at basin i.
us = [
    hucs.loc[hucs["HU_12_DS"] == basin_code, "FID"].values
    for basin_code in hucs["HUC_12"]
]

In [None]:
# combined upstream and downstream basins
# lso[i] is a list whose first entry is the first downstream FID of basin i
# (or -1 when there is none), followed by every upstream FID of basin i.
# -1 is only a placeholder, not a basin id: code that uses these entries as
# array/matrix indices has to skip it.
lso = []
for downstream_ids, upstream_ids in zip(ds, us):
    first_downstream = downstream_ids[0] if downstream_ids.size > 0 else -1
    # The original inner "else: append(-1)" could never run (the loop body only
    # executes when there is at least one upstream id), so it is dropped.
    lso.append([first_downstream, *upstream_ids])

In [None]:
# Map each basin id in `keys` to its list in `lso` (paired by position):
# first the downstream entry (which may be the -1 placeholder), then the
# upstream basin ids.
neighbors = dict(zip(keys, lso))

In [None]:
# this calculates the Gi* values for the dendritic spatial network
def GIstar(
    element,
    shp_dir="./drainage hotspots/",
    csv_dir=r"\\sed hotspot joined with hucs\\",
):
    """Compute Getis-Ord Gi*-style z-scores for one column of ``hucs`` and export them.

    The function reads the notebook-level ``hucs`` frame and ``neighbors``
    mapping, builds a binary contiguity matrix from ``neighbors``,
    row-standardizes the weights of every basin and evaluates::

        z_i = (sum_j(w_ij * x_j) - Xbar * sum_j(w_ij))
              / (S * sqrt((n * sum_j(w_ij ** 2) - sum_j(w_ij) ** 2) / (n - 1)))

    where ``Xbar`` and ``S`` are the mean and (population) standard deviation
    of the column over all rows and ``n`` is the number of rows.

    Parameters
    ----------
    element : str
        Name of the column of ``hucs`` to analyse.
    shp_dir : str, optional
        Prefix of the shapefile output path; ``<element>_hotspots.shp`` is
        appended to it. Defaults to the location used originally.
    csv_dir : str, optional
        Prefix of the CSV output path; ``<element>.csv`` is appended to it.
        Defaults to the location used originally.

    Returns
    -------
    None
        Results are delivered through side effects: the ``"z_score"`` column
        of ``hucs`` is (over)written, and one shapefile and one CSV are saved.
        Scores that are not finite (for example for a basin without any linked
        basin, or when ``S`` is 0) are stored as 0.
    """
    # Kept as a function-level import so this cell does not depend on edits to
    # the notebook's import cell.
    import scipy.sparse as sp

    n = len(hucs)
    values = hucs[element].values
    Xbar = hucs[element].mean()  # mean value of the element across all HUCs
    S = np.sqrt(((hucs[element] ** 2).sum() / n) - (Xbar ** 2))  # S of the denominator

    # Sparse matrix of spatial contiguity weights (1 = linked basins).
    mat = sp.dok_matrix((n, n), dtype=np.int8)
    for huc_a, linked in neighbors.items():
        for huc_b in linked:
            # Negative entries are "no basin" placeholders; used as an index
            # they would address the last column and fabricate a link.
            if huc_b >= 0:
                mat[huc_a, huc_b] = 1
    # Transpose and convert exactly once, after all links are stored. (Doing it
    # inside the loop flipped the matrix on every iteration and forced slow
    # writes into a CSR matrix.)
    weights = mat.transpose().tocsr()
    # NOTE(review): no self-link (i, i) is added here, so a basin's own value
    # only enters its score if `neighbors` lists the basin itself. Gi* normally
    # includes the focal feature - confirm this is intended.

    z_score = []
    # Empty rows and zero denominators yield nan/inf below; they are expected
    # and mapped to 0 afterwards, so the floating-point warnings are silenced
    # instead of hiding every possible error behind a bare `except`.
    with np.errstate(divide="ignore", invalid="ignore"):
        for i in tqdm(range(n)):
            # Densify only the current row instead of the whole n x n matrix.
            row = weights[i].toarray().ravel()
            row_standardized = row / row.sum()
            weight_sum = row_standardized.sum()
            numerator = (values * row_standardized).sum() - weight_sum * Xbar
            # Getis-Ord denominator; the `n *` factor was missing, which made
            # the radicand negative (previously masked with abs()).
            denominator = (
                np.sqrt(
                    (n * (row_standardized ** 2).sum() - weight_sum ** 2)
                    / (n - 1)
                )
                * S
            )
            z_score.append(numerator / denominator)

    # Replace inf/-inf/nan by 0 before assigning. Calling replace/fillna with
    # inplace=True on `hucs["z_score"]` acts on a temporary under pandas
    # Copy-on-Write and would leave the column unchanged.
    scores = np.asarray(z_score, dtype=float)
    scores[~np.isfinite(scores)] = 0.0
    hucs["z_score"] = scores

    G = hucs[[element, "HUC_12", "z_score", "geometry"]]
    G.to_file(
        driver="ESRI Shapefile",
        filename=shp_dir + str(element) + "_hotspots.shp",
    )
    H = hucs[[element, "HUC_12", "z_score"]]
    # NOTE(review): the default `csv_dir` is a raw string, so it contains
    # doubled backslashes at both ends; confirm this is the intended folder.
    H.to_csv(csv_dir + str(element) + ".csv")

In [None]:
# calculate Gi* for all elements in the sediment dataset
# GIstar writes a "z_score" column into `hucs`. That column is removed from the
# candidate list before slicing, so re-running this cell picks exactly the same
# columns as the first run instead of shifting the window by one.
# NOTE(review): 26 and -6 are assumed to bracket the element columns of the
# shapefile - confirm against the actual column order.
candidate_columns = hucs.columns.drop("z_score", errors="ignore")
element_list = candidate_columns[26:-6].values
for element_name in element_list:
    GIstar(element_name)