In [1]:
import pandas as pd
import numpy as np

In [2]:
import os 
import sys 
parent_dir = os.path.abspath('..')
if parent_dir not in sys.path:
    sys.path.append(parent_dir)
from StreamCat_functions_gpu import appendConnectors, swapper



In [None]:
def Accumulation(tbl, comids, lengths, upstream, tbl_type, icol="COMID"):
    """
    __author__ =  "Marc Weber <weber.marc@epa.gov>"
                  "Ryan Hill <hill.ryan@epa.gov>"
    Uses the 'Cat' and 'UpCat' columns to caluculate watershed values and returns those values in 'Cat' columns
        so they can be appended to 'CatResult' tables in other zones before accumulation.

    Arguments
    ---------
    tbl                   : table containing watershed values
    comids                : numpy array of all zones comids
    lengths               : numpy array with lengths of upstream comids
    upstream              : numpy array of all upstream arrays for each COMID
    tbl_type              : string value of table metrics to be returned
    icol                  : column in arr object to index
    """
    # RuntimeWarning: invalid value encountered in double_scalars
    # np.seterr(all="ignore")
    
    coms = tbl[icol].values.astype("int32")  # Read in comids
    indices = swapper(coms, upstream)  # Get indices that will be used to map values
    del upstream  # a and indices are big - clean up to minimize RAM
    cols = tbl.columns[1:]  # Get column names that will be accumulated
    z = np.zeros(comids.shape)  # Make empty vector for placing values
    data = np.zeros((len(comids), len(tbl.columns)))
    data[:, 0] = comids  # Define first column as comids
    accumulated_indexes = np.add.accumulate(lengths)[:-1]
    # accumulated_indexes = np.ufunc.accumulate(lengths)[:-1]
    # accumulated_indexes = np.cumsum(lengths)[:-1]
    # Loop and accumulate values
    for index, column in enumerate(cols, 1):
        col_values = tbl[column].values.astype("float")
        all_values = np.split(col_values[indices], accumulated_indexes)
        if tbl_type == "Ws":
            # add identity value to each array for full watershed
            all_values = np.array(
                [np.append(val, col_values[idx]) for idx, val in enumerate(all_values)],
                dtype=object,
            )

            # all_values = [np.append(val, col_values[idx]) for idx, val in enumerate(all_values)]

        if index == 1:
            area = all_values.copy()
        if "PctFull" in column:
            values = [
                np.average(np.nan_to_num(val), weights=w) # changed from np.ma.average
                for val, w in zip(all_values, area)
            ]
        elif "MIN" in column or "MAX" in column:
            func = np.max if "MAX" in column else np.min
            # initial is necessary to eval empty upstream arrays
            # these values will be overwritten w/ nan later

            # initial = -999 if "MAX" in column else 999999

            initial = -999999 if "MAX" in column else 999999

            values = np.array([func(val, initial=initial) for val in all_values])
            values[lengths == 0] = col_values[lengths == 0]
        else:
            values = np.array([np.nansum(val) for val in all_values])
        data[:, index] = values
    data = data[np.in1d(data[:, 0], coms), :]  # Remove the extra comids
    outDF = pd.DataFrame(data)
    prefix = "UpCat" if tbl_type == "Up" else "Ws"
    outDF.columns = [icol] + [c.replace("Cat", prefix) for c in cols.tolist()]
    outDF = outDF.loc[:, ~outDF.columns.duplicated()] # New to remove duplicate cols
    print(outDF.columns)
    areaName = outDF.columns[outDF.columns.str.contains("Area")][0]
    # identifies that there is no area in catchment mask,
    # then NA values for everything past Area, covers upcats w. no area AND
    # WS w/ no area
    
    no_area_rows, na_columns = (outDF[areaName] == 0), outDF.columns[2:]
    print(no_area_rows)
    print(na_columns)
    outDF.loc[no_area_rows, na_columns] = np.nan # changed from .loc
    return outDF

In [4]:
Connector = "O:/PRIV/CPHEA/PESD/COR/CORFILES/Geospatial_Library_Projects/StreamCat/Allocation_and_Accumulation/CanalDensity_connectors.csv"

In [25]:
zone = '01'
OUT_DIR = "C:/Users/thudso02/repositories/StreamCat/high_res_data/output"

In [13]:
inter_vpu = pd.read_csv("../config_tables/InterVPU.csv")
inter_vpu.head()   # .ToZone.isin(zone)

Unnamed: 0,thruCOMIDs,FromZone,ToZone,AdjustComs,toCOMIDs,DropCOMID,UpCOMadd,removeCOMs,comments
0,18267741,14,15,0,0,0,20734041,24719331,
1,20734037,14,15,0,0,0,0,10466473,
2,1861888,6,5,0,0,0,0,15714785,
3,1862004,6,5,0,0,0,0,0,
4,1862014,6,5,1862004,0,1862014,0,0,


In [None]:
accum = np.load(f"./accum_npy/accum_{zone}_speed_test.npz")
accum

In [9]:
cat = pd.read_csv(f"{OUT_DIR}/CanalDensity_01.csv")

In [10]:
if zone in inter_vpu.ToZone.values:
    cat = appendConnectors(cat, Connector, zone, inter_vpu)

In [14]:
cat.COMID = cat.COMID.astype(accum["comids"].dtype)
cat.set_index("COMID", inplace=True)
cat = cat.loc[accum["comids"]].reset_index().copy()

In [15]:
up = Accumulation(
    cat, accum["comids"], accum["lengths"], accum["upstream"], "Up"
)
up            

Unnamed: 0,COMID,UpCatAreaSqKm,UpCatGRIDCODE,UpCatzone,UpCatmean,UpCatCount,UpCatSum,UpCatmin,UpCatmax,UpUpCatAreaSqKm,...,UpUpCatmin,UpUpCatmax,WsAreaSqKm,WsGRIDCODE,Wszone,Wsmean,WsCount,WsSum,Wsmin,Wsmax
0,718276.0,0.0000,,,,,,,,,...,,,,,,,,,,
1,718808.0,0.0000,,,,,,,,,...,,,,,,,,,,
2,718792.0,0.0000,,,,,,,,,...,,,,,,,,,,
3,718288.0,0.0000,,,,,,,,,...,,,,,,,,,,
4,718882.0,0.0000,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
65963,4600087.0,4.4127,1877345.0,1877345.0,0.0,4903.0,0.0,0.0,0.0,0.0000,...,0.0,0.0,4.4127,1877345.0,1877345.0,0.0,4903.0,0.0,0.0,0.0
65964,4599793.0,5.4252,3754691.0,3754691.0,0.0,6028.0,0.0,0.0,0.0,4.4127,...,0.0,0.0,9.8379,5632036.0,5632036.0,0.0,10931.0,0.0,0.0,0.0
65965,4600101.0,0.0000,,,,,,,,,...,,,,,,,,,,
65966,4599761.0,4.1337,1877348.0,1877348.0,0.0,4593.0,0.0,0.0,0.0,0.0000,...,0.0,0.0,4.1337,1877348.0,1877348.0,0.0,4593.0,0.0,0.0,0.0


In [29]:
ws = Accumulation(
    cat, accum["comids"], accum["lengths"], accum["upstream"], "Ws"
)
ws

Index(['COMID', 'WsAreaSqKm', 'WsGRIDCODE', 'Wszone', 'Wsmean', 'WsCount',
       'WsSum', 'Wsmin', 'Wsmax', 'UpWsAreaSqKm', 'UpWsGRIDCODE', 'UpWszone',
       'UpWsmean', 'UpWsCount', 'UpWsSum', 'UpWsmin', 'UpWsmax'],
      dtype='object')
0        False
1        False
2        False
3        False
4        False
         ...  
65963    False
65964    False
65965    False
65966    False
65967    False
Name: WsAreaSqKm, Length: 65968, dtype: bool
Index(['WsGRIDCODE', 'Wszone', 'Wsmean', 'WsCount', 'WsSum', 'Wsmin', 'Wsmax',
       'UpWsAreaSqKm', 'UpWsGRIDCODE', 'UpWszone', 'UpWsmean', 'UpWsCount',
       'UpWsSum', 'UpWsmin', 'UpWsmax'],
      dtype='object')


Unnamed: 0,COMID,WsAreaSqKm,WsGRIDCODE,Wszone,Wsmean,WsCount,WsSum,Wsmin,Wsmax,UpWsAreaSqKm,UpWsGRIDCODE,UpWszone,UpWsmean,UpWsCount,UpWsSum,UpWsmin,UpWsmax
0,718276.0,2.3103,1809703.0,1809703.0,0.0,2567.0,0.0,0.0,0.0,0.0000,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,718808.0,3.9429,1809704.0,1809704.0,0.0,4381.0,0.0,0.0,0.0,0.0000,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,718792.0,5.8995,1809705.0,1809705.0,0.0,6555.0,0.0,0.0,0.0,0.0000,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,718288.0,2.8125,1809706.0,1809706.0,0.0,3125.0,0.0,0.0,0.0,0.0000,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,718882.0,3.6603,1809707.0,1809707.0,0.0,4067.0,0.0,0.0,0.0,0.0000,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
65963,4600087.0,5.4252,3754691.0,3754691.0,0.0,6028.0,0.0,0.0,0.0,4.4127,1877345.0,1877345.0,0.0,4903.0,0.0,0.0,0.0
65964,4599793.0,5.4270,5632038.0,5632038.0,0.0,6030.0,0.0,0.0,0.0,9.8379,5632036.0,5632036.0,0.0,10931.0,0.0,0.0,0.0
65965,4600101.0,4.1337,1877348.0,1877348.0,0.0,4593.0,0.0,0.0,0.0,0.0000,0.0,0.0,0.0,0.0,0.0,0.0,0.0
65966,4599761.0,8.0136,3754697.0,3754697.0,0.0,8904.0,0.0,0.0,0.0,4.1337,1877348.0,1877348.0,0.0,4593.0,0.0,0.0,0.0


In [30]:
if zone in inter_vpu.ToZone.values:
    cat = pd.read_csv(f"{OUT_DIR}/CanalDensity_{zone}.csv")

In [31]:
if zone in inter_vpu.FromZone.values:
    interVPU(
        ws,
        cat.columns[1:],
        row.accum_type,
        zone,
        Connector,
        inter_vpu.copy(),
    )

In [32]:
upFinal = pd.merge(up, ws, on="COMID")
final = pd.merge(cat, upFinal, on="COMID")

In [33]:
final.head()

Unnamed: 0,COMID,CatAreaSqKm,CatGRIDCODE,Catzone,Catmean,CatCount,CatSum,Catmin,Catmax,UpCatAreaSqKm_x,...,Wsmin_y,Wsmax_y,UpWsAreaSqKm,UpWsGRIDCODE,UpWszone,UpWsmean,UpWsCount,UpWsSum,UpWsmin,UpWsmax
0,718276,2.3103,1809703,1809703,0.0,2567.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,718808,3.9429,1809704,1809704,0.0,4381.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,718792,5.8995,1809705,1809705,0.0,6555.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,718288,2.8125,1809706,1809706,0.0,3125.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,718882,3.6603,1809707,1809707,0.0,4067.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [34]:
final.to_csv(f"{OUT_DIR}/final_CanalDensity_{zone}.csv", index=False)