In [1]:
import geopandas as gpd
import numpy as np
import pandas as pd
from joblib import Parallel, delayed
import time
import re

In [2]:
import os 
import sys 
# Put the notebook's parent directory on the import path (presumably where
# stream_cat_config and StreamCat_functions live -- confirm against the repo layout).
parent_dir = os.path.abspath(os.pardir)
if sys.path.count(parent_dir) == 0:
    sys.path.append(parent_dir)

from stream_cat_config import (
    LYR_DIR,
    MASK_DIR_RP100,
    MASK_DIR_SLP10,
    MASK_DIR_SLP20,
    ACCUM_DIR,
    NHD_DIR,
    OUT_DIR,
    PCT_FULL_FILE,
    PCT_FULL_FILE_RP100
)

from StreamCat_functions import (
    PointInPoly,
    createCatStats,
    mask_points
)

In [6]:
# Control table; the processing cells below act on its rows where run == 1.
ctl = pd.read_csv("../ControlTable_StreamCat.csv")
inter_vpu = pd.read_csv('../InterVPU.csv')
# Saved dict-like object whose items are iterated as (zone, hydroregion) pairs.
# NOTE: allow_pickle=True unpickles arbitrary objects on load, so this file
# must come from a trusted source.
INPUTS = np.load(f"{ACCUM_DIR}/vpu_inputs.npy", allow_pickle=True).item()

already_processed = list()

In [7]:
# Sanity check: the zone mapping, how many zones it holds, and how many
# control-table rows are flagged with run == 1.
print(INPUTS)
print(len(INPUTS))
print(ctl[ctl["run"] == 1].shape[0])

OrderedDict({'06': 'MS', '05': 'MS', '10U': 'MS', '10L': 'MS', '07': 'MS', '11': 'MS', '14': 'CO', '01': 'NE', '17': 'PN', '16': 'GB', '15': 'CO', '13': 'RG', '12': 'TX', '09': 'SR', '02': 'MA', '08': 'MS', '04': 'GL', '03W': 'SA', '03S': 'SA', '03N': 'SA', '18': 'CA'})
21
1


In [None]:
# Parallel Cell

# Number of joblib workers. Kept in one place so the pool size and the timing
# message printed at the end always agree.
N_JOBS = 16
# use_mask code from the control table -> mask directory. Any other value
# (including 0) means "no mask" and maps to an empty string.
MASK_DIRS = {1: MASK_DIR_RP100, 2: MASK_DIR_SLP10, 3: MASK_DIR_SLP20}

for _, row in ctl.query("run == 1").iterrows():

    apm = "" if row.AppendMetric == "none" else row.AppendMetric
    mask_dir = MASK_DIRS.get(row.use_mask, "")

    # A LandscapeLayer that already contains a path separator is used as given;
    # a bare file name is resolved against LYR_DIR.
    actual_layer_name = row.LandscapeLayer
    if "/" in actual_layer_name or "\\" in actual_layer_name:
        layer = actual_layer_name
    else:
        layer = f"{LYR_DIR}/{actual_layer_name}"
    print(layer)

    # summaryfield is a ';'-separated string when present; anything else -> None.
    summary = row.summaryfield.split(";") if isinstance(row.summaryfield, str) else None

    if row.accum_type == "Point":
        # Load in point geopandas table and Pct_Full table
        # TODO: script to create this PCT_FULL_FILE
        pct_full = pd.read_csv(
            PCT_FULL_FILE if row.use_mask == 0 else PCT_FULL_FILE_RP100
        )
        points = gpd.read_file(layer)
        if mask_dir:
            points = mask_points(points, mask_dir, INPUTS)
    # File string to store InterVPUs needed for adjustments
    Connector = f"{OUT_DIR}/{row.FullTableName}_connectors.csv"
    print(
        f"Acquiring `{row.FullTableName}` catchment statistics...",
        end="",
        flush=True,
    )

    def process_zone(zone, hydroregion):
        """Compute and write the catchment statistics CSV for one zone.

        Reads `row`, `layer`, `mask_dir`, `apm`, `summary` (and, for point
        layers, `points` and `pct_full`) from the enclosing loop iteration.
        This is safe only because the Parallel call below finishes before the
        loop advances to the next control-table row.

        Does nothing when `{OUT_DIR}/{FullTableName}_{zone}.csv` already
        exists. Returns None.
        """
        out_csv = f"{OUT_DIR}/{row.FullTableName}_{zone}.csv"
        if os.path.exists(out_csv):
            return
        print(zone, end=", ")
        pre = f"{NHD_DIR}/NHDPlus{hydroregion}/NHDPlus{zone}"
        if row.accum_type == "Point":
            izd = f"{pre}/NHDPlusCatchment/Catchment.shp"
            cat = PointInPoly(
                points, zone, izd, pct_full, mask_dir, apm, summary
            )
        else:
            # With a mask, the per-zone mask raster is used in place of the
            # NHDPlus catchment grid.
            izd = (
                f"{mask_dir}/{zone}.tif"
                if mask_dir
                else f"{pre}/NHDPlusCatchment/cat"
            )
            cat = createCatStats(
                row.accum_type,
                layer,
                izd,
                OUT_DIR,
                zone,
                row.by_RPU,
                mask_dir,
                NHD_DIR,
                hydroregion,
                apm,
            )
        cat.to_csv(out_csv, index=False)

    start_time = time.time()
    # process_zone returns None, so zone_results is just a list of None values.
    zone_results = Parallel(n_jobs=N_JOBS)(
        delayed(process_zone)(zone, hydroregion) for zone, hydroregion in INPUTS.items()
    )
    end_time = time.time()
    # Report the worker count actually passed to joblib, not os.cpu_count().
    print(f"Processed {len(INPUTS)} in {end_time - start_time} seconds with {N_JOBS} parallel processes")
    print("done!")

O:/PRIV/CPHEA/PESD/COR/CORFILES/Geospatial_Library_Projects/StreamCat/LandscapeRasters/Annual_NLCD_LndCov_1987_CU_C1V0.tif
Acquiring `N_Human_Waste_1987` catchment statistics...

RasterioIOError: O:/PRIV/CPHEA/PESD/COR/CORFILES/Geospatial_Library_Projects/StreamCat/LandscapeRasters/Annual_NLCD_LndCov_1987_CU_C1V0.tif: No such file or directory

In [None]:
# use_mask code from the control table -> mask directory. Any other value
# (including 0) means "no mask" and maps to an empty string.
MASK_DIRS = {1: MASK_DIR_RP100, 2: MASK_DIR_SLP10, 3: MASK_DIR_SLP20}

for _, row in ctl.query("run == 1").iterrows():

    apm = "" if row.AppendMetric == "none" else row.AppendMetric
    mask_dir = MASK_DIRS.get(row.use_mask, "")

    if "/" in row.LandscapeLayer or "\\" in row.LandscapeLayer:
        # Path-like entry: use it exactly as given. Substituting a bare rebuilt
        # file name here would drop the directory and resolve against the
        # current working directory.
        layer = row.LandscapeLayer
    else:
        # Bare name: take the first run of digits as the year and map it to the
        # matching Annual NLCD land-cover raster under LYR_DIR.
        landscape_layer_year = re.findall(r'\d+', row.LandscapeLayer)
        if not landscape_layer_year:
            raise ValueError(
                f"Cannot derive a year from LandscapeLayer {row.LandscapeLayer!r} "
                f"for `{row.FullTableName}`: the name contains no digits"
            )
        actual_layer_name = f"Annual_NLCD_LndCov_{landscape_layer_year[0]}_CU_C1V0.tif"
        layer = f"{LYR_DIR}/{actual_layer_name}"
    print(layer)

    # summaryfield is a ';'-separated string when present; anything else -> None.
    summary = row.summaryfield.split(";") if isinstance(row.summaryfield, str) else None

    if row.accum_type == "Point":
        # Load in point geopandas table and Pct_Full table
        # TODO: script to create this PCT_FULL_FILE
        # NOTE(review): for bare names `layer` is a .tif here -- confirm that
        # Point rows are never run through this cell.
        pct_full = pd.read_csv(
            PCT_FULL_FILE if row.use_mask == 0 else PCT_FULL_FILE_RP100
        )
        points = gpd.read_file(layer)
        if mask_dir:
            points = mask_points(points, mask_dir, INPUTS)
    # File string to store InterVPUs needed for adjustments
    Connector = f"{OUT_DIR}/{row.FullTableName}_connectors.csv"
    print(
        f"Acquiring `{row.FullTableName}` catchment statistics...",
        end="",
        flush=True,
    )
    start_time = time.time()
    for zone, hydroregion in INPUTS.items():
        out_csv = f"{OUT_DIR}/{row.FullTableName}_{zone}.csv"
        # Zones whose output CSV already exists are skipped, so the cell can be re-run.
        if os.path.exists(out_csv):
            continue
        print(zone, end=", ", flush=True)
        pre = f"{NHD_DIR}/NHDPlus{hydroregion}/NHDPlus{zone}"
        if row.accum_type == "Point":
            izd = f"{pre}/NHDPlusCatchment/Catchment.shp"
            cat = PointInPoly(
                points, zone, izd, pct_full, mask_dir, apm, summary
            )
        else:
            # With a mask, the per-zone mask raster is used in place of the
            # NHDPlus catchment grid.
            izd = (
                f"{mask_dir}/{zone}.tif"
                if mask_dir
                else f"{pre}/NHDPlusCatchment/cat"
            )
            cat = createCatStats(
                row.accum_type,
                layer,
                izd,
                OUT_DIR,
                zone,
                row.by_RPU,
                mask_dir,
                NHD_DIR,
                hydroregion,
                apm,
            )
        cat.to_csv(out_csv, index=False)
    end_time = time.time()
    print(f"Processed {len(INPUTS)} in {end_time-start_time} seconds using a for loop and one process")
    print("done!")