In [1]:
from argparse import ArgumentParser

import os
import sys
import time

import matplotlib.pyplot as plt

import geopandas as gpd
import pandas as pd

from pandarallel import pandarallel

sys.path.insert(1, '../scripts/')
from reaches import readNHD
from utils import specialBuffer

In [2]:
# slurm = 197
# Index of this task within the SLURM job array; it is passed to readNHD
# as `index`. Required: a missing variable raises KeyError immediately.
slurm = int(os.environ['SLURM_ARRAY_TASK_ID'])
# SLURM_CPUS_PER_TASK may be absent from the environment, so fall back to a
# single CPU instead of failing on int(None).
cpus = int(os.environ.get('SLURM_CPUS_PER_TASK', '1'))
# Worker count handed to pandarallel: values of 65 or more fall back to 1.
# NOTE(review): the reason for the 64-CPU ceiling is not visible here.
cpus_per_task = cpus if cpus < 65 else 1

In [3]:
# # FOR NOW, SET
# width_set = 'max'

### PARSE ARGUMENTS
# One positional argument selects which width option is used downstream.
# `choices` makes argparse reject anything other than min/mean/max with a
# usage message, before any data is read.
# NOTE(review): parse_args() reads sys.argv, so this is intended for
# command-line execution of the script.
parser = ArgumentParser(description='Please specify whether you would\
                        like to use the min, mean, or max predicted\
                        bankfull width for this analysis.')
parser.add_argument('width_set', type=str, choices=('min', 'mean', 'max'),
                    help='min, mean, or max')
args = parser.parse_args()
width_set = args.width_set

In [4]:
# Control flow
# Translate the requested width option into the name that is later passed to
# specialBuffer (presumably a column of `segments` -- confirm in utils).
width_options = {
    'mean': 'WidthM',
    'min': 'WidthM_Min',
    'max': 'WidthM_Max',
}
if width_set not in width_options:
    # Fail here rather than continuing with `width` undefined, which would
    # only surface later as a NameError in the buffering step.
    raise ValueError(
        'Invalid width option specified: ' + repr(width_set)
        + " (expected 'min', 'mean', or 'max')."
    )
width = width_options[width_set]

In [5]:
## Prepare data
# Read segmented NHD
# readNHD is called with the SLURM array index and segmented=True; its three
# return values are unpacked into `segments` (buffered and written out below),
# `huc4` (used as the output file-name prefix) and `huc2` (used as the output
# sub-directory).
segments, huc4, huc2 = readNHD(index=slurm, segmented=True)

type: segmented
NHDPLUS_H_0512_HU4_GDB
/nas/cee-water/cjgleason/fiona/narrow_rivers_PIXC_data/NHD_prepped_segmented/HUC2_05/NHDPLUS_H_0512_HU4_GDB_prepped_segmented.parquet
segments read-in


In [6]:
# Start pandarallel with the worker count derived from SLURM_CPUS_PER_TASK;
# this has to run before parallel_apply is called on `segments`.
pandarallel.initialize(nb_workers=cpus_per_task)

INFO: Pandarallel will run on 24 workers.
INFO: Pandarallel will use Memory file system to transfer data between the main process and workers.


In [7]:
# Buffer segments
## PARALLELIZE
# Apply specialBuffer to every row (axis=1) across the pandarallel workers and
# store the result in a new 'buffer' column.
# NOTE(review): the meaning of the extra positional arguments
# ('flat', True, False) is defined by utils.specialBuffer, not visible here.
# perf_counter() is a monotonic clock, so the elapsed time cannot be skewed
# by system clock adjustments during the run.
start = time.perf_counter()
segments['buffer'] = segments.parallel_apply(
    user_defined_function=specialBuffer,
    args=(width, 'flat', True, False),
    axis=1,
)
end = time.perf_counter()
# Elapsed seconds for the buffering step.
print(end - start)

141.34991335868835


In [8]:
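# Optional visual check of the first 10 rows (run before the 'segments'
# column is dropped); the buffered geometries live in the 'buffer' column.
# fig, ax = plt.subplots()
# segments.iloc[0:10].segments.plot(ax=ax, cmap='hsv')
# gpd.GeoSeries(segments.iloc[0:10]['buffer']).plot(ax=ax, cmap='summer', alpha=0.7)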

In [9]:
# Replace the line geometry with the buffers: remove the 'segments' column,
# make 'buffer' the active geometry, and carry over the CRS of the input frame.
source_crs = segments.crs
segments = (
    segments
    .drop(columns='segments')
    .set_geometry('buffer')
    .set_crs(crs=source_crs)
)

In [10]:
# Base output directory for the buffered segments (relative path).
# NOTE(review): the write-out cell that follows assigns this same value again
# before using it, so this cell appears to be redundant.
save_path = '../../narrow_rivers_PIXC_data/NHD_prepped_segmented_buffered/'

In [11]:
# Write out
# Set write filepath: <base>/<huc2>/<huc4>_buffered_<width_set>.parquet
save_path = '../../narrow_rivers_PIXC_data/NHD_prepped_segmented_buffered/'
save_path = os.path.join(save_path, huc2)
save_file = huc4 + '_buffered_' + width_set + '.parquet'

# Write out gdf as parquet file
# exist_ok=True creates the directory only when needed and does not fail if
# another task created it in the meantime (a separate isdir() check followed
# by makedirs() can race between concurrent array tasks).
os.makedirs(save_path, exist_ok=True)
segments.to_parquet(os.path.join(save_path, save_file))

In [None]:
# Printed last, after the parquet file has been written, to mark a full run.
print('Script completed.')

In [12]:
# del segments

In [13]:
# start = time.time()
# test = gpd.read_parquet('/nas/cee-water/cjgleason/fiona/narrow_rivers_PIXC_data/NHD_prepped_segmented_buffered/HUC2_05/NHDPLUS_H_0512_HU4_GDB_buffered.parquet')
# end = time.time()

# end - start

23.425416707992554